LCOV - code coverage report
Current view: top level - intl/icu/source/common - utrie.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 0 518 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 23 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : ******************************************************************************
       5             : *
       6             : *   Copyright (C) 2001-2012, International Business Machines
       7             : *   Corporation and others.  All Rights Reserved.
       8             : *
       9             : ******************************************************************************
      10             : *   file name:  utrie.cpp
      11             : *   encoding:   UTF-8
      12             : *   tab size:   8 (not used)
      13             : *   indentation:4
      14             : *
      15             : *   created on: 2001oct20
      16             : *   created by: Markus W. Scherer
      17             : *
      18             : *   This is a common implementation of a "folded" trie.
      19             : *   It is a kind of compressed, serializable table of 16- or 32-bit values associated with
      20             : *   Unicode code points (0..0x10ffff).
      21             : */
      22             : 
      23             : #ifdef UTRIE_DEBUG
      24             : #   include <stdio.h>
      25             : #endif
      26             : 
      27             : #include "unicode/utypes.h"
      28             : #include "cmemory.h"
      29             : #include "utrie.h"
      30             : 
      31             : /* miscellaneous ------------------------------------------------------------ */
      32             : 
      33             : #undef ABS
      34             : #define ABS(x) ((x)>=0 ? (x) : -(x))
      35             : 
      36             : static inline UBool
      37           0 : equal_uint32(const uint32_t *s, const uint32_t *t, int32_t length) {
      38           0 :     while(length>0 && *s==*t) {
      39           0 :         ++s;
      40           0 :         ++t;
      41           0 :         --length;
      42             :     }
      43           0 :     return (UBool)(length==0);
      44             : }
      45             : 
      46             : /* Building a trie ----------------------------------------------------------*/
      47             : 
      48             : U_CAPI UNewTrie * U_EXPORT2
      49           0 : utrie_open(UNewTrie *fillIn,
      50             :            uint32_t *aliasData, int32_t maxDataLength,
      51             :            uint32_t initialValue, uint32_t leadUnitValue,
      52             :            UBool latin1Linear) {
      53             :     UNewTrie *trie;
      54             :     int32_t i, j;
      55             : 
      56           0 :     if( maxDataLength<UTRIE_DATA_BLOCK_LENGTH ||
      57           0 :         (latin1Linear && maxDataLength<1024)
      58             :     ) {
      59           0 :         return NULL;
      60             :     }
      61             : 
      62           0 :     if(fillIn!=NULL) {
      63           0 :         trie=fillIn;
      64             :     } else {
      65           0 :         trie=(UNewTrie *)uprv_malloc(sizeof(UNewTrie));
      66           0 :         if(trie==NULL) {
      67           0 :             return NULL;
      68             :         }
      69             :     }
      70           0 :     uprv_memset(trie, 0, sizeof(UNewTrie));
      71           0 :     trie->isAllocated= (UBool)(fillIn==NULL);
      72             : 
      73           0 :     if(aliasData!=NULL) {
      74           0 :         trie->data=aliasData;
      75           0 :         trie->isDataAllocated=FALSE;
      76             :     } else {
      77           0 :         trie->data=(uint32_t *)uprv_malloc(maxDataLength*4);
      78           0 :         if(trie->data==NULL) {
      79           0 :             uprv_free(trie);
      80           0 :             return NULL;
      81             :         }
      82           0 :         trie->isDataAllocated=TRUE;
      83             :     }
      84             : 
      85             :     /* preallocate and reset the first data block (block index 0) */
      86           0 :     j=UTRIE_DATA_BLOCK_LENGTH;
      87             : 
      88           0 :     if(latin1Linear) {
      89             :         /* preallocate and reset the first block (number 0) and Latin-1 (U+0000..U+00ff) after that */
      90             :         /* made sure above that maxDataLength>=1024 */
      91             : 
      92             :         /* set indexes to point to consecutive data blocks */
      93           0 :         i=0;
      94           0 :         do {
      95             :             /* do this at least for trie->index[0] even if that block is only partly used for Latin-1 */
      96           0 :             trie->index[i++]=j;
      97           0 :             j+=UTRIE_DATA_BLOCK_LENGTH;
      98           0 :         } while(i<(256>>UTRIE_SHIFT));
      99             :     }
     100             : 
     101             :     /* reset the initially allocated blocks to the initial value */
     102           0 :     trie->dataLength=j;
     103           0 :     while(j>0) {
     104           0 :         trie->data[--j]=initialValue;
     105             :     }
     106             : 
     107           0 :     trie->leadUnitValue=leadUnitValue;
     108           0 :     trie->indexLength=UTRIE_MAX_INDEX_LENGTH;
     109           0 :     trie->dataCapacity=maxDataLength;
     110           0 :     trie->isLatin1Linear=latin1Linear;
     111           0 :     trie->isCompacted=FALSE;
     112           0 :     return trie;
     113             : }
     114             : 
     115             : U_CAPI UNewTrie * U_EXPORT2
     116           0 : utrie_clone(UNewTrie *fillIn, const UNewTrie *other, uint32_t *aliasData, int32_t aliasDataCapacity) {
     117             :     UNewTrie *trie;
     118             :     UBool isDataAllocated;
     119             : 
     120             :     /* do not clone if other is not valid or already compacted */
     121           0 :     if(other==NULL || other->data==NULL || other->isCompacted) {
     122           0 :         return NULL;
     123             :     }
     124             : 
     125             :     /* clone data */
     126           0 :     if(aliasData!=NULL && aliasDataCapacity>=other->dataCapacity) {
     127           0 :         isDataAllocated=FALSE;
     128             :     } else {
     129           0 :         aliasDataCapacity=other->dataCapacity;
     130           0 :         aliasData=(uint32_t *)uprv_malloc(other->dataCapacity*4);
     131           0 :         if(aliasData==NULL) {
     132           0 :             return NULL;
     133             :         }
     134           0 :         isDataAllocated=TRUE;
     135             :     }
     136             : 
     137           0 :     trie=utrie_open(fillIn, aliasData, aliasDataCapacity,
     138           0 :                     other->data[0], other->leadUnitValue,
     139           0 :                     other->isLatin1Linear);
     140           0 :     if(trie==NULL) {
     141           0 :         uprv_free(aliasData);
     142             :     } else {
     143           0 :         uprv_memcpy(trie->index, other->index, sizeof(trie->index));
     144           0 :         uprv_memcpy(trie->data, other->data, (size_t)other->dataLength*4);
     145           0 :         trie->dataLength=other->dataLength;
     146           0 :         trie->isDataAllocated=isDataAllocated;
     147             :     }
     148             : 
     149           0 :     return trie;
     150             : }
     151             : 
     152             : U_CAPI void U_EXPORT2
     153           0 : utrie_close(UNewTrie *trie) {
     154           0 :     if(trie!=NULL) {
     155           0 :         if(trie->isDataAllocated) {
     156           0 :             uprv_free(trie->data);
     157           0 :             trie->data=NULL;
     158             :         }
     159           0 :         if(trie->isAllocated) {
     160           0 :             uprv_free(trie);
     161             :         }
     162             :     }
     163           0 : }
     164             : 
     165             : U_CAPI uint32_t * U_EXPORT2
     166           0 : utrie_getData(UNewTrie *trie, int32_t *pLength) {
     167           0 :     if(trie==NULL || pLength==NULL) {
     168           0 :         return NULL;
     169             :     }
     170             : 
     171           0 :     *pLength=trie->dataLength;
     172           0 :     return trie->data;
     173             : }
     174             : 
     175             : static int32_t
     176           0 : utrie_allocDataBlock(UNewTrie *trie) {
     177             :     int32_t newBlock, newTop;
     178             : 
     179           0 :     newBlock=trie->dataLength;
     180           0 :     newTop=newBlock+UTRIE_DATA_BLOCK_LENGTH;
     181           0 :     if(newTop>trie->dataCapacity) {
     182             :         /* out of memory in the data array */
     183           0 :         return -1;
     184             :     }
     185           0 :     trie->dataLength=newTop;
     186           0 :     return newBlock;
     187             : }
     188             : 
     189             : /**
     190             :  * No error checking for illegal arguments.
     191             :  *
     192             :  * @return -1 if no new data block available (out of memory in data array)
     193             :  * @internal
     194             :  */
     195             : static int32_t
     196           0 : utrie_getDataBlock(UNewTrie *trie, UChar32 c) {
     197             :     int32_t indexValue, newBlock;
     198             : 
     199           0 :     c>>=UTRIE_SHIFT;
     200           0 :     indexValue=trie->index[c];
     201           0 :     if(indexValue>0) {
     202           0 :         return indexValue;
     203             :     }
     204             : 
     205             :     /* allocate a new data block */
     206           0 :     newBlock=utrie_allocDataBlock(trie);
     207           0 :     if(newBlock<0) {
     208             :         /* out of memory in the data array */
     209           0 :         return -1;
     210             :     }
     211           0 :     trie->index[c]=newBlock;
     212             : 
     213             :     /* copy-on-write for a block from a setRange() */
     214           0 :     uprv_memcpy(trie->data+newBlock, trie->data-indexValue, 4*UTRIE_DATA_BLOCK_LENGTH);
     215           0 :     return newBlock;
     216             : }
     217             : 
     218             : /**
     219             :  * @return TRUE if the value was successfully set
     220             :  */
     221             : U_CAPI UBool U_EXPORT2
     222           0 : utrie_set32(UNewTrie *trie, UChar32 c, uint32_t value) {
     223             :     int32_t block;
     224             : 
     225             :     /* valid, uncompacted trie and valid c? */
     226           0 :     if(trie==NULL || trie->isCompacted || (uint32_t)c>0x10ffff) {
     227           0 :         return FALSE;
     228             :     }
     229             : 
     230           0 :     block=utrie_getDataBlock(trie, c);
     231           0 :     if(block<0) {
     232           0 :         return FALSE;
     233             :     }
     234             : 
     235           0 :     trie->data[block+(c&UTRIE_MASK)]=value;
     236           0 :     return TRUE;
     237             : }
     238             : 
     239             : U_CAPI uint32_t U_EXPORT2
     240           0 : utrie_get32(UNewTrie *trie, UChar32 c, UBool *pInBlockZero) {
     241             :     int32_t block;
     242             : 
     243             :     /* valid, uncompacted trie and valid c? */
     244           0 :     if(trie==NULL || trie->isCompacted || (uint32_t)c>0x10ffff) {
     245           0 :         if(pInBlockZero!=NULL) {
     246           0 :             *pInBlockZero=TRUE;
     247             :         }
     248           0 :         return 0;
     249             :     }
     250             : 
     251           0 :     block=trie->index[c>>UTRIE_SHIFT];
     252           0 :     if(pInBlockZero!=NULL) {
     253           0 :         *pInBlockZero= (UBool)(block==0);
     254             :     }
     255             : 
     256           0 :     return trie->data[ABS(block)+(c&UTRIE_MASK)];
     257             : }
     258             : 
     259             : /**
     260             :  * @internal
     261             :  */
     262             : static void
     263           0 : utrie_fillBlock(uint32_t *block, UChar32 start, UChar32 limit,
     264             :                 uint32_t value, uint32_t initialValue, UBool overwrite) {
     265             :     uint32_t *pLimit;
     266             : 
     267           0 :     pLimit=block+limit;
     268           0 :     block+=start;
     269           0 :     if(overwrite) {
     270           0 :         while(block<pLimit) {
     271           0 :             *block++=value;
     272             :         }
     273             :     } else {
     274           0 :         while(block<pLimit) {
     275           0 :             if(*block==initialValue) {
     276           0 :                 *block=value;
     277             :             }
     278           0 :             ++block;
     279             :         }
     280             :     }
     281           0 : }
     282             : 
     283             : U_CAPI UBool U_EXPORT2
     284           0 : utrie_setRange32(UNewTrie *trie, UChar32 start, UChar32 limit, uint32_t value, UBool overwrite) {
     285             :     /*
     286             :      * repeat value in [start..limit[
     287             :      * mark index values for repeat-data blocks by setting bit 31 of the index values
     288             :      * fill around existing values if any, if(overwrite)
     289             :      */
     290             :     uint32_t initialValue;
     291             :     int32_t block, rest, repeatBlock;
     292             : 
     293             :     /* valid, uncompacted trie and valid indexes? */
     294           0 :     if( trie==NULL || trie->isCompacted ||
     295           0 :         (uint32_t)start>0x10ffff || (uint32_t)limit>0x110000 || start>limit
     296             :     ) {
     297           0 :         return FALSE;
     298             :     }
     299           0 :     if(start==limit) {
     300           0 :         return TRUE; /* nothing to do */
     301             :     }
     302             : 
     303           0 :     initialValue=trie->data[0];
     304           0 :     if(start&UTRIE_MASK) {
     305             :         UChar32 nextStart;
     306             : 
     307             :         /* set partial block at [start..following block boundary[ */
     308           0 :         block=utrie_getDataBlock(trie, start);
     309           0 :         if(block<0) {
     310           0 :             return FALSE;
     311             :         }
     312             : 
     313           0 :         nextStart=(start+UTRIE_DATA_BLOCK_LENGTH)&~UTRIE_MASK;
     314           0 :         if(nextStart<=limit) {
     315           0 :             utrie_fillBlock(trie->data+block, start&UTRIE_MASK, UTRIE_DATA_BLOCK_LENGTH,
     316           0 :                             value, initialValue, overwrite);
     317           0 :             start=nextStart;
     318             :         } else {
     319           0 :             utrie_fillBlock(trie->data+block, start&UTRIE_MASK, limit&UTRIE_MASK,
     320           0 :                             value, initialValue, overwrite);
     321           0 :             return TRUE;
     322             :         }
     323             :     }
     324             : 
     325             :     /* number of positions in the last, partial block */
     326           0 :     rest=limit&UTRIE_MASK;
     327             : 
     328             :     /* round down limit to a block boundary */
     329           0 :     limit&=~UTRIE_MASK;
     330             : 
     331             :     /* iterate over all-value blocks */
     332           0 :     if(value==initialValue) {
     333           0 :         repeatBlock=0;
     334             :     } else {
     335           0 :         repeatBlock=-1;
     336             :     }
     337           0 :     while(start<limit) {
     338             :         /* get index value */
     339           0 :         block=trie->index[start>>UTRIE_SHIFT];
     340           0 :         if(block>0) {
     341             :             /* already allocated, fill in value */
     342           0 :             utrie_fillBlock(trie->data+block, 0, UTRIE_DATA_BLOCK_LENGTH, value, initialValue, overwrite);
     343           0 :         } else if(trie->data[-block]!=value && (block==0 || overwrite)) {
     344             :             /* set the repeatBlock instead of the current block 0 or range block */
     345           0 :             if(repeatBlock>=0) {
     346           0 :                 trie->index[start>>UTRIE_SHIFT]=-repeatBlock;
     347             :             } else {
     348             :                 /* create and set and fill the repeatBlock */
     349           0 :                 repeatBlock=utrie_getDataBlock(trie, start);
     350           0 :                 if(repeatBlock<0) {
     351           0 :                     return FALSE;
     352             :                 }
     353             : 
     354             :                 /* set the negative block number to indicate that it is a repeat block */
     355           0 :                 trie->index[start>>UTRIE_SHIFT]=-repeatBlock;
     356           0 :                 utrie_fillBlock(trie->data+repeatBlock, 0, UTRIE_DATA_BLOCK_LENGTH, value, initialValue, TRUE);
     357             :             }
     358             :         }
     359             : 
     360           0 :         start+=UTRIE_DATA_BLOCK_LENGTH;
     361             :     }
     362             : 
     363           0 :     if(rest>0) {
     364             :         /* set partial block at [last block boundary..limit[ */
     365           0 :         block=utrie_getDataBlock(trie, start);
     366           0 :         if(block<0) {
     367           0 :             return FALSE;
     368             :         }
     369             : 
     370           0 :         utrie_fillBlock(trie->data+block, 0, rest, value, initialValue, overwrite);
     371             :     }
     372             : 
     373           0 :     return TRUE;
     374             : }
     375             : 
     376             : static int32_t
     377           0 : _findSameIndexBlock(const int32_t *idx, int32_t indexLength,
     378             :                     int32_t otherBlock) {
     379             :     int32_t block, i;
     380             : 
     381           0 :     for(block=UTRIE_BMP_INDEX_LENGTH; block<indexLength; block+=UTRIE_SURROGATE_BLOCK_COUNT) {
     382           0 :         for(i=0; i<UTRIE_SURROGATE_BLOCK_COUNT; ++i) {
     383           0 :             if(idx[block+i]!=idx[otherBlock+i]) {
     384           0 :                 break;
     385             :             }
     386             :         }
     387           0 :         if(i==UTRIE_SURROGATE_BLOCK_COUNT) {
     388           0 :             return block;
     389             :         }
     390             :     }
     391           0 :     return indexLength;
     392             : }
     393             : 
     394             : /*
     395             :  * Fold the normalization data for supplementary code points into
     396             :  * a compact area on top of the BMP-part of the trie index,
     397             :  * with the lead surrogates indexing this compact area.
     398             :  *
     399             :  * Duplicate the index values for lead surrogates:
     400             :  * From inside the BMP area, where some may be overridden with folded values,
     401             :  * to just after the BMP area, where they can be retrieved for
     402             :  * code point lookups.
     403             :  */
     404             : static void
     405           0 : utrie_fold(UNewTrie *trie, UNewTrieGetFoldedValue *getFoldedValue, UErrorCode *pErrorCode) {
     406             :     int32_t leadIndexes[UTRIE_SURROGATE_BLOCK_COUNT];
     407             :     int32_t *idx;
     408             :     uint32_t value;
     409             :     UChar32 c;
     410             :     int32_t indexLength, block;
     411             : #ifdef UTRIE_DEBUG
     412             :     int countLeadCUWithData=0;
     413             : #endif
     414             : 
     415           0 :     idx=trie->index;
     416             : 
     417             :     /* copy the lead surrogate indexes into a temporary array */
     418           0 :     uprv_memcpy(leadIndexes, idx+(0xd800>>UTRIE_SHIFT), 4*UTRIE_SURROGATE_BLOCK_COUNT);
     419             : 
     420             :     /*
     421             :      * set all values for lead surrogate code *units* to leadUnitValue
     422             :      * so that, by default, runtime lookups will find no data for associated
     423             :      * supplementary code points, unless there is data for such code points
     424             :      * which will result in a non-zero folding value below that is set for
     425             :      * the respective lead units
     426             :      *
     427             :      * the above saved the indexes for surrogate code *points*
     428             :      * fill the indexes with simplified code from utrie_setRange32()
     429             :      */
     430           0 :     if(trie->leadUnitValue==trie->data[0]) {
     431           0 :         block=0; /* leadUnitValue==initialValue, use all-initial-value block */
     432             :     } else {
     433             :         /* create and fill the repeatBlock */
     434           0 :         block=utrie_allocDataBlock(trie);
     435           0 :         if(block<0) {
     436             :             /* data table overflow */
     437           0 :             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
     438           0 :             return;
     439             :         }
     440           0 :         utrie_fillBlock(trie->data+block, 0, UTRIE_DATA_BLOCK_LENGTH, trie->leadUnitValue, trie->data[0], TRUE);
     441           0 :         block=-block; /* negative block number to indicate that it is a repeat block */
     442             :     }
     443           0 :     for(c=(0xd800>>UTRIE_SHIFT); c<(0xdc00>>UTRIE_SHIFT); ++c) {
     444           0 :         trie->index[c]=block;
     445             :     }
     446             : 
     447             :     /*
     448             :      * Fold significant index values into the area just after the BMP indexes.
     449             :      * In case the first lead surrogate has significant data,
     450             :      * its index block must be used first (in which case the folding is a no-op).
     451             :      * Later all folded index blocks are moved up one to insert the copied
     452             :      * lead surrogate indexes.
     453             :      */
     454           0 :     indexLength=UTRIE_BMP_INDEX_LENGTH;
     455             : 
     456             :     /* search for any index (stage 1) entries for supplementary code points */
     457           0 :     for(c=0x10000; c<0x110000;) {
     458           0 :         if(idx[c>>UTRIE_SHIFT]!=0) {
     459             :             /* there is data, treat the full block for a lead surrogate */
     460           0 :             c&=~0x3ff;
     461             : 
     462             : #ifdef UTRIE_DEBUG
     463             :             ++countLeadCUWithData;
     464             :             /* printf("supplementary data for lead surrogate U+%04lx\n", (long)(0xd7c0+(c>>10))); */
     465             : #endif
     466             : 
     467             :             /* is there an identical index block? */
     468           0 :             block=_findSameIndexBlock(idx, indexLength, c>>UTRIE_SHIFT);
     469             : 
     470             :             /*
     471             :              * get a folded value for [c..c+0x400[ and,
     472             :              * if different from the value for the lead surrogate code point,
     473             :              * set it for the lead surrogate code unit
     474             :              */
     475           0 :             value=getFoldedValue(trie, c, block+UTRIE_SURROGATE_BLOCK_COUNT);
     476           0 :             if(value!=utrie_get32(trie, U16_LEAD(c), NULL)) {
     477           0 :                 if(!utrie_set32(trie, U16_LEAD(c), value)) {
     478             :                     /* data table overflow */
     479           0 :                     *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
     480           0 :                     return;
     481             :                 }
     482             : 
     483             :                 /* if we did not find an identical index block... */
     484           0 :                 if(block==indexLength) {
     485             :                     /* move the actual index (stage 1) entries from the supplementary position to the new one */
     486           0 :                     uprv_memmove(idx+indexLength,
     487             :                                  idx+(c>>UTRIE_SHIFT),
     488           0 :                                  4*UTRIE_SURROGATE_BLOCK_COUNT);
     489           0 :                     indexLength+=UTRIE_SURROGATE_BLOCK_COUNT;
     490             :                 }
     491             :             }
     492           0 :             c+=0x400;
     493             :         } else {
     494           0 :             c+=UTRIE_DATA_BLOCK_LENGTH;
     495             :         }
     496             :     }
     497             : #ifdef UTRIE_DEBUG
     498             :     if(countLeadCUWithData>0) {
     499             :         printf("supplementary data for %d lead surrogates\n", countLeadCUWithData);
     500             :     }
     501             : #endif
     502             : 
     503             :     /*
     504             :      * index array overflow?
     505             :      * This is to guarantee that a folding offset is of the form
     506             :      * UTRIE_BMP_INDEX_LENGTH+n*UTRIE_SURROGATE_BLOCK_COUNT with n=0..1023.
     507             :      * If the index is too large, then n>=1024 and more than 10 bits are necessary.
     508             :      *
     509             :      * In fact, it can only ever become n==1024 with completely unfoldable data and
     510             :      * the additional block of duplicated values for lead surrogates.
     511             :      */
     512           0 :     if(indexLength>=UTRIE_MAX_INDEX_LENGTH) {
     513           0 :         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
     514           0 :         return;
     515             :     }
     516             : 
     517             :     /*
     518             :      * make space for the lead surrogate index block and
     519             :      * insert it between the BMP indexes and the folded ones
     520             :      */
     521           0 :     uprv_memmove(idx+UTRIE_BMP_INDEX_LENGTH+UTRIE_SURROGATE_BLOCK_COUNT,
     522             :                  idx+UTRIE_BMP_INDEX_LENGTH,
     523           0 :                  4*(indexLength-UTRIE_BMP_INDEX_LENGTH));
     524           0 :     uprv_memcpy(idx+UTRIE_BMP_INDEX_LENGTH,
     525             :                 leadIndexes,
     526           0 :                 4*UTRIE_SURROGATE_BLOCK_COUNT);
     527           0 :     indexLength+=UTRIE_SURROGATE_BLOCK_COUNT;
     528             : 
     529             : #ifdef UTRIE_DEBUG
     530             :     printf("trie index count: BMP %ld  all Unicode %ld  folded %ld\n",
     531             :            UTRIE_BMP_INDEX_LENGTH, (long)UTRIE_MAX_INDEX_LENGTH, indexLength);
     532             : #endif
     533             : 
     534           0 :     trie->indexLength=indexLength;
     535             : }
     536             : 
     537             : /*
     538             :  * Set a value in the trie index map to indicate which data block
     539             :  * is referenced and which one is not.
     540             :  * utrie_compact() will remove data blocks that are not used at all.
     541             :  * Set
     542             :  * - 0 if it is used
     543             :  * - -1 if it is not used
     544             :  */
     545             : static void
     546           0 : _findUnusedBlocks(UNewTrie *trie) {
     547             :     int32_t i;
     548             : 
     549             :     /* fill the entire map with "not used" */
     550           0 :     uprv_memset(trie->map, 0xff, (UTRIE_MAX_BUILD_TIME_DATA_LENGTH>>UTRIE_SHIFT)*4);
     551             : 
     552             :     /* mark each block that _is_ used with 0 */
     553           0 :     for(i=0; i<trie->indexLength; ++i) {
     554           0 :         trie->map[ABS(trie->index[i])>>UTRIE_SHIFT]=0;
     555             :     }
     556             : 
     557             :     /* never move the all-initial-value block 0 */
     558           0 :     trie->map[0]=0;
     559           0 : }
     560             : 
     561             : static int32_t
     562           0 : _findSameDataBlock(const uint32_t *data, int32_t dataLength,
     563             :                    int32_t otherBlock, int32_t step) {
     564             :     int32_t block;
     565             : 
     566             :     /* ensure that we do not even partially get past dataLength */
     567           0 :     dataLength-=UTRIE_DATA_BLOCK_LENGTH;
     568             : 
     569           0 :     for(block=0; block<=dataLength; block+=step) {
     570           0 :         if(equal_uint32(data+block, data+otherBlock, UTRIE_DATA_BLOCK_LENGTH)) {
     571           0 :             return block;
     572             :         }
     573             :     }
     574           0 :     return -1;
     575             : }
     576             : 
     577             : /*
     578             :  * Compact a folded build-time trie.
     579             :  *
     580             :  * The compaction
     581             :  * - removes blocks that are identical with earlier ones
     582             :  * - overlaps adjacent blocks as much as possible (if overlap==TRUE)
     583             :  * - moves blocks in steps of the data granularity
     584             :  * - moves and overlaps blocks that overlap with multiple values in the overlap region
     585             :  *
     586             :  * It does not
     587             :  * - try to move and overlap blocks that are not already adjacent
     588             :  */
     589             : static void
     590           0 : utrie_compact(UNewTrie *trie, UBool overlap, UErrorCode *pErrorCode) {
     591             :     int32_t i, start, newStart, overlapStart;
     592             : 
     593           0 :     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
     594           0 :         return;
     595             :     }
     596             : 
     597             :     /* valid, uncompacted trie? */
     598           0 :     if(trie==NULL) {
     599           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
     600           0 :         return;
     601             :     }
     602           0 :     if(trie->isCompacted) {
     603           0 :         return; /* nothing left to do */
     604             :     }
     605             : 
     606             :     /* compaction */
     607             : 
     608             :     /* initialize the index map with "block is used/unused" flags */
     609           0 :     _findUnusedBlocks(trie);
     610             : 
     611             :     /* if Latin-1 is preallocated and linear, then do not compact Latin-1 data */
     612           0 :     if(trie->isLatin1Linear && UTRIE_SHIFT<=8) {
     613           0 :         overlapStart=UTRIE_DATA_BLOCK_LENGTH+256;
     614             :     } else {
     615           0 :         overlapStart=UTRIE_DATA_BLOCK_LENGTH;
     616             :     }
     617             : 
     618           0 :     newStart=UTRIE_DATA_BLOCK_LENGTH;
     619           0 :     for(start=newStart; start<trie->dataLength;) {
     620             :         /*
     621             :          * start: index of first entry of current block
     622             :          * newStart: index where the current block is to be moved
     623             :          *           (right after current end of already-compacted data)
     624             :          */
     625             : 
     626             :         /* skip blocks that are not used */
     627           0 :         if(trie->map[start>>UTRIE_SHIFT]<0) {
     628             :             /* advance start to the next block */
     629           0 :             start+=UTRIE_DATA_BLOCK_LENGTH;
     630             : 
     631             :             /* leave newStart with the previous block! */
     632           0 :             continue;
     633             :         }
     634             : 
     635             :         /* search for an identical block */
     636           0 :         if( start>=overlapStart &&
     637           0 :             (i=_findSameDataBlock(trie->data, newStart, start,
     638             :                             overlap ? UTRIE_DATA_GRANULARITY : UTRIE_DATA_BLOCK_LENGTH))
     639             :              >=0
     640             :         ) {
     641             :             /* found an identical block, set the other block's index value for the current block */
     642           0 :             trie->map[start>>UTRIE_SHIFT]=i;
     643             : 
     644             :             /* advance start to the next block */
     645           0 :             start+=UTRIE_DATA_BLOCK_LENGTH;
     646             : 
     647             :             /* leave newStart with the previous block! */
     648           0 :             continue;
     649             :         }
     650             : 
     651             :         /* see if the beginning of this block can be overlapped with the end of the previous block */
     652           0 :         if(overlap && start>=overlapStart) {
     653             :             /* look for maximum overlap (modulo granularity) with the previous, adjacent block */
     654           0 :             for(i=UTRIE_DATA_BLOCK_LENGTH-UTRIE_DATA_GRANULARITY;
     655           0 :                 i>0 && !equal_uint32(trie->data+(newStart-i), trie->data+start, i);
     656           0 :                 i-=UTRIE_DATA_GRANULARITY) {}
     657             :         } else {
     658           0 :             i=0;
     659             :         }
     660             : 
     661           0 :         if(i>0) {
     662             :             /* some overlap */
     663           0 :             trie->map[start>>UTRIE_SHIFT]=newStart-i;
     664             : 
     665             :             /* move the non-overlapping indexes to their new positions */
     666           0 :             start+=i;
     667           0 :             for(i=UTRIE_DATA_BLOCK_LENGTH-i; i>0; --i) {
     668           0 :                 trie->data[newStart++]=trie->data[start++];
     669             :             }
     670           0 :         } else if(newStart<start) {
     671             :             /* no overlap, just move the indexes to their new positions */
     672           0 :             trie->map[start>>UTRIE_SHIFT]=newStart;
     673           0 :             for(i=UTRIE_DATA_BLOCK_LENGTH; i>0; --i) {
     674           0 :                 trie->data[newStart++]=trie->data[start++];
     675             :             }
     676             :         } else /* no overlap && newStart==start */ {
     677           0 :             trie->map[start>>UTRIE_SHIFT]=start;
     678           0 :             newStart+=UTRIE_DATA_BLOCK_LENGTH;
     679           0 :             start=newStart;
     680             :         }
     681             :     }
     682             : 
     683             :     /* now adjust the index (stage 1) table */
     684           0 :     for(i=0; i<trie->indexLength; ++i) {
     685           0 :         trie->index[i]=trie->map[ABS(trie->index[i])>>UTRIE_SHIFT];
     686             :     }
     687             : 
     688             : #ifdef UTRIE_DEBUG
     689             :     /* we saved some space */
     690             :     printf("compacting trie: count of 32-bit words %lu->%lu\n",
     691             :             (long)trie->dataLength, (long)newStart);
     692             : #endif
     693             : 
     694           0 :     trie->dataLength=newStart;
     695             : }
     696             : 
     697             : /* serialization ------------------------------------------------------------ */
     698             : 
     699             : /*
     700             :  * Default function for the folding value:
     701             :  * Just store the offset (16 bits) if there is any non-initial-value entry.
     702             :  *
     703             :  * The offset parameter is never 0.
     704             :  * Returning the offset itself is safe for UTRIE_SHIFT>=5 because
     705             :  * for UTRIE_SHIFT==5 the maximum index length is UTRIE_MAX_INDEX_LENGTH==0x8800
     706             :  * which fits into 16-bit trie values;
     707             :  * for higher UTRIE_SHIFT, UTRIE_MAX_INDEX_LENGTH decreases.
     708             :  *
     709             :  * Theoretically, it would be safer for all possible UTRIE_SHIFT including
     710             :  * those of 4 and lower to return offset>>UTRIE_SURROGATE_BLOCK_BITS
     711             :  * which would always result in a value of 0x40..0x43f
     712             :  * (start/end 1k blocks of supplementary Unicode code points).
     713             :  * However, this would be uglier, and would not work for some existing
     714             :  * binary data file formats.
     715             :  *
     716             :  * Also, we do not plan to change UTRIE_SHIFT because it would change binary
     717             :  * data file formats, and we would probably not make it smaller because of
     718             :  * the then even larger BMP index length even for empty tries.
     719             :  */
     720             : static uint32_t U_CALLCONV
     721           0 : defaultGetFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset) {
     722             :     uint32_t value, initialValue;
     723             :     UChar32 limit;
     724             :     UBool inBlockZero;
     725             : 
     726           0 :     initialValue=trie->data[0];
     727           0 :     limit=start+0x400;
     728           0 :     while(start<limit) {
     729           0 :         value=utrie_get32(trie, start, &inBlockZero);
     730           0 :         if(inBlockZero) {
     731           0 :             start+=UTRIE_DATA_BLOCK_LENGTH;
     732           0 :         } else if(value!=initialValue) {
     733           0 :             return (uint32_t)offset;
     734             :         } else {
     735           0 :             ++start;
     736             :         }
     737             :     }
     738           0 :     return 0;
     739             : }
     740             : 
     741             : U_CAPI int32_t U_EXPORT2
     742           0 : utrie_serialize(UNewTrie *trie, void *dt, int32_t capacity,
     743             :                 UNewTrieGetFoldedValue *getFoldedValue,
     744             :                 UBool reduceTo16Bits,
     745             :                 UErrorCode *pErrorCode) {
     746             :     UTrieHeader *header;
     747             :     uint32_t *p;
     748             :     uint16_t *dest16;
     749             :     int32_t i, length;
     750           0 :     uint8_t* data = NULL;
     751             : 
     752             :     /* argument check */
     753           0 :     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
     754           0 :         return 0;
     755             :     }
     756             : 
     757           0 :     if(trie==NULL || capacity<0 || (capacity>0 && dt==NULL)) {
     758           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
     759           0 :         return 0;
     760             :     }
     761           0 :     if(getFoldedValue==NULL) {
     762           0 :         getFoldedValue=defaultGetFoldedValue;
     763             :     }
     764             : 
     765           0 :     data = (uint8_t*)dt;
     766             :     /* fold and compact if necessary, also checks that indexLength is within limits */
     767           0 :     if(!trie->isCompacted) {
     768             :         /* compact once without overlap to improve folding */
     769           0 :         utrie_compact(trie, FALSE, pErrorCode);
     770             : 
     771             :         /* fold the supplementary part of the index array */
     772           0 :         utrie_fold(trie, getFoldedValue, pErrorCode);
     773             : 
     774             :         /* compact again with overlap for minimum data array length */
     775           0 :         utrie_compact(trie, TRUE, pErrorCode);
     776             : 
     777           0 :         trie->isCompacted=TRUE;
     778           0 :         if(U_FAILURE(*pErrorCode)) {
     779           0 :             return 0;
     780             :         }
     781             :     }
     782             : 
     783             :     /* is dataLength within limits? */
     784           0 :     if( (reduceTo16Bits ? (trie->dataLength+trie->indexLength) : trie->dataLength) >= UTRIE_MAX_DATA_LENGTH) {
     785           0 :         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
     786             :     }
     787             : 
     788           0 :     length=sizeof(UTrieHeader)+2*trie->indexLength;
     789           0 :     if(reduceTo16Bits) {
     790           0 :         length+=2*trie->dataLength;
     791             :     } else {
     792           0 :         length+=4*trie->dataLength;
     793             :     }
     794             : 
     795           0 :     if(length>capacity) {
     796           0 :         return length; /* preflighting */
     797             :     }
     798             : 
     799             : #ifdef UTRIE_DEBUG
     800             :     printf("**UTrieLengths(serialize)** index:%6ld  data:%6ld  serialized:%6ld\n",
     801             :            (long)trie->indexLength, (long)trie->dataLength, (long)length);
     802             : #endif
     803             : 
     804             :     /* set the header fields */
     805           0 :     header=(UTrieHeader *)data;
     806           0 :     data+=sizeof(UTrieHeader);
     807             : 
     808           0 :     header->signature=0x54726965; /* "Trie" */
     809           0 :     header->options=UTRIE_SHIFT | (UTRIE_INDEX_SHIFT<<UTRIE_OPTIONS_INDEX_SHIFT);
     810             : 
     811           0 :     if(!reduceTo16Bits) {
     812           0 :         header->options|=UTRIE_OPTIONS_DATA_IS_32_BIT;
     813             :     }
     814           0 :     if(trie->isLatin1Linear) {
     815           0 :         header->options|=UTRIE_OPTIONS_LATIN1_IS_LINEAR;
     816             :     }
     817             : 
     818           0 :     header->indexLength=trie->indexLength;
     819           0 :     header->dataLength=trie->dataLength;
     820             : 
     821             :     /* write the index (stage 1) array and the 16/32-bit data (stage 2) array */
     822           0 :     if(reduceTo16Bits) {
     823             :         /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT, after adding indexLength */
     824           0 :         p=(uint32_t *)trie->index;
     825           0 :         dest16=(uint16_t *)data;
     826           0 :         for(i=trie->indexLength; i>0; --i) {
     827           0 :             *dest16++=(uint16_t)((*p++ + trie->indexLength)>>UTRIE_INDEX_SHIFT);
     828             :         }
     829             : 
     830             :         /* write 16-bit data values */
     831           0 :         p=trie->data;
     832           0 :         for(i=trie->dataLength; i>0; --i) {
     833           0 :             *dest16++=(uint16_t)*p++;
     834             :         }
     835             :     } else {
     836             :         /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT */
     837           0 :         p=(uint32_t *)trie->index;
     838           0 :         dest16=(uint16_t *)data;
     839           0 :         for(i=trie->indexLength; i>0; --i) {
     840           0 :             *dest16++=(uint16_t)(*p++ >> UTRIE_INDEX_SHIFT);
     841             :         }
     842             : 
     843             :         /* write 32-bit data values */
     844           0 :         uprv_memcpy(dest16, trie->data, 4*(size_t)trie->dataLength);
     845             :     }
     846             : 
     847           0 :     return length;
     848             : }
     849             : 
     850             : /* inverse to defaultGetFoldedValue() */
     851             : U_CAPI int32_t U_EXPORT2
     852           0 : utrie_defaultGetFoldingOffset(uint32_t data) {
     853           0 :     return (int32_t)data;
     854             : }
     855             : 
     856             : U_CAPI int32_t U_EXPORT2
     857           0 : utrie_unserialize(UTrie *trie, const void *data, int32_t length, UErrorCode *pErrorCode) {
     858             :     const UTrieHeader *header;
     859             :     const uint16_t *p16;
     860             :     uint32_t options;
     861             : 
     862           0 :     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
     863           0 :         return -1;
     864             :     }
     865             : 
     866             :     /* enough data for a trie header? */
     867           0 :     if(length<(int32_t)sizeof(UTrieHeader)) {
     868           0 :         *pErrorCode=U_INVALID_FORMAT_ERROR;
     869           0 :         return -1;
     870             :     }
     871             : 
     872             :     /* check the signature */
     873           0 :     header=(const UTrieHeader *)data;
     874           0 :     if(header->signature!=0x54726965) {
     875           0 :         *pErrorCode=U_INVALID_FORMAT_ERROR;
     876           0 :         return -1;
     877             :     }
     878             : 
     879             :     /* get the options and check the shift values */
     880           0 :     options=header->options;
     881           0 :     if( (options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT ||
     882           0 :         ((options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT
     883             :     ) {
     884           0 :         *pErrorCode=U_INVALID_FORMAT_ERROR;
     885           0 :         return -1;
     886             :     }
     887           0 :     trie->isLatin1Linear= (UBool)((options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0);
     888             : 
     889             :     /* get the length values */
     890           0 :     trie->indexLength=header->indexLength;
     891           0 :     trie->dataLength=header->dataLength;
     892             : 
     893           0 :     length-=(int32_t)sizeof(UTrieHeader);
     894             : 
     895             :     /* enough data for the index? */
     896           0 :     if(length<2*trie->indexLength) {
     897           0 :         *pErrorCode=U_INVALID_FORMAT_ERROR;
     898           0 :         return -1;
     899             :     }
     900           0 :     p16=(const uint16_t *)(header+1);
     901           0 :     trie->index=p16;
     902           0 :     p16+=trie->indexLength;
     903           0 :     length-=2*trie->indexLength;
     904             : 
     905             :     /* get the data */
     906           0 :     if(options&UTRIE_OPTIONS_DATA_IS_32_BIT) {
     907           0 :         if(length<4*trie->dataLength) {
     908           0 :             *pErrorCode=U_INVALID_FORMAT_ERROR;
     909           0 :             return -1;
     910             :         }
     911           0 :         trie->data32=(const uint32_t *)p16;
     912           0 :         trie->initialValue=trie->data32[0];
     913           0 :         length=(int32_t)sizeof(UTrieHeader)+2*trie->indexLength+4*trie->dataLength;
     914             :     } else {
     915           0 :         if(length<2*trie->dataLength) {
     916           0 :             *pErrorCode=U_INVALID_FORMAT_ERROR;
     917           0 :             return -1;
     918             :         }
     919             : 
     920             :         /* the "data16" data is used via the index pointer */
     921           0 :         trie->data32=NULL;
     922           0 :         trie->initialValue=trie->index[trie->indexLength];
     923           0 :         length=(int32_t)sizeof(UTrieHeader)+2*trie->indexLength+2*trie->dataLength;
     924             :     }
     925             : 
     926           0 :     trie->getFoldingOffset=utrie_defaultGetFoldingOffset;
     927             : 
     928           0 :     return length;
     929             : }
     930             : 
     931             : U_CAPI int32_t U_EXPORT2
     932           0 : utrie_unserializeDummy(UTrie *trie,
     933             :                        void *data, int32_t length,
     934             :                        uint32_t initialValue, uint32_t leadUnitValue,
     935             :                        UBool make16BitTrie,
     936             :                        UErrorCode *pErrorCode) {
     937             :     uint16_t *p16;
     938             :     int32_t actualLength, latin1Length, i, limit;
     939             :     uint16_t block;
     940             : 
     941           0 :     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
     942           0 :         return -1;
     943             :     }
     944             : 
     945             :     /* calculate the actual size of the dummy trie data */
     946             : 
     947             :     /* max(Latin-1, block 0) */
     948           0 :     latin1Length= 256; /*UTRIE_SHIFT<=8 ? 256 : UTRIE_DATA_BLOCK_LENGTH;*/
     949             : 
     950           0 :     trie->indexLength=UTRIE_BMP_INDEX_LENGTH+UTRIE_SURROGATE_BLOCK_COUNT;
     951           0 :     trie->dataLength=latin1Length;
     952           0 :     if(leadUnitValue!=initialValue) {
     953           0 :         trie->dataLength+=UTRIE_DATA_BLOCK_LENGTH;
     954             :     }
     955             : 
     956           0 :     actualLength=trie->indexLength*2;
     957           0 :     if(make16BitTrie) {
     958           0 :         actualLength+=trie->dataLength*2;
     959             :     } else {
     960           0 :         actualLength+=trie->dataLength*4;
     961             :     }
     962             : 
     963             :     /* enough space for the dummy trie? */
     964           0 :     if(length<actualLength) {
     965           0 :         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     966           0 :         return actualLength;
     967             :     }
     968             : 
     969           0 :     trie->isLatin1Linear=TRUE;
     970           0 :     trie->initialValue=initialValue;
     971             : 
     972             :     /* fill the index and data arrays */
     973           0 :     p16=(uint16_t *)data;
     974           0 :     trie->index=p16;
     975             : 
     976           0 :     if(make16BitTrie) {
     977             :         /* indexes to block 0 */
     978           0 :         block=(uint16_t)(trie->indexLength>>UTRIE_INDEX_SHIFT);
     979           0 :         limit=trie->indexLength;
     980           0 :         for(i=0; i<limit; ++i) {
     981           0 :             p16[i]=block;
     982             :         }
     983             : 
     984           0 :         if(leadUnitValue!=initialValue) {
     985             :             /* indexes for lead surrogate code units to the block after Latin-1 */
     986           0 :             block+=(uint16_t)(latin1Length>>UTRIE_INDEX_SHIFT);
     987           0 :             i=0xd800>>UTRIE_SHIFT;
     988           0 :             limit=0xdc00>>UTRIE_SHIFT;
     989           0 :             for(; i<limit; ++i) {
     990           0 :                 p16[i]=block;
     991             :             }
     992             :         }
     993             : 
     994           0 :         trie->data32=NULL;
     995             : 
     996             :         /* Latin-1 data */
     997           0 :         p16+=trie->indexLength;
     998           0 :         for(i=0; i<latin1Length; ++i) {
     999           0 :             p16[i]=(uint16_t)initialValue;
    1000             :         }
    1001             : 
    1002             :         /* data for lead surrogate code units */
    1003           0 :         if(leadUnitValue!=initialValue) {
    1004           0 :             limit=latin1Length+UTRIE_DATA_BLOCK_LENGTH;
    1005           0 :             for(/* i=latin1Length */; i<limit; ++i) {
    1006           0 :                 p16[i]=(uint16_t)leadUnitValue;
    1007             :             }
    1008             :         }
    1009             :     } else {
    1010             :         uint32_t *p32;
    1011             : 
    1012             :         /* indexes to block 0 */
    1013           0 :         uprv_memset(p16, 0, trie->indexLength*2);
    1014             : 
    1015           0 :         if(leadUnitValue!=initialValue) {
    1016             :             /* indexes for lead surrogate code units to the block after Latin-1 */
    1017           0 :             block=(uint16_t)(latin1Length>>UTRIE_INDEX_SHIFT);
    1018           0 :             i=0xd800>>UTRIE_SHIFT;
    1019           0 :             limit=0xdc00>>UTRIE_SHIFT;
    1020           0 :             for(; i<limit; ++i) {
    1021           0 :                 p16[i]=block;
    1022             :             }
    1023             :         }
    1024             : 
    1025           0 :         trie->data32=p32=(uint32_t *)(p16+trie->indexLength);
    1026             : 
    1027             :         /* Latin-1 data */
    1028           0 :         for(i=0; i<latin1Length; ++i) {
    1029           0 :             p32[i]=initialValue;
    1030             :         }
    1031             : 
    1032             :         /* data for lead surrogate code units */
    1033           0 :         if(leadUnitValue!=initialValue) {
    1034           0 :             limit=latin1Length+UTRIE_DATA_BLOCK_LENGTH;
    1035           0 :             for(/* i=latin1Length */; i<limit; ++i) {
    1036           0 :                 p32[i]=leadUnitValue;
    1037             :             }
    1038             :         }
    1039             :     }
    1040             : 
    1041           0 :     trie->getFoldingOffset=utrie_defaultGetFoldingOffset;
    1042             : 
    1043           0 :     return actualLength;
    1044             : }
    1045             : 
    1046             : /* enumeration -------------------------------------------------------------- */
    1047             : 
    1048             : /* default UTrieEnumValue() returns the input value itself */
    1049             : static uint32_t U_CALLCONV
    1050           0 : enumSameValue(const void * /*context*/, uint32_t value) {
    1051           0 :     return value;
    1052             : }
    1053             : 
    1054             : /**
    1055             :  * Enumerate all ranges of code points with the same relevant values.
    1056             :  * The values are transformed from the raw trie entries by the enumValue function.
    1057             :  */
    1058             : U_CAPI void U_EXPORT2
    1059           0 : utrie_enum(const UTrie *trie,
    1060             :            UTrieEnumValue *enumValue, UTrieEnumRange *enumRange, const void *context) {
    1061             :     const uint32_t *data32;
    1062             :     const uint16_t *idx;
    1063             : 
    1064             :     uint32_t value, prevValue, initialValue;
    1065             :     UChar32 c, prev;
    1066             :     int32_t l, i, j, block, prevBlock, nullBlock, offset;
    1067             : 
    1068             :     /* check arguments */
    1069           0 :     if(trie==NULL || trie->index==NULL || enumRange==NULL) {
    1070           0 :         return;
    1071             :     }
    1072           0 :     if(enumValue==NULL) {
    1073           0 :         enumValue=enumSameValue;
    1074             :     }
    1075             : 
    1076           0 :     idx=trie->index;
    1077           0 :     data32=trie->data32;
    1078             : 
    1079             :     /* get the enumeration value that corresponds to an initial-value trie data entry */
    1080           0 :     initialValue=enumValue(context, trie->initialValue);
    1081             : 
    1082           0 :     if(data32==NULL) {
    1083           0 :         nullBlock=trie->indexLength;
    1084             :     } else {
    1085           0 :         nullBlock=0;
    1086             :     }
    1087             : 
    1088             :     /* set variables for previous range */
    1089           0 :     prevBlock=nullBlock;
    1090           0 :     prev=0;
    1091           0 :     prevValue=initialValue;
    1092             : 
    1093             :     /* enumerate BMP - the main loop enumerates data blocks */
    1094           0 :     for(i=0, c=0; c<=0xffff; ++i) {
    1095           0 :         if(c==0xd800) {
    1096             :             /* skip lead surrogate code _units_, go to lead surr. code _points_ */
    1097           0 :             i=UTRIE_BMP_INDEX_LENGTH;
    1098           0 :         } else if(c==0xdc00) {
    1099             :             /* go back to regular BMP code points */
    1100           0 :             i=c>>UTRIE_SHIFT;
    1101             :         }
    1102             : 
    1103           0 :         block=idx[i]<<UTRIE_INDEX_SHIFT;
    1104           0 :         if(block==prevBlock) {
    1105             :             /* the block is the same as the previous one, and filled with value */
    1106           0 :             c+=UTRIE_DATA_BLOCK_LENGTH;
    1107           0 :         } else if(block==nullBlock) {
    1108             :             /* this is the all-initial-value block */
    1109           0 :             if(prevValue!=initialValue) {
    1110           0 :                 if(prev<c) {
    1111           0 :                     if(!enumRange(context, prev, c, prevValue)) {
    1112           0 :                         return;
    1113             :                     }
    1114             :                 }
    1115           0 :                 prevBlock=nullBlock;
    1116           0 :                 prev=c;
    1117           0 :                 prevValue=initialValue;
    1118             :             }
    1119           0 :             c+=UTRIE_DATA_BLOCK_LENGTH;
    1120             :         } else {
    1121           0 :             prevBlock=block;
    1122           0 :             for(j=0; j<UTRIE_DATA_BLOCK_LENGTH; ++j) {
    1123           0 :                 value=enumValue(context, data32!=NULL ? data32[block+j] : idx[block+j]);
    1124           0 :                 if(value!=prevValue) {
    1125           0 :                     if(prev<c) {
    1126           0 :                         if(!enumRange(context, prev, c, prevValue)) {
    1127           0 :                             return;
    1128             :                         }
    1129             :                     }
    1130           0 :                     if(j>0) {
    1131             :                         /* the block is not filled with all the same value */
    1132           0 :                         prevBlock=-1;
    1133             :                     }
    1134           0 :                     prev=c;
    1135           0 :                     prevValue=value;
    1136             :                 }
    1137           0 :                 ++c;
    1138             :             }
    1139             :         }
    1140             :     }
    1141             : 
    1142             :     /* enumerate supplementary code points */
    1143           0 :     for(l=0xd800; l<0xdc00;) {
    1144             :         /* lead surrogate access */
    1145           0 :         offset=idx[l>>UTRIE_SHIFT]<<UTRIE_INDEX_SHIFT;
    1146           0 :         if(offset==nullBlock) {
    1147             :             /* no entries for a whole block of lead surrogates */
    1148           0 :             if(prevValue!=initialValue) {
    1149           0 :                 if(prev<c) {
    1150           0 :                     if(!enumRange(context, prev, c, prevValue)) {
    1151           0 :                         return;
    1152             :                     }
    1153             :                 }
    1154           0 :                 prevBlock=nullBlock;
    1155           0 :                 prev=c;
    1156           0 :                 prevValue=initialValue;
    1157             :             }
    1158             : 
    1159           0 :             l+=UTRIE_DATA_BLOCK_LENGTH;
    1160           0 :             c+=UTRIE_DATA_BLOCK_LENGTH<<10;
    1161           0 :             continue;
    1162             :         }
    1163             : 
    1164           0 :         value= data32!=NULL ? data32[offset+(l&UTRIE_MASK)] : idx[offset+(l&UTRIE_MASK)];
    1165             : 
    1166             :         /* enumerate trail surrogates for this lead surrogate */
    1167           0 :         offset=trie->getFoldingOffset(value);
    1168           0 :         if(offset<=0) {
    1169             :             /* no data for this lead surrogate */
    1170           0 :             if(prevValue!=initialValue) {
    1171           0 :                 if(prev<c) {
    1172           0 :                     if(!enumRange(context, prev, c, prevValue)) {
    1173           0 :                         return;
    1174             :                     }
    1175             :                 }
    1176           0 :                 prevBlock=nullBlock;
    1177           0 :                 prev=c;
    1178           0 :                 prevValue=initialValue;
    1179             :             }
    1180             : 
    1181             :             /* nothing else to do for the supplementary code points for this lead surrogate */
    1182           0 :             c+=0x400;
    1183             :         } else {
    1184             :             /* enumerate code points for this lead surrogate */
    1185           0 :             i=offset;
    1186           0 :             offset+=UTRIE_SURROGATE_BLOCK_COUNT;
    1187           0 :             do {
    1188             :                 /* copy of most of the body of the BMP loop */
    1189           0 :                 block=idx[i]<<UTRIE_INDEX_SHIFT;
    1190           0 :                 if(block==prevBlock) {
    1191             :                     /* the block is the same as the previous one, and filled with value */
    1192           0 :                     c+=UTRIE_DATA_BLOCK_LENGTH;
    1193           0 :                 } else if(block==nullBlock) {
    1194             :                     /* this is the all-initial-value block */
    1195           0 :                     if(prevValue!=initialValue) {
    1196           0 :                         if(prev<c) {
    1197           0 :                             if(!enumRange(context, prev, c, prevValue)) {
    1198           0 :                                 return;
    1199             :                             }
    1200             :                         }
    1201           0 :                         prevBlock=nullBlock;
    1202           0 :                         prev=c;
    1203           0 :                         prevValue=initialValue;
    1204             :                     }
    1205           0 :                     c+=UTRIE_DATA_BLOCK_LENGTH;
    1206             :                 } else {
    1207           0 :                     prevBlock=block;
    1208           0 :                     for(j=0; j<UTRIE_DATA_BLOCK_LENGTH; ++j) {
    1209           0 :                         value=enumValue(context, data32!=NULL ? data32[block+j] : idx[block+j]);
    1210           0 :                         if(value!=prevValue) {
    1211           0 :                             if(prev<c) {
    1212           0 :                                 if(!enumRange(context, prev, c, prevValue)) {
    1213           0 :                                     return;
    1214             :                                 }
    1215             :                             }
    1216           0 :                             if(j>0) {
    1217             :                                 /* the block is not filled with all the same value */
    1218           0 :                                 prevBlock=-1;
    1219             :                             }
    1220           0 :                             prev=c;
    1221           0 :                             prevValue=value;
    1222             :                         }
    1223           0 :                         ++c;
    1224             :                     }
    1225             :                 }
    1226             :             } while(++i<offset);
    1227             :         }
    1228             : 
    1229           0 :         ++l;
    1230             :     }
    1231             : 
    1232             :     /* deliver last range */
    1233           0 :     enumRange(context, prev, c, prevValue);
    1234             : }

Generated by: LCOV version 1.13