LCOV - code coverage report
Current view: top level - intl/icu/source/common - utrie2_builder.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 0 601 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 30 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : ******************************************************************************
       5             : *
       6             : *   Copyright (C) 2001-2014, International Business Machines
       7             : *   Corporation and others.  All Rights Reserved.
       8             : *
       9             : ******************************************************************************
      10             : *   file name:  utrie2_builder.cpp
      11             : *   encoding:   UTF-8
      12             : *   tab size:   8 (not used)
      13             : *   indentation:4
      14             : *
      15             : *   created on: 2008sep26 (split off from utrie2.c)
      16             : *   created by: Markus W. Scherer
      17             : *
      18             : *   This is a common implementation of a Unicode trie.
      19             : *   It is a kind of compressed, serializable table of 16- or 32-bit values associated with
      20             : *   Unicode code points (0..0x10ffff).
      21             : *   This is the second common version of a Unicode trie (hence the name UTrie2).
      22             : *   See utrie2.h for a comparison.
      23             : *
      24             : *   This file contains only the builder code.
      25             : *   See utrie2.c for the runtime and enumeration code.
      26             : */
      27             : #ifdef UTRIE2_DEBUG
      28             : #   include <stdio.h>
      29             : #endif
      30             : 
      31             : #include "unicode/utypes.h"
      32             : #include "cmemory.h"
      33             : #include "utrie2.h"
      34             : #include "utrie2_impl.h"
      35             : 
      36             : #include "utrie.h" /* for utrie2_fromUTrie() and utrie_swap() */
      37             : 
      38             : /* Implementation notes ----------------------------------------------------- */
      39             : 
      40             : /*
      41             :  * The UTRIE2_SHIFT_1, UTRIE2_SHIFT_2, UTRIE2_INDEX_SHIFT and other values
      42             :  * have been chosen to minimize trie sizes overall.
      43             :  * Most of the code is flexible enough to work with a range of values,
      44             :  * within certain limits.
      45             :  *
      46             :  * Exception: Support for separate values for lead surrogate code _units_
      47             :  * vs. code _points_ was added after the constants were fixed,
      48             :  * and has not been tested nor particularly designed for different constant values.
      49             :  * (Especially the utrie2_enum() code that jumps to the special LSCP index-2
      50             :  * part and back.)
      51             :  *
      52             :  * Requires UTRIE2_SHIFT_2<=6. Otherwise 0xc0 which is the top of the ASCII-linear data
      53             :  * including the bad-UTF-8-data block is not a multiple of UTRIE2_DATA_BLOCK_LENGTH
      54             :  * and map[block>>UTRIE2_SHIFT_2] (used in reference counting and compaction
      55             :  * remapping) stops working.
      56             :  *
      57             :  * Requires UTRIE2_SHIFT_1>=10 because utrie2_enumForLeadSurrogate()
      58             :  * assumes that a single index-2 block is used for 0x400 code points
      59             :  * corresponding to one lead surrogate.
      60             :  *
      61             :  * Requires UTRIE2_SHIFT_1<=16. Otherwise one single index-2 block contains
      62             :  * more than one Unicode plane, and the split of the index-2 table into a BMP
      63             :  * part and a supplementary part, with a gap in between, would not work.
      64             :  *
      65             :  * Requires UTRIE2_INDEX_SHIFT>=1 not because of the code but because
      66             :  * there is data with more than 64k distinct values,
      67             :  * for example for Unihan collation with a separate collation weight per
      68             :  * Han character.
      69             :  */
      70             : 
      71             : /* Building a trie ----------------------------------------------------------*/
      72             : 
      73             : enum {
      74             :     /** The null index-2 block, following the gap in the index-2 table. */
      75             :     UNEWTRIE2_INDEX_2_NULL_OFFSET=UNEWTRIE2_INDEX_GAP_OFFSET+UNEWTRIE2_INDEX_GAP_LENGTH,
      76             : 
      77             :     /** The start of allocated index-2 blocks. */
      78             :     UNEWTRIE2_INDEX_2_START_OFFSET=UNEWTRIE2_INDEX_2_NULL_OFFSET+UTRIE2_INDEX_2_BLOCK_LENGTH,
      79             : 
      80             :     /**
      81             :      * The null data block.
      82             :      * Length 64=0x40 even if UTRIE2_DATA_BLOCK_LENGTH is smaller,
      83             :      * to work with 6-bit trail bytes from 2-byte UTF-8.
      84             :      */
      85             :     UNEWTRIE2_DATA_NULL_OFFSET=UTRIE2_DATA_START_OFFSET,
      86             : 
      87             :     /** The start of allocated data blocks. */
      88             :     UNEWTRIE2_DATA_START_OFFSET=UNEWTRIE2_DATA_NULL_OFFSET+0x40,
      89             : 
      90             :     /**
      91             :      * The start of data blocks for U+0800 and above.
      92             :      * Below, compaction uses a block length of 64 for 2-byte UTF-8.
      93             :      * From here on, compaction uses UTRIE2_DATA_BLOCK_LENGTH.
      94             :      * Data values for 0x780 code points beyond ASCII.
      95             :      */
      96             :     UNEWTRIE2_DATA_0800_OFFSET=UNEWTRIE2_DATA_START_OFFSET+0x780
      97             : };
      98             : 
      99             : /* Start with allocation of 16k data entries. */
     100             : #define UNEWTRIE2_INITIAL_DATA_LENGTH ((int32_t)1<<14)
     101             : 
     102             : /* Grow about 8x each time. */
     103             : #define UNEWTRIE2_MEDIUM_DATA_LENGTH ((int32_t)1<<17)
     104             : 
     105             : static int32_t
     106             : allocIndex2Block(UNewTrie2 *trie);
     107             : 
     108             : U_CAPI UTrie2 * U_EXPORT2
     109           0 : utrie2_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode) {
     110             :     UTrie2 *trie;
     111             :     UNewTrie2 *newTrie;
     112             :     uint32_t *data;
     113             :     int32_t i, j;
     114             : 
     115           0 :     if(U_FAILURE(*pErrorCode)) {
     116           0 :         return NULL;
     117             :     }
     118             : 
     119           0 :     trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
     120           0 :     newTrie=(UNewTrie2 *)uprv_malloc(sizeof(UNewTrie2));
     121           0 :     data=(uint32_t *)uprv_malloc(UNEWTRIE2_INITIAL_DATA_LENGTH*4);
     122           0 :     if(trie==NULL || newTrie==NULL || data==NULL) {
     123           0 :         uprv_free(trie);
     124           0 :         uprv_free(newTrie);
     125           0 :         uprv_free(data);
     126           0 :         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
     127           0 :         return 0;
     128             :     }
     129             : 
     130           0 :     uprv_memset(trie, 0, sizeof(UTrie2));
     131           0 :     trie->initialValue=initialValue;
     132           0 :     trie->errorValue=errorValue;
     133           0 :     trie->highStart=0x110000;
     134           0 :     trie->newTrie=newTrie;
     135             : 
     136           0 :     newTrie->data=data;
     137           0 :     newTrie->dataCapacity=UNEWTRIE2_INITIAL_DATA_LENGTH;
     138           0 :     newTrie->initialValue=initialValue;
     139           0 :     newTrie->errorValue=errorValue;
     140           0 :     newTrie->highStart=0x110000;
     141           0 :     newTrie->firstFreeBlock=0;  /* no free block in the list */
     142           0 :     newTrie->isCompacted=FALSE;
     143             : 
     144             :     /*
     145             :      * preallocate and reset
     146             :      * - ASCII
     147             :      * - the bad-UTF-8-data block
     148             :      * - the null data block
     149             :      */
     150           0 :     for(i=0; i<0x80; ++i) {
     151           0 :         newTrie->data[i]=initialValue;
     152             :     }
     153           0 :     for(; i<0xc0; ++i) {
     154           0 :         newTrie->data[i]=errorValue;
     155             :     }
     156           0 :     for(i=UNEWTRIE2_DATA_NULL_OFFSET; i<UNEWTRIE2_DATA_START_OFFSET; ++i) {
     157           0 :         newTrie->data[i]=initialValue;
     158             :     }
     159           0 :     newTrie->dataNullOffset=UNEWTRIE2_DATA_NULL_OFFSET;
     160           0 :     newTrie->dataLength=UNEWTRIE2_DATA_START_OFFSET;
     161             : 
     162             :     /* set the index-2 indexes for the 2=0x80>>UTRIE2_SHIFT_2 ASCII data blocks */
     163           0 :     for(i=0, j=0; j<0x80; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
     164           0 :         newTrie->index2[i]=j;
     165           0 :         newTrie->map[i]=1;
     166             :     }
     167             :     /* reference counts for the bad-UTF-8-data block */
     168           0 :     for(; j<0xc0; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
     169           0 :         newTrie->map[i]=0;
     170             :     }
     171             :     /*
     172             :      * Reference counts for the null data block: all blocks except for the ASCII blocks.
     173             :      * Plus 1 so that we don't drop this block during compaction.
     174             :      * Plus as many as needed for lead surrogate code points.
     175             :      */
     176             :     /* i==newTrie->dataNullOffset */
     177           0 :     newTrie->map[i++]=
     178             :         (0x110000>>UTRIE2_SHIFT_2)-
     179             :         (0x80>>UTRIE2_SHIFT_2)+
     180             :         1+
     181             :         UTRIE2_LSCP_INDEX_2_LENGTH;
     182           0 :     j+=UTRIE2_DATA_BLOCK_LENGTH;
     183           0 :     for(; j<UNEWTRIE2_DATA_START_OFFSET; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
     184           0 :         newTrie->map[i]=0;
     185             :     }
     186             : 
     187             :     /*
     188             :      * set the remaining indexes in the BMP index-2 block
     189             :      * to the null data block
     190             :      */
     191           0 :     for(i=0x80>>UTRIE2_SHIFT_2; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) {
     192           0 :         newTrie->index2[i]=UNEWTRIE2_DATA_NULL_OFFSET;
     193             :     }
     194             : 
     195             :     /*
     196             :      * Fill the index gap with impossible values so that compaction
     197             :      * does not overlap other index-2 blocks with the gap.
     198             :      */
     199           0 :     for(i=0; i<UNEWTRIE2_INDEX_GAP_LENGTH; ++i) {
     200           0 :         newTrie->index2[UNEWTRIE2_INDEX_GAP_OFFSET+i]=-1;
     201             :     }
     202             : 
     203             :     /* set the indexes in the null index-2 block */
     204           0 :     for(i=0; i<UTRIE2_INDEX_2_BLOCK_LENGTH; ++i) {
     205           0 :         newTrie->index2[UNEWTRIE2_INDEX_2_NULL_OFFSET+i]=UNEWTRIE2_DATA_NULL_OFFSET;
     206             :     }
     207           0 :     newTrie->index2NullOffset=UNEWTRIE2_INDEX_2_NULL_OFFSET;
     208           0 :     newTrie->index2Length=UNEWTRIE2_INDEX_2_START_OFFSET;
     209             : 
     210             :     /* set the index-1 indexes for the linear index-2 block */
     211           0 :     for(i=0, j=0;
     212           0 :         i<UTRIE2_OMITTED_BMP_INDEX_1_LENGTH;
     213           0 :         ++i, j+=UTRIE2_INDEX_2_BLOCK_LENGTH
     214             :     ) {
     215           0 :         newTrie->index1[i]=j;
     216             :     }
     217             : 
     218             :     /* set the remaining index-1 indexes to the null index-2 block */
     219           0 :     for(; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) {
     220           0 :         newTrie->index1[i]=UNEWTRIE2_INDEX_2_NULL_OFFSET;
     221             :     }
     222             : 
     223             :     /*
     224             :      * Preallocate and reset data for U+0080..U+07ff,
     225             :      * for 2-byte UTF-8 which will be compacted in 64-blocks
     226             :      * even if UTRIE2_DATA_BLOCK_LENGTH is smaller.
     227             :      */
     228           0 :     for(i=0x80; i<0x800; i+=UTRIE2_DATA_BLOCK_LENGTH) {
     229           0 :         utrie2_set32(trie, i, initialValue, pErrorCode);
     230             :     }
     231             : 
     232           0 :     return trie;
     233             : }
     234             : 
     235             : static UNewTrie2 *
     236           0 : cloneBuilder(const UNewTrie2 *other) {
     237             :     UNewTrie2 *trie;
     238             : 
     239           0 :     trie=(UNewTrie2 *)uprv_malloc(sizeof(UNewTrie2));
     240           0 :     if(trie==NULL) {
     241           0 :         return NULL;
     242             :     }
     243             : 
     244           0 :     trie->data=(uint32_t *)uprv_malloc(other->dataCapacity*4);
     245           0 :     if(trie->data==NULL) {
     246           0 :         uprv_free(trie);
     247           0 :         return NULL;
     248             :     }
     249           0 :     trie->dataCapacity=other->dataCapacity;
     250             : 
     251             :     /* clone data */
     252           0 :     uprv_memcpy(trie->index1, other->index1, sizeof(trie->index1));
     253           0 :     uprv_memcpy(trie->index2, other->index2, (size_t)other->index2Length*4);
     254           0 :     trie->index2NullOffset=other->index2NullOffset;
     255           0 :     trie->index2Length=other->index2Length;
     256             : 
     257           0 :     uprv_memcpy(trie->data, other->data, (size_t)other->dataLength*4);
     258           0 :     trie->dataNullOffset=other->dataNullOffset;
     259           0 :     trie->dataLength=other->dataLength;
     260             : 
     261             :     /* reference counters */
     262           0 :     if(other->isCompacted) {
     263           0 :         trie->firstFreeBlock=0;
     264             :     } else {
     265           0 :         uprv_memcpy(trie->map, other->map, ((size_t)other->dataLength>>UTRIE2_SHIFT_2)*4);
     266           0 :         trie->firstFreeBlock=other->firstFreeBlock;
     267             :     }
     268             : 
     269           0 :     trie->initialValue=other->initialValue;
     270           0 :     trie->errorValue=other->errorValue;
     271           0 :     trie->highStart=other->highStart;
     272           0 :     trie->isCompacted=other->isCompacted;
     273             : 
     274           0 :     return trie;
     275             : }
     276             : 
     277             : U_CAPI UTrie2 * U_EXPORT2
     278           0 : utrie2_clone(const UTrie2 *other, UErrorCode *pErrorCode) {
     279             :     UTrie2 *trie;
     280             : 
     281           0 :     if(U_FAILURE(*pErrorCode)) {
     282           0 :         return NULL;
     283             :     }
     284           0 :     if(other==NULL || (other->memory==NULL && other->newTrie==NULL)) {
     285           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
     286           0 :         return NULL;
     287             :     }
     288             : 
     289           0 :     trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
     290           0 :     if(trie==NULL) {
     291           0 :         return NULL;
     292             :     }
     293           0 :     uprv_memcpy(trie, other, sizeof(UTrie2));
     294             : 
     295           0 :     if(other->memory!=NULL) {
     296           0 :         trie->memory=uprv_malloc(other->length);
     297           0 :         if(trie->memory!=NULL) {
     298           0 :             trie->isMemoryOwned=TRUE;
     299           0 :             uprv_memcpy(trie->memory, other->memory, other->length);
     300             : 
     301             :             /* make the clone's pointers point to its own memory */
     302           0 :             trie->index=(uint16_t *)trie->memory+(other->index-(uint16_t *)other->memory);
     303           0 :             if(other->data16!=NULL) {
     304           0 :                 trie->data16=(uint16_t *)trie->memory+(other->data16-(uint16_t *)other->memory);
     305             :             }
     306           0 :             if(other->data32!=NULL) {
     307           0 :                 trie->data32=(uint32_t *)trie->memory+(other->data32-(uint32_t *)other->memory);
     308             :             }
     309             :         }
     310             :     } else /* other->newTrie!=NULL */ {
     311           0 :         trie->newTrie=cloneBuilder(other->newTrie);
     312             :     }
     313             : 
     314           0 :     if(trie->memory==NULL && trie->newTrie==NULL) {
     315           0 :         uprv_free(trie);
     316           0 :         trie=NULL;
     317             :     }
     318           0 :     return trie;
     319             : }
     320             : 
     321             : typedef struct NewTrieAndStatus {
     322             :     UTrie2 *trie;
     323             :     UErrorCode errorCode;
     324             :     UBool exclusiveLimit;  /* rather than inclusive range end */
     325             : } NewTrieAndStatus;
     326             : 
     327             : static UBool U_CALLCONV
     328           0 : copyEnumRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
     329           0 :     NewTrieAndStatus *nt=(NewTrieAndStatus *)context;
     330           0 :     if(value!=nt->trie->initialValue) {
     331           0 :         if(nt->exclusiveLimit) {
     332           0 :             --end;
     333             :         }
     334           0 :         if(start==end) {
     335           0 :             utrie2_set32(nt->trie, start, value, &nt->errorCode);
     336             :         } else {
     337           0 :             utrie2_setRange32(nt->trie, start, end, value, TRUE, &nt->errorCode);
     338             :         }
     339           0 :         return U_SUCCESS(nt->errorCode);
     340             :     } else {
     341           0 :         return TRUE;
     342             :     }
     343             : }
     344             : 
     345             : #ifdef UTRIE2_DEBUG
     346             : static void
     347             : utrie_printLengths(const UTrie *trie) {
     348             :     long indexLength=trie->indexLength;
     349             :     long dataLength=(long)trie->dataLength;
     350             :     long totalLength=(long)sizeof(UTrieHeader)+indexLength*2+dataLength*(trie->data32!=NULL ? 4 : 2);
     351             :     printf("**UTrieLengths** index:%6ld  data:%6ld  serialized:%6ld\n",
     352             :            indexLength, dataLength, totalLength);
     353             : }
     354             : 
     355             : static void
     356             : utrie2_printLengths(const UTrie2 *trie, const char *which) {
     357             :     long indexLength=trie->indexLength;
     358             :     long dataLength=(long)trie->dataLength;
     359             :     long totalLength=(long)sizeof(UTrie2Header)+indexLength*2+dataLength*(trie->data32!=NULL ? 4 : 2);
     360             :     printf("**UTrie2Lengths(%s)** index:%6ld  data:%6ld  serialized:%6ld\n",
     361             :            which, indexLength, dataLength, totalLength);
     362             : }
     363             : #endif
     364             : 
     365             : U_CAPI UTrie2 * U_EXPORT2
     366           0 : utrie2_cloneAsThawed(const UTrie2 *other, UErrorCode *pErrorCode) {
     367             :     NewTrieAndStatus context;
     368             :     UChar lead;
     369             : 
     370           0 :     if(U_FAILURE(*pErrorCode)) {
     371           0 :         return NULL;
     372             :     }
     373           0 :     if(other==NULL || (other->memory==NULL && other->newTrie==NULL)) {
     374           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
     375           0 :         return NULL;
     376             :     }
     377           0 :     if(other->newTrie!=NULL && !other->newTrie->isCompacted) {
     378           0 :         return utrie2_clone(other, pErrorCode);  /* clone an unfrozen trie */
     379             :     }
     380             : 
     381             :     /* Clone the frozen trie by enumerating it and building a new one. */
     382           0 :     context.trie=utrie2_open(other->initialValue, other->errorValue, pErrorCode);
     383           0 :     if(U_FAILURE(*pErrorCode)) {
     384           0 :         return NULL;
     385             :     }
     386           0 :     context.exclusiveLimit=FALSE;
     387           0 :     context.errorCode=*pErrorCode;
     388           0 :     utrie2_enum(other, NULL, copyEnumRange, &context);
     389           0 :     *pErrorCode=context.errorCode;
     390           0 :     for(lead=0xd800; lead<0xdc00; ++lead) {
     391             :         uint32_t value;
     392           0 :         if(other->data32==NULL) {
     393           0 :             value=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(other, lead);
     394             :         } else {
     395           0 :             value=UTRIE2_GET32_FROM_U16_SINGLE_LEAD(other, lead);
     396             :         }
     397           0 :         if(value!=other->initialValue) {
     398           0 :             utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode);
     399             :         }
     400             :     }
     401           0 :     if(U_FAILURE(*pErrorCode)) {
     402           0 :         utrie2_close(context.trie);
     403           0 :         context.trie=NULL;
     404             :     }
     405           0 :     return context.trie;
     406             : }
     407             : 
     408             : /* Almost the same as utrie2_cloneAsThawed() but copies a UTrie and freezes the clone. */
     409             : U_CAPI UTrie2 * U_EXPORT2
     410           0 : utrie2_fromUTrie(const UTrie *trie1, uint32_t errorValue, UErrorCode *pErrorCode) {
     411             :     NewTrieAndStatus context;
     412             :     UChar lead;
     413             : 
     414           0 :     if(U_FAILURE(*pErrorCode)) {
     415           0 :         return NULL;
     416             :     }
     417           0 :     if(trie1==NULL) {
     418           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
     419           0 :         return NULL;
     420             :     }
     421           0 :     context.trie=utrie2_open(trie1->initialValue, errorValue, pErrorCode);
     422           0 :     if(U_FAILURE(*pErrorCode)) {
     423           0 :         return NULL;
     424             :     }
     425           0 :     context.exclusiveLimit=TRUE;
     426           0 :     context.errorCode=*pErrorCode;
     427           0 :     utrie_enum(trie1, NULL, copyEnumRange, &context);
     428           0 :     *pErrorCode=context.errorCode;
     429           0 :     for(lead=0xd800; lead<0xdc00; ++lead) {
     430             :         uint32_t value;
     431           0 :         if(trie1->data32==NULL) {
     432           0 :             value=UTRIE_GET16_FROM_LEAD(trie1, lead);
     433             :         } else {
     434           0 :             value=UTRIE_GET32_FROM_LEAD(trie1, lead);
     435             :         }
     436           0 :         if(value!=trie1->initialValue) {
     437           0 :             utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode);
     438             :         }
     439             :     }
     440           0 :     if(U_SUCCESS(*pErrorCode)) {
     441           0 :         utrie2_freeze(context.trie,
     442           0 :                       trie1->data32!=NULL ? UTRIE2_32_VALUE_BITS : UTRIE2_16_VALUE_BITS,
     443           0 :                       pErrorCode);
     444             :     }
     445             : #ifdef UTRIE2_DEBUG
     446             :     if(U_SUCCESS(*pErrorCode)) {
     447             :         utrie_printLengths(trie1);
     448             :         utrie2_printLengths(context.trie, "fromUTrie");
     449             :     }
     450             : #endif
     451           0 :     if(U_FAILURE(*pErrorCode)) {
     452           0 :         utrie2_close(context.trie);
     453           0 :         context.trie=NULL;
     454             :     }
     455           0 :     return context.trie;
     456             : }
     457             : 
     458             : static inline UBool
     459           0 : isInNullBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) {
     460             :     int32_t i2, block;
     461             : 
     462           0 :     if(U_IS_LEAD(c) && forLSCP) {
     463           0 :         i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+
     464           0 :             (c>>UTRIE2_SHIFT_2);
     465             :     } else {
     466           0 :         i2=trie->index1[c>>UTRIE2_SHIFT_1]+
     467           0 :             ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK);
     468             :     }
     469           0 :     block=trie->index2[i2];
     470           0 :     return (UBool)(block==trie->dataNullOffset);
     471             : }
     472             : 
     473             : static int32_t
     474           0 : allocIndex2Block(UNewTrie2 *trie) {
     475             :     int32_t newBlock, newTop;
     476             : 
     477           0 :     newBlock=trie->index2Length;
     478           0 :     newTop=newBlock+UTRIE2_INDEX_2_BLOCK_LENGTH;
     479           0 :     if(newTop>UPRV_LENGTHOF(trie->index2)) {
     480             :         /*
     481             :          * Should never occur.
     482             :          * Either UTRIE2_MAX_BUILD_TIME_INDEX_LENGTH is incorrect,
     483             :          * or the code writes more values than should be possible.
     484             :          */
     485           0 :         return -1;
     486             :     }
     487           0 :     trie->index2Length=newTop;
     488           0 :     uprv_memcpy(trie->index2+newBlock, trie->index2+trie->index2NullOffset, UTRIE2_INDEX_2_BLOCK_LENGTH*4);
     489           0 :     return newBlock;
     490             : }
     491             : 
     492             : static int32_t
     493           0 : getIndex2Block(UNewTrie2 *trie, UChar32 c, UBool forLSCP) {
     494             :     int32_t i1, i2;
     495             : 
     496           0 :     if(U_IS_LEAD(c) && forLSCP) {
     497           0 :         return UTRIE2_LSCP_INDEX_2_OFFSET;
     498             :     }
     499             : 
     500           0 :     i1=c>>UTRIE2_SHIFT_1;
     501           0 :     i2=trie->index1[i1];
     502           0 :     if(i2==trie->index2NullOffset) {
     503           0 :         i2=allocIndex2Block(trie);
     504           0 :         if(i2<0) {
     505           0 :             return -1;  /* program error */
     506             :         }
     507           0 :         trie->index1[i1]=i2;
     508             :     }
     509           0 :     return i2;
     510             : }
     511             : 
     512             : static int32_t
     513           0 : allocDataBlock(UNewTrie2 *trie, int32_t copyBlock) {
     514             :     int32_t newBlock, newTop;
     515             : 
     516           0 :     if(trie->firstFreeBlock!=0) {
     517             :         /* get the first free block */
     518           0 :         newBlock=trie->firstFreeBlock;
     519           0 :         trie->firstFreeBlock=-trie->map[newBlock>>UTRIE2_SHIFT_2];
     520             :     } else {
     521             :         /* get a new block from the high end */
     522           0 :         newBlock=trie->dataLength;
     523           0 :         newTop=newBlock+UTRIE2_DATA_BLOCK_LENGTH;
     524           0 :         if(newTop>trie->dataCapacity) {
     525             :             /* out of memory in the data array */
     526             :             int32_t capacity;
     527             :             uint32_t *data;
     528             : 
     529           0 :             if(trie->dataCapacity<UNEWTRIE2_MEDIUM_DATA_LENGTH) {
     530           0 :                 capacity=UNEWTRIE2_MEDIUM_DATA_LENGTH;
     531           0 :             } else if(trie->dataCapacity<UNEWTRIE2_MAX_DATA_LENGTH) {
     532           0 :                 capacity=UNEWTRIE2_MAX_DATA_LENGTH;
     533             :             } else {
     534             :                 /*
     535             :                  * Should never occur.
     536             :                  * Either UNEWTRIE2_MAX_DATA_LENGTH is incorrect,
     537             :                  * or the code writes more values than should be possible.
     538             :                  */
     539           0 :                 return -1;
     540             :             }
     541           0 :             data=(uint32_t *)uprv_malloc(capacity*4);
     542           0 :             if(data==NULL) {
     543           0 :                 return -1;
     544             :             }
     545           0 :             uprv_memcpy(data, trie->data, (size_t)trie->dataLength*4);
     546           0 :             uprv_free(trie->data);
     547           0 :             trie->data=data;
     548           0 :             trie->dataCapacity=capacity;
     549             :         }
     550           0 :         trie->dataLength=newTop;
     551             :     }
     552           0 :     uprv_memcpy(trie->data+newBlock, trie->data+copyBlock, UTRIE2_DATA_BLOCK_LENGTH*4);
     553           0 :     trie->map[newBlock>>UTRIE2_SHIFT_2]=0;
     554           0 :     return newBlock;
     555             : }
     556             : 
     557             : /* call when the block's reference counter reaches 0 */
     558             : static void
     559           0 : releaseDataBlock(UNewTrie2 *trie, int32_t block) {
     560             :     /* put this block at the front of the free-block chain */
     561           0 :     trie->map[block>>UTRIE2_SHIFT_2]=-trie->firstFreeBlock;
     562           0 :     trie->firstFreeBlock=block;
     563           0 : }
     564             : 
     565             : static inline UBool
     566           0 : isWritableBlock(UNewTrie2 *trie, int32_t block) {
     567           0 :     return (UBool)(block!=trie->dataNullOffset && 1==trie->map[block>>UTRIE2_SHIFT_2]);
     568             : }
     569             : 
     570             : static inline void
     571           0 : setIndex2Entry(UNewTrie2 *trie, int32_t i2, int32_t block) {
     572             :     int32_t oldBlock;
     573           0 :     ++trie->map[block>>UTRIE2_SHIFT_2];  /* increment first, in case block==oldBlock! */
     574           0 :     oldBlock=trie->index2[i2];
     575           0 :     if(0 == --trie->map[oldBlock>>UTRIE2_SHIFT_2]) {
     576           0 :         releaseDataBlock(trie, oldBlock);
     577             :     }
     578           0 :     trie->index2[i2]=block;
     579           0 : }
     580             : 
     581             : /**
     582             :  * No error checking for illegal arguments.
     583             :  *
     584             :  * @return -1 if no new data block available (out of memory in data array)
     585             :  * @internal
     586             :  */
     587             : static int32_t
     588           0 : getDataBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) {
     589             :     int32_t i2, oldBlock, newBlock;
     590             : 
     591           0 :     i2=getIndex2Block(trie, c, forLSCP);
     592           0 :     if(i2<0) {
     593           0 :         return -1;  /* program error */
     594             :     }
     595             : 
     596           0 :     i2+=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
     597           0 :     oldBlock=trie->index2[i2];
     598           0 :     if(isWritableBlock(trie, oldBlock)) {
     599           0 :         return oldBlock;
     600             :     }
     601             : 
     602             :     /* allocate a new data block */
     603           0 :     newBlock=allocDataBlock(trie, oldBlock);
     604           0 :     if(newBlock<0) {
     605             :         /* out of memory in the data array */
     606           0 :         return -1;
     607             :     }
     608           0 :     setIndex2Entry(trie, i2, newBlock);
     609           0 :     return newBlock;
     610             : }
     611             : 
     612             : /**
     613             :  * @return TRUE if the value was successfully set
     614             :  */
     615             : static void
     616           0 : set32(UNewTrie2 *trie,
     617             :       UChar32 c, UBool forLSCP, uint32_t value,
     618             :       UErrorCode *pErrorCode) {
     619             :     int32_t block;
     620             : 
     621           0 :     if(trie==NULL || trie->isCompacted) {
     622           0 :         *pErrorCode=U_NO_WRITE_PERMISSION;
     623           0 :         return;
     624             :     }
     625             : 
     626           0 :     block=getDataBlock(trie, c, forLSCP);
     627           0 :     if(block<0) {
     628           0 :         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
     629           0 :         return;
     630             :     }
     631             : 
     632           0 :     trie->data[block+(c&UTRIE2_DATA_MASK)]=value;
     633             : }
     634             : 
     635             : U_CAPI void U_EXPORT2
     636           0 : utrie2_set32(UTrie2 *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode) {
     637           0 :     if(U_FAILURE(*pErrorCode)) {
     638           0 :         return;
     639             :     }
     640           0 :     if((uint32_t)c>0x10ffff) {
     641           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
     642           0 :         return;
     643             :     }
     644           0 :     set32(trie->newTrie, c, TRUE, value, pErrorCode);
     645             : }
     646             : 
     647             : U_CAPI void U_EXPORT2
     648           0 : utrie2_set32ForLeadSurrogateCodeUnit(UTrie2 *trie,
     649             :                                      UChar32 c, uint32_t value,
     650             :                                      UErrorCode *pErrorCode) {
     651           0 :     if(U_FAILURE(*pErrorCode)) {
     652           0 :         return;
     653             :     }
     654           0 :     if(!U_IS_LEAD(c)) {
     655           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
     656           0 :         return;
     657             :     }
     658           0 :     set32(trie->newTrie, c, FALSE, value, pErrorCode);
     659             : }
     660             : 
     661             : static void
     662           0 : writeBlock(uint32_t *block, uint32_t value) {
     663           0 :     uint32_t *limit=block+UTRIE2_DATA_BLOCK_LENGTH;
     664           0 :     while(block<limit) {
     665           0 :         *block++=value;
     666             :     }
     667           0 : }
     668             : 
     669             : /**
     670             :  * initialValue is ignored if overwrite=TRUE
     671             :  * @internal
     672             :  */
     673             : static void
     674           0 : fillBlock(uint32_t *block, UChar32 start, UChar32 limit,
     675             :           uint32_t value, uint32_t initialValue, UBool overwrite) {
     676             :     uint32_t *pLimit;
     677             : 
     678           0 :     pLimit=block+limit;
     679           0 :     block+=start;
     680           0 :     if(overwrite) {
     681           0 :         while(block<pLimit) {
     682           0 :             *block++=value;
     683             :         }
     684             :     } else {
     685           0 :         while(block<pLimit) {
     686           0 :             if(*block==initialValue) {
     687           0 :                 *block=value;
     688             :             }
     689           0 :             ++block;
     690             :         }
     691             :     }
     692           0 : }
     693             : 
     694             : U_CAPI void U_EXPORT2
     695           0 : utrie2_setRange32(UTrie2 *trie,
     696             :                   UChar32 start, UChar32 end,
     697             :                   uint32_t value, UBool overwrite,
     698             :                   UErrorCode *pErrorCode) {
     699             :     /*
     700             :      * repeat value in [start..end]
     701             :      * mark index values for repeat-data blocks by setting bit 31 of the index values
     702             :      * fill around existing values if any, if(overwrite)
     703             :      */
     704             :     UNewTrie2 *newTrie;
     705             :     int32_t block, rest, repeatBlock;
     706             :     UChar32 limit;
     707             : 
     708           0 :     if(U_FAILURE(*pErrorCode)) {
     709           0 :         return;
     710             :     }
     711           0 :     if((uint32_t)start>0x10ffff || (uint32_t)end>0x10ffff || start>end) {
     712           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
     713           0 :         return;
     714             :     }
     715           0 :     newTrie=trie->newTrie;
     716           0 :     if(newTrie==NULL || newTrie->isCompacted) {
     717           0 :         *pErrorCode=U_NO_WRITE_PERMISSION;
     718           0 :         return;
     719             :     }
     720           0 :     if(!overwrite && value==newTrie->initialValue) {
     721           0 :         return; /* nothing to do */
     722             :     }
     723             : 
     724           0 :     limit=end+1;
     725           0 :     if(start&UTRIE2_DATA_MASK) {
     726             :         UChar32 nextStart;
     727             : 
     728             :         /* set partial block at [start..following block boundary[ */
     729           0 :         block=getDataBlock(newTrie, start, TRUE);
     730           0 :         if(block<0) {
     731           0 :             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
     732           0 :             return;
     733             :         }
     734             : 
     735           0 :         nextStart=(start+UTRIE2_DATA_BLOCK_LENGTH)&~UTRIE2_DATA_MASK;
     736           0 :         if(nextStart<=limit) {
     737           0 :             fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, UTRIE2_DATA_BLOCK_LENGTH,
     738           0 :                       value, newTrie->initialValue, overwrite);
     739           0 :             start=nextStart;
     740             :         } else {
     741           0 :             fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, limit&UTRIE2_DATA_MASK,
     742           0 :                       value, newTrie->initialValue, overwrite);
     743           0 :             return;
     744             :         }
     745             :     }
     746             : 
     747             :     /* number of positions in the last, partial block */
     748           0 :     rest=limit&UTRIE2_DATA_MASK;
     749             : 
     750             :     /* round down limit to a block boundary */
     751           0 :     limit&=~UTRIE2_DATA_MASK;
     752             : 
     753             :     /* iterate over all-value blocks */
     754           0 :     if(value==newTrie->initialValue) {
     755           0 :         repeatBlock=newTrie->dataNullOffset;
     756             :     } else {
     757           0 :         repeatBlock=-1;
     758             :     }
     759             : 
     760           0 :     while(start<limit) {
     761             :         int32_t i2;
     762           0 :         UBool setRepeatBlock=FALSE;
     763             : 
     764           0 :         if(value==newTrie->initialValue && isInNullBlock(newTrie, start, TRUE)) {
     765           0 :             start+=UTRIE2_DATA_BLOCK_LENGTH; /* nothing to do */
     766           0 :             continue;
     767             :         }
     768             : 
     769             :         /* get index value */
     770           0 :         i2=getIndex2Block(newTrie, start, TRUE);
     771           0 :         if(i2<0) {
     772           0 :             *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
     773           0 :             return;
     774             :         }
     775           0 :         i2+=(start>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
     776           0 :         block=newTrie->index2[i2];
     777           0 :         if(isWritableBlock(newTrie, block)) {
     778             :             /* already allocated */
     779           0 :             if(overwrite && block>=UNEWTRIE2_DATA_0800_OFFSET) {
     780             :                 /*
     781             :                  * We overwrite all values, and it's not a
     782             :                  * protected (ASCII-linear or 2-byte UTF-8) block:
     783             :                  * replace with the repeatBlock.
     784             :                  */
     785           0 :                 setRepeatBlock=TRUE;
     786             :             } else {
     787             :                 /* !overwrite, or protected block: just write the values into this block */
     788           0 :                 fillBlock(newTrie->data+block,
     789             :                           0, UTRIE2_DATA_BLOCK_LENGTH,
     790           0 :                           value, newTrie->initialValue, overwrite);
     791             :             }
     792           0 :         } else if(newTrie->data[block]!=value && (overwrite || block==newTrie->dataNullOffset)) {
     793             :             /*
     794             :              * Set the repeatBlock instead of the null block or previous repeat block:
     795             :              *
     796             :              * If !isWritableBlock() then all entries in the block have the same value
     797             :              * because it's the null block or a range block (the repeatBlock from a previous
     798             :              * call to utrie2_setRange32()).
     799             :              * No other blocks are used multiple times before compacting.
     800             :              *
     801             :              * The null block is the only non-writable block with the initialValue because
     802             :              * of the repeatBlock initialization above. (If value==initialValue, then
     803             :              * the repeatBlock will be the null data block.)
     804             :              *
     805             :              * We set our repeatBlock if the desired value differs from the block's value,
     806             :              * and if we overwrite any data or if the data is all initial values
     807             :              * (which is the same as the block being the null block, see above).
     808             :              */
     809           0 :             setRepeatBlock=TRUE;
     810             :         }
     811           0 :         if(setRepeatBlock) {
     812           0 :             if(repeatBlock>=0) {
     813           0 :                 setIndex2Entry(newTrie, i2, repeatBlock);
     814             :             } else {
     815             :                 /* create and set and fill the repeatBlock */
     816           0 :                 repeatBlock=getDataBlock(newTrie, start, TRUE);
     817           0 :                 if(repeatBlock<0) {
     818           0 :                     *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
     819           0 :                     return;
     820             :                 }
     821           0 :                 writeBlock(newTrie->data+repeatBlock, value);
     822             :             }
     823             :         }
     824             : 
     825           0 :         start+=UTRIE2_DATA_BLOCK_LENGTH;
     826             :     }
     827             : 
     828           0 :     if(rest>0) {
     829             :         /* set partial block at [last block boundary..limit[ */
     830           0 :         block=getDataBlock(newTrie, start, TRUE);
     831           0 :         if(block<0) {
     832           0 :             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
     833           0 :             return;
     834             :         }
     835             : 
     836           0 :         fillBlock(newTrie->data+block, 0, rest, value, newTrie->initialValue, overwrite);
     837             :     }
     838             : 
     839           0 :     return;
     840             : }
     841             : 
     842             : /* compaction --------------------------------------------------------------- */
     843             : 
     844             : static inline UBool
     845           0 : equal_int32(const int32_t *s, const int32_t *t, int32_t length) {
     846           0 :     while(length>0 && *s==*t) {
     847           0 :         ++s;
     848           0 :         ++t;
     849           0 :         --length;
     850             :     }
     851           0 :     return (UBool)(length==0);
     852             : }
     853             : 
     854             : static inline UBool
     855           0 : equal_uint32(const uint32_t *s, const uint32_t *t, int32_t length) {
     856           0 :     while(length>0 && *s==*t) {
     857           0 :         ++s;
     858           0 :         ++t;
     859           0 :         --length;
     860             :     }
     861           0 :     return (UBool)(length==0);
     862             : }
     863             : 
     864             : static int32_t
     865           0 : findSameIndex2Block(const int32_t *idx, int32_t index2Length, int32_t otherBlock) {
     866             :     int32_t block;
     867             : 
     868             :     /* ensure that we do not even partially get past index2Length */
     869           0 :     index2Length-=UTRIE2_INDEX_2_BLOCK_LENGTH;
     870             : 
     871           0 :     for(block=0; block<=index2Length; ++block) {
     872           0 :         if(equal_int32(idx+block, idx+otherBlock, UTRIE2_INDEX_2_BLOCK_LENGTH)) {
     873           0 :             return block;
     874             :         }
     875             :     }
     876           0 :     return -1;
     877             : }
     878             : 
     879             : static int32_t
     880           0 : findSameDataBlock(const uint32_t *data, int32_t dataLength, int32_t otherBlock, int32_t blockLength) {
     881             :     int32_t block;
     882             : 
     883             :     /* ensure that we do not even partially get past dataLength */
     884           0 :     dataLength-=blockLength;
     885             : 
     886           0 :     for(block=0; block<=dataLength; block+=UTRIE2_DATA_GRANULARITY) {
     887           0 :         if(equal_uint32(data+block, data+otherBlock, blockLength)) {
     888           0 :             return block;
     889             :         }
     890             :     }
     891           0 :     return -1;
     892             : }
     893             : 
     894             : /*
     895             :  * Find the start of the last range in the trie by enumerating backward.
     896             :  * Indexes for supplementary code points higher than this will be omitted.
     897             :  */
     898             : static UChar32
     899           0 : findHighStart(UNewTrie2 *trie, uint32_t highValue) {
     900             :     const uint32_t *data32;
     901             : 
     902             :     uint32_t value, initialValue;
     903             :     UChar32 c, prev;
     904             :     int32_t i1, i2, j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock;
     905             : 
     906           0 :     data32=trie->data;
     907           0 :     initialValue=trie->initialValue;
     908             : 
     909           0 :     index2NullOffset=trie->index2NullOffset;
     910           0 :     nullBlock=trie->dataNullOffset;
     911             : 
     912             :     /* set variables for previous range */
     913           0 :     if(highValue==initialValue) {
     914           0 :         prevI2Block=index2NullOffset;
     915           0 :         prevBlock=nullBlock;
     916             :     } else {
     917           0 :         prevI2Block=-1;
     918           0 :         prevBlock=-1;
     919             :     }
     920           0 :     prev=0x110000;
     921             : 
     922             :     /* enumerate index-2 blocks */
     923           0 :     i1=UNEWTRIE2_INDEX_1_LENGTH;
     924           0 :     c=prev;
     925           0 :     while(c>0) {
     926           0 :         i2Block=trie->index1[--i1];
     927           0 :         if(i2Block==prevI2Block) {
     928             :             /* the index-2 block is the same as the previous one, and filled with highValue */
     929           0 :             c-=UTRIE2_CP_PER_INDEX_1_ENTRY;
     930           0 :             continue;
     931             :         }
     932           0 :         prevI2Block=i2Block;
     933           0 :         if(i2Block==index2NullOffset) {
     934             :             /* this is the null index-2 block */
     935           0 :             if(highValue!=initialValue) {
     936           0 :                 return c;
     937             :             }
     938           0 :             c-=UTRIE2_CP_PER_INDEX_1_ENTRY;
     939             :         } else {
     940             :             /* enumerate data blocks for one index-2 block */
     941           0 :             for(i2=UTRIE2_INDEX_2_BLOCK_LENGTH; i2>0;) {
     942           0 :                 block=trie->index2[i2Block+ --i2];
     943           0 :                 if(block==prevBlock) {
     944             :                     /* the block is the same as the previous one, and filled with highValue */
     945           0 :                     c-=UTRIE2_DATA_BLOCK_LENGTH;
     946           0 :                     continue;
     947             :                 }
     948           0 :                 prevBlock=block;
     949           0 :                 if(block==nullBlock) {
     950             :                     /* this is the null data block */
     951           0 :                     if(highValue!=initialValue) {
     952           0 :                         return c;
     953             :                     }
     954           0 :                     c-=UTRIE2_DATA_BLOCK_LENGTH;
     955             :                 } else {
     956           0 :                     for(j=UTRIE2_DATA_BLOCK_LENGTH; j>0;) {
     957           0 :                         value=data32[block+ --j];
     958           0 :                         if(value!=highValue) {
     959           0 :                             return c;
     960             :                         }
     961           0 :                         --c;
     962             :                     }
     963             :                 }
     964             :             }
     965             :         }
     966             :     }
     967             : 
     968             :     /* deliver last range */
     969           0 :     return 0;
     970             : }
     971             : 
     972             : /*
     973             :  * Compact a build-time trie.
     974             :  *
     975             :  * The compaction
     976             :  * - removes blocks that are identical with earlier ones
     977             :  * - overlaps adjacent blocks as much as possible (if overlap==TRUE)
     978             :  * - moves blocks in steps of the data granularity
     979             :  * - moves and overlaps blocks that overlap with multiple values in the overlap region
     980             :  *
     981             :  * It does not
     982             :  * - try to move and overlap blocks that are not already adjacent
     983             :  */
     984             : static void
     985           0 : compactData(UNewTrie2 *trie) {
     986             :     int32_t start, newStart, movedStart;
     987             :     int32_t blockLength, overlap;
     988             :     int32_t i, mapIndex, blockCount;
     989             : 
     990             :     /* do not compact linear-ASCII data */
     991           0 :     newStart=UTRIE2_DATA_START_OFFSET;
     992           0 :     for(start=0, i=0; start<newStart; start+=UTRIE2_DATA_BLOCK_LENGTH, ++i) {
     993           0 :         trie->map[i]=start;
     994             :     }
     995             : 
     996             :     /*
     997             :      * Start with a block length of 64 for 2-byte UTF-8,
     998             :      * then switch to UTRIE2_DATA_BLOCK_LENGTH.
     999             :      */
    1000           0 :     blockLength=64;
    1001           0 :     blockCount=blockLength>>UTRIE2_SHIFT_2;
    1002           0 :     for(start=newStart; start<trie->dataLength;) {
    1003             :         /*
    1004             :          * start: index of first entry of current block
    1005             :          * newStart: index where the current block is to be moved
    1006             :          *           (right after current end of already-compacted data)
    1007             :          */
    1008           0 :         if(start==UNEWTRIE2_DATA_0800_OFFSET) {
    1009           0 :             blockLength=UTRIE2_DATA_BLOCK_LENGTH;
    1010           0 :             blockCount=1;
    1011             :         }
    1012             : 
    1013             :         /* skip blocks that are not used */
    1014           0 :         if(trie->map[start>>UTRIE2_SHIFT_2]<=0) {
    1015             :             /* advance start to the next block */
    1016           0 :             start+=blockLength;
    1017             : 
    1018             :             /* leave newStart with the previous block! */
    1019           0 :             continue;
    1020             :         }
    1021             : 
    1022             :         /* search for an identical block */
    1023           0 :         if( (movedStart=findSameDataBlock(trie->data, newStart, start, blockLength))
    1024             :              >=0
    1025             :         ) {
    1026             :             /* found an identical block, set the other block's index value for the current block */
    1027           0 :             for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
    1028           0 :                 trie->map[mapIndex++]=movedStart;
    1029           0 :                 movedStart+=UTRIE2_DATA_BLOCK_LENGTH;
    1030             :             }
    1031             : 
    1032             :             /* advance start to the next block */
    1033           0 :             start+=blockLength;
    1034             : 
    1035             :             /* leave newStart with the previous block! */
    1036           0 :             continue;
    1037             :         }
    1038             : 
    1039             :         /* see if the beginning of this block can be overlapped with the end of the previous block */
    1040             :         /* look for maximum overlap (modulo granularity) with the previous, adjacent block */
    1041           0 :         for(overlap=blockLength-UTRIE2_DATA_GRANULARITY;
    1042           0 :             overlap>0 && !equal_uint32(trie->data+(newStart-overlap), trie->data+start, overlap);
    1043           0 :             overlap-=UTRIE2_DATA_GRANULARITY) {}
    1044             : 
    1045           0 :         if(overlap>0 || newStart<start) {
    1046             :             /* some overlap, or just move the whole block */
    1047           0 :             movedStart=newStart-overlap;
    1048           0 :             for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
    1049           0 :                 trie->map[mapIndex++]=movedStart;
    1050           0 :                 movedStart+=UTRIE2_DATA_BLOCK_LENGTH;
    1051             :             }
    1052             : 
    1053             :             /* move the non-overlapping indexes to their new positions */
    1054           0 :             start+=overlap;
    1055           0 :             for(i=blockLength-overlap; i>0; --i) {
    1056           0 :                 trie->data[newStart++]=trie->data[start++];
    1057             :             }
    1058             :         } else /* no overlap && newStart==start */ {
    1059           0 :             for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
    1060           0 :                 trie->map[mapIndex++]=start;
    1061           0 :                 start+=UTRIE2_DATA_BLOCK_LENGTH;
    1062             :             }
    1063           0 :             newStart=start;
    1064             :         }
    1065             :     }
    1066             : 
    1067             :     /* now adjust the index-2 table */
    1068           0 :     for(i=0; i<trie->index2Length; ++i) {
    1069           0 :         if(i==UNEWTRIE2_INDEX_GAP_OFFSET) {
    1070             :             /* Gap indexes are invalid (-1). Skip over the gap. */
    1071           0 :             i+=UNEWTRIE2_INDEX_GAP_LENGTH;
    1072             :         }
    1073           0 :         trie->index2[i]=trie->map[trie->index2[i]>>UTRIE2_SHIFT_2];
    1074             :     }
    1075           0 :     trie->dataNullOffset=trie->map[trie->dataNullOffset>>UTRIE2_SHIFT_2];
    1076             : 
    1077             :     /* ensure dataLength alignment */
    1078           0 :     while((newStart&(UTRIE2_DATA_GRANULARITY-1))!=0) {
    1079           0 :         trie->data[newStart++]=trie->initialValue;
    1080             :     }
    1081             : 
    1082             : #ifdef UTRIE2_DEBUG
    1083             :     /* we saved some space */
    1084             :     printf("compacting UTrie2: count of 32-bit data words %lu->%lu\n",
    1085             :             (long)trie->dataLength, (long)newStart);
    1086             : #endif
    1087             : 
    1088           0 :     trie->dataLength=newStart;
    1089           0 : }
    1090             : 
    1091             : static void
    1092           0 : compactIndex2(UNewTrie2 *trie) {
    1093             :     int32_t i, start, newStart, movedStart, overlap;
    1094             : 
    1095             :     /* do not compact linear-BMP index-2 blocks */
    1096           0 :     newStart=UTRIE2_INDEX_2_BMP_LENGTH;
    1097           0 :     for(start=0, i=0; start<newStart; start+=UTRIE2_INDEX_2_BLOCK_LENGTH, ++i) {
    1098           0 :         trie->map[i]=start;
    1099             :     }
    1100             : 
    1101             :     /* Reduce the index table gap to what will be needed at runtime. */
    1102           0 :     newStart+=UTRIE2_UTF8_2B_INDEX_2_LENGTH+((trie->highStart-0x10000)>>UTRIE2_SHIFT_1);
    1103             : 
    1104           0 :     for(start=UNEWTRIE2_INDEX_2_NULL_OFFSET; start<trie->index2Length;) {
    1105             :         /*
    1106             :          * start: index of first entry of current block
    1107             :          * newStart: index where the current block is to be moved
    1108             :          *           (right after current end of already-compacted data)
    1109             :          */
    1110             : 
    1111             :         /* search for an identical block */
    1112           0 :         if( (movedStart=findSameIndex2Block(trie->index2, newStart, start))
    1113             :              >=0
    1114             :         ) {
    1115             :             /* found an identical block, set the other block's index value for the current block */
    1116           0 :             trie->map[start>>UTRIE2_SHIFT_1_2]=movedStart;
    1117             : 
    1118             :             /* advance start to the next block */
    1119           0 :             start+=UTRIE2_INDEX_2_BLOCK_LENGTH;
    1120             : 
    1121             :             /* leave newStart with the previous block! */
    1122           0 :             continue;
    1123             :         }
    1124             : 
    1125             :         /* see if the beginning of this block can be overlapped with the end of the previous block */
    1126             :         /* look for maximum overlap with the previous, adjacent block */
    1127           0 :         for(overlap=UTRIE2_INDEX_2_BLOCK_LENGTH-1;
    1128           0 :             overlap>0 && !equal_int32(trie->index2+(newStart-overlap), trie->index2+start, overlap);
    1129             :             --overlap) {}
    1130             : 
    1131           0 :         if(overlap>0 || newStart<start) {
    1132             :             /* some overlap, or just move the whole block */
    1133           0 :             trie->map[start>>UTRIE2_SHIFT_1_2]=newStart-overlap;
    1134             : 
    1135             :             /* move the non-overlapping indexes to their new positions */
    1136           0 :             start+=overlap;
    1137           0 :             for(i=UTRIE2_INDEX_2_BLOCK_LENGTH-overlap; i>0; --i) {
    1138           0 :                 trie->index2[newStart++]=trie->index2[start++];
    1139             :             }
    1140             :         } else /* no overlap && newStart==start */ {
    1141           0 :             trie->map[start>>UTRIE2_SHIFT_1_2]=start;
    1142           0 :             start+=UTRIE2_INDEX_2_BLOCK_LENGTH;
    1143           0 :             newStart=start;
    1144             :         }
    1145             :     }
    1146             : 
    1147             :     /* now adjust the index-1 table */
    1148           0 :     for(i=0; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) {
    1149           0 :         trie->index1[i]=trie->map[trie->index1[i]>>UTRIE2_SHIFT_1_2];
    1150             :     }
    1151           0 :     trie->index2NullOffset=trie->map[trie->index2NullOffset>>UTRIE2_SHIFT_1_2];
    1152             : 
    1153             :     /*
    1154             :      * Ensure data table alignment:
    1155             :      * Needs to be granularity-aligned for 16-bit trie
    1156             :      * (so that dataMove will be down-shiftable),
    1157             :      * and 2-aligned for uint32_t data.
    1158             :      */
    1159           0 :     while((newStart&((UTRIE2_DATA_GRANULARITY-1)|1))!=0) {
    1160             :         /* Arbitrary value: 0x3fffc not possible for real data. */
    1161           0 :         trie->index2[newStart++]=(int32_t)0xffff<<UTRIE2_INDEX_SHIFT;
    1162             :     }
    1163             : 
    1164             : #ifdef UTRIE2_DEBUG
    1165             :     /* we saved some space */
    1166             :     printf("compacting UTrie2: count of 16-bit index-2 words %lu->%lu\n",
    1167             :             (long)trie->index2Length, (long)newStart);
    1168             : #endif
    1169             : 
    1170           0 :     trie->index2Length=newStart;
    1171           0 : }
    1172             : 
    1173             : static void
    1174           0 : compactTrie(UTrie2 *trie, UErrorCode *pErrorCode) {
    1175             :     UNewTrie2 *newTrie;
    1176             :     UChar32 highStart, suppHighStart;
    1177             :     uint32_t highValue;
    1178             : 
    1179           0 :     newTrie=trie->newTrie;
    1180             : 
    1181             :     /* find highStart and round it up */
    1182           0 :     highValue=utrie2_get32(trie, 0x10ffff);
    1183           0 :     highStart=findHighStart(newTrie, highValue);
    1184           0 :     highStart=(highStart+(UTRIE2_CP_PER_INDEX_1_ENTRY-1))&~(UTRIE2_CP_PER_INDEX_1_ENTRY-1);
    1185           0 :     if(highStart==0x110000) {
    1186           0 :         highValue=trie->errorValue;
    1187             :     }
    1188             : 
    1189             :     /*
    1190             :      * Set trie->highStart only after utrie2_get32(trie, highStart).
    1191             :      * Otherwise utrie2_get32(trie, highStart) would try to read the highValue.
    1192             :      */
    1193           0 :     trie->highStart=newTrie->highStart=highStart;
    1194             : 
    1195             : #ifdef UTRIE2_DEBUG
    1196             :     printf("UTrie2: highStart U+%04lx  highValue 0x%lx  initialValue 0x%lx\n",
    1197             :             (long)highStart, (long)highValue, (long)trie->initialValue);
    1198             : #endif
    1199             : 
    1200           0 :     if(highStart<0x110000) {
    1201             :         /* Blank out [highStart..10ffff] to release associated data blocks. */
    1202           0 :         suppHighStart= highStart<=0x10000 ? 0x10000 : highStart;
    1203           0 :         utrie2_setRange32(trie, suppHighStart, 0x10ffff, trie->initialValue, TRUE, pErrorCode);
    1204           0 :         if(U_FAILURE(*pErrorCode)) {
    1205           0 :             return;
    1206             :         }
    1207             :     }
    1208             : 
    1209           0 :     compactData(newTrie);
    1210           0 :     if(highStart>0x10000) {
    1211           0 :         compactIndex2(newTrie);
    1212             : #ifdef UTRIE2_DEBUG
    1213             :     } else {
    1214             :         printf("UTrie2: highStart U+%04lx  count of 16-bit index-2 words %lu->%lu\n",
    1215             :                 (long)highStart, (long)trie->newTrie->index2Length, (long)UTRIE2_INDEX_1_OFFSET);
    1216             : #endif
    1217             :     }
    1218             : 
    1219             :     /*
    1220             :      * Store the highValue in the data array and round up the dataLength.
    1221             :      * Must be done after compactData() because that assumes that dataLength
    1222             :      * is a multiple of UTRIE2_DATA_BLOCK_LENGTH.
    1223             :      */
    1224           0 :     newTrie->data[newTrie->dataLength++]=highValue;
    1225           0 :     while((newTrie->dataLength&(UTRIE2_DATA_GRANULARITY-1))!=0) {
    1226           0 :         newTrie->data[newTrie->dataLength++]=trie->initialValue;
    1227             :     }
    1228             : 
    1229           0 :     newTrie->isCompacted=TRUE;
    1230             : }
    1231             : 
    1232             : /* serialization ------------------------------------------------------------ */
    1233             : 
    1234             : /**
    1235             :  * Maximum length of the runtime index array.
    1236             :  * Limited by its own 16-bit index values, and by uint16_t UTrie2Header.indexLength.
    1237             :  * (The actual maximum length is lower,
    1238             :  * (0x110000>>UTRIE2_SHIFT_2)+UTRIE2_UTF8_2B_INDEX_2_LENGTH+UTRIE2_MAX_INDEX_1_LENGTH.)
    1239             :  */
    1240             : #define UTRIE2_MAX_INDEX_LENGTH 0xffff
    1241             : 
    1242             : /**
    1243             :  * Maximum length of the runtime data array.
    1244             :  * Limited by 16-bit index values that are left-shifted by UTRIE2_INDEX_SHIFT,
    1245             :  * and by uint16_t UTrie2Header.shiftedDataLength.
    1246             :  */
    1247             : #define UTRIE2_MAX_DATA_LENGTH (0xffff<<UTRIE2_INDEX_SHIFT)
    1248             : 
    1249             : /* Compact and internally serialize the trie. */
    1250             : U_CAPI void U_EXPORT2
    1251           0 : utrie2_freeze(UTrie2 *trie, UTrie2ValueBits valueBits, UErrorCode *pErrorCode) {
    1252             :     UNewTrie2 *newTrie;
    1253             :     UTrie2Header *header;
    1254             :     uint32_t *p;
    1255             :     uint16_t *dest16;
    1256             :     int32_t i, length;
    1257             :     int32_t allIndexesLength;
    1258             :     int32_t dataMove;  /* >0 if the data is moved to the end of the index array */
    1259             :     UChar32 highStart;
    1260             : 
    1261             :     /* argument check */
    1262           0 :     if(U_FAILURE(*pErrorCode)) {
    1263           0 :         return;
    1264             :     }
    1265           0 :     if( trie==NULL ||
    1266           0 :         valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits
    1267             :     ) {
    1268           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    1269           0 :         return;
    1270             :     }
    1271           0 :     newTrie=trie->newTrie;
    1272           0 :     if(newTrie==NULL) {
    1273             :         /* already frozen */
    1274             :         UTrie2ValueBits frozenValueBits=
    1275           0 :             trie->data16!=NULL ? UTRIE2_16_VALUE_BITS : UTRIE2_32_VALUE_BITS;
    1276           0 :         if(valueBits!=frozenValueBits) {
    1277           0 :             *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    1278             :         }
    1279           0 :         return;
    1280             :     }
    1281             : 
    1282             :     /* compact if necessary */
    1283           0 :     if(!newTrie->isCompacted) {
    1284           0 :         compactTrie(trie, pErrorCode);
    1285           0 :         if(U_FAILURE(*pErrorCode)) {
    1286           0 :             return;
    1287             :         }
    1288             :     }
    1289           0 :     highStart=trie->highStart;
    1290             : 
    1291           0 :     if(highStart<=0x10000) {
    1292           0 :         allIndexesLength=UTRIE2_INDEX_1_OFFSET;
    1293             :     } else {
    1294           0 :         allIndexesLength=newTrie->index2Length;
    1295             :     }
    1296           0 :     if(valueBits==UTRIE2_16_VALUE_BITS) {
    1297           0 :         dataMove=allIndexesLength;
    1298             :     } else {
    1299           0 :         dataMove=0;
    1300             :     }
    1301             : 
    1302             :     /* are indexLength and dataLength within limits? */
    1303           0 :     if( /* for unshifted indexLength */
    1304           0 :         allIndexesLength>UTRIE2_MAX_INDEX_LENGTH ||
    1305             :         /* for unshifted dataNullOffset */
    1306           0 :         (dataMove+newTrie->dataNullOffset)>0xffff ||
    1307             :         /* for unshifted 2-byte UTF-8 index-2 values */
    1308           0 :         (dataMove+UNEWTRIE2_DATA_0800_OFFSET)>0xffff ||
    1309             :         /* for shiftedDataLength */
    1310           0 :         (dataMove+newTrie->dataLength)>UTRIE2_MAX_DATA_LENGTH
    1311             :     ) {
    1312           0 :         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    1313           0 :         return;
    1314             :     }
    1315             : 
    1316             :     /* calculate the total serialized length */
    1317           0 :     length=sizeof(UTrie2Header)+allIndexesLength*2;
    1318           0 :     if(valueBits==UTRIE2_16_VALUE_BITS) {
    1319           0 :         length+=newTrie->dataLength*2;
    1320             :     } else {
    1321           0 :         length+=newTrie->dataLength*4;
    1322             :     }
    1323             : 
    1324           0 :     trie->memory=uprv_malloc(length);
    1325           0 :     if(trie->memory==NULL) {
    1326           0 :         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    1327           0 :         return;
    1328             :     }
    1329           0 :     trie->length=length;
    1330           0 :     trie->isMemoryOwned=TRUE;
    1331             : 
    1332           0 :     trie->indexLength=allIndexesLength;
    1333           0 :     trie->dataLength=newTrie->dataLength;
    1334           0 :     if(highStart<=0x10000) {
    1335           0 :         trie->index2NullOffset=0xffff;
    1336             :     } else {
    1337           0 :         trie->index2NullOffset=UTRIE2_INDEX_2_OFFSET+newTrie->index2NullOffset;
    1338             :     }
    1339           0 :     trie->dataNullOffset=(uint16_t)(dataMove+newTrie->dataNullOffset);
    1340           0 :     trie->highValueIndex=dataMove+trie->dataLength-UTRIE2_DATA_GRANULARITY;
    1341             : 
    1342             :     /* set the header fields */
    1343           0 :     header=(UTrie2Header *)trie->memory;
    1344             : 
    1345           0 :     header->signature=UTRIE2_SIG; /* "Tri2" */
    1346           0 :     header->options=(uint16_t)valueBits;
    1347             : 
    1348           0 :     header->indexLength=(uint16_t)trie->indexLength;
    1349           0 :     header->shiftedDataLength=(uint16_t)(trie->dataLength>>UTRIE2_INDEX_SHIFT);
    1350           0 :     header->index2NullOffset=trie->index2NullOffset;
    1351           0 :     header->dataNullOffset=trie->dataNullOffset;
    1352           0 :     header->shiftedHighStart=(uint16_t)(highStart>>UTRIE2_SHIFT_1);
    1353             : 
    1354             :     /* fill the index and data arrays */
    1355           0 :     dest16=(uint16_t *)(header+1);
    1356           0 :     trie->index=dest16;
    1357             : 
    1358             :     /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove */
    1359           0 :     p=(uint32_t *)newTrie->index2;
    1360           0 :     for(i=UTRIE2_INDEX_2_BMP_LENGTH; i>0; --i) {
    1361           0 :         *dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT);
    1362             :     }
    1363             : 
    1364             :     /* write UTF-8 2-byte index-2 values, not right-shifted */
    1365           0 :     for(i=0; i<(0xc2-0xc0); ++i) {                                  /* C0..C1 */
    1366           0 :         *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET);
    1367             :     }
    1368           0 :     for(; i<(0xe0-0xc0); ++i) {                                     /* C2..DF */
    1369           0 :         *dest16++=(uint16_t)(dataMove+newTrie->index2[i<<(6-UTRIE2_SHIFT_2)]);
    1370             :     }
    1371             : 
    1372           0 :     if(highStart>0x10000) {
    1373           0 :         int32_t index1Length=(highStart-0x10000)>>UTRIE2_SHIFT_1;
    1374           0 :         int32_t index2Offset=UTRIE2_INDEX_2_BMP_LENGTH+UTRIE2_UTF8_2B_INDEX_2_LENGTH+index1Length;
    1375             : 
    1376             :         /* write 16-bit index-1 values for supplementary code points */
    1377           0 :         p=(uint32_t *)newTrie->index1+UTRIE2_OMITTED_BMP_INDEX_1_LENGTH;
    1378           0 :         for(i=index1Length; i>0; --i) {
    1379           0 :             *dest16++=(uint16_t)(UTRIE2_INDEX_2_OFFSET + *p++);
    1380             :         }
    1381             : 
    1382             :         /*
    1383             :          * write the index-2 array values for supplementary code points,
    1384             :          * shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove
    1385             :          */
    1386           0 :         p=(uint32_t *)newTrie->index2+index2Offset;
    1387           0 :         for(i=newTrie->index2Length-index2Offset; i>0; --i) {
    1388           0 :             *dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT);
    1389             :         }
    1390             :     }
    1391             : 
    1392             :     /* write the 16/32-bit data array */
    1393           0 :     switch(valueBits) {
    1394             :     case UTRIE2_16_VALUE_BITS:
    1395             :         /* write 16-bit data values */
    1396           0 :         trie->data16=dest16;
    1397           0 :         trie->data32=NULL;
    1398           0 :         p=newTrie->data;
    1399           0 :         for(i=newTrie->dataLength; i>0; --i) {
    1400           0 :             *dest16++=(uint16_t)*p++;
    1401             :         }
    1402           0 :         break;
    1403             :     case UTRIE2_32_VALUE_BITS:
    1404             :         /* write 32-bit data values */
    1405           0 :         trie->data16=NULL;
    1406           0 :         trie->data32=(uint32_t *)dest16;
    1407           0 :         uprv_memcpy(dest16, newTrie->data, (size_t)newTrie->dataLength*4);
    1408           0 :         break;
    1409             :     default:
    1410           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    1411           0 :         return;
    1412             :     }
    1413             : 
    1414             :     /* Delete the UNewTrie2. */
    1415           0 :     uprv_free(newTrie->data);
    1416           0 :     uprv_free(newTrie);
    1417           0 :     trie->newTrie=NULL;
    1418             : }
    1419             : 
    1420             : /*
    1421             :  * This is here to avoid a dependency from utrie2.cpp on utrie.c.
    1422             :  * This file already depends on utrie.c.
    1423             :  * Otherwise, this should be in utrie2.cpp right after utrie2_swap().
    1424             :  */
    1425             : U_CAPI int32_t U_EXPORT2
    1426           0 : utrie2_swapAnyVersion(const UDataSwapper *ds,
    1427             :                       const void *inData, int32_t length, void *outData,
    1428             :                       UErrorCode *pErrorCode) {
    1429           0 :     if(U_SUCCESS(*pErrorCode)) {
    1430           0 :         switch(utrie2_getVersion(inData, length, TRUE)) {
    1431             :         case 1:
    1432           0 :             return utrie_swap(ds, inData, length, outData, pErrorCode);
    1433             :         case 2:
    1434           0 :             return utrie2_swap(ds, inData, length, outData, pErrorCode);
    1435             :         default:
    1436           0 :             *pErrorCode=U_INVALID_FORMAT_ERROR;
    1437           0 :             return 0;
    1438             :         }
    1439             :     }
    1440           0 :     return 0;
    1441             : }

Generated by: LCOV version 1.13