LCOV - code coverage report
Current view: top level - intl/icu/source/common - unames.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 0 856 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 35 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : ******************************************************************************
       5             : *
       6             : *   Copyright (C) 1999-2014, International Business Machines
       7             : *   Corporation and others.  All Rights Reserved.
       8             : *
       9             : ******************************************************************************
      10             : *   file name:  unames.c
      11             : *   encoding:   UTF-8
      12             : *   tab size:   8 (not used)
      13             : *   indentation:4
      14             : *
      15             : *   created on: 1999oct04
      16             : *   created by: Markus W. Scherer
      17             : */
      18             : 
      19             : #include "unicode/utypes.h"
      20             : #include "unicode/putil.h"
      21             : #include "unicode/uchar.h"
      22             : #include "unicode/udata.h"
      23             : #include "unicode/utf.h"
      24             : #include "unicode/utf16.h"
      25             : #include "uassert.h"
      26             : #include "ustr_imp.h"
      27             : #include "umutex.h"
      28             : #include "cmemory.h"
      29             : #include "cstring.h"
      30             : #include "ucln_cmn.h"
      31             : #include "udataswp.h"
      32             : #include "uprops.h"
      33             : 
      34             : U_NAMESPACE_BEGIN
      35             : 
      36             : /* prototypes ------------------------------------------------------------- */
      37             : 
/* Name and type of the data item that loadCharNames() passes to udata_openChoice(). */
static const char DATA_NAME[] = "unames";
static const char DATA_TYPE[] = "icu";

/*
 * Names are stored in groups of LINES_PER_GROUP consecutive code points:
 * code>>GROUP_SHIFT identifies the group and code&GROUP_MASK selects the
 * line (name) within that group.
 */
#define GROUP_SHIFT 5
#define LINES_PER_GROUP (1L<<GROUP_SHIFT)
#define GROUP_MASK (LINES_PER_GROUP-1)
      44             : 
      45             : /*
      46             :  * This struct was replaced by explicitly accessing equivalent
      47             :  * fields from triples of uint16_t.
      48             :  * The Group struct was padded to 8 bytes on compilers for early ARM CPUs,
      49             :  * which broke the assumption that sizeof(Group)==6 and that the ++ operator
      50             :  * would advance by 6 bytes (3 uint16_t).
      51             :  *
      52             :  * We can't just change the data structure because it's loaded from a data file,
      53             :  * and we don't want to make it less compact, so we changed the access code.
      54             :  *
      55             :  * For details see ICU tickets 6331 and 6008.
      56             : typedef struct {
      57             :     uint16_t groupMSB,
      58             :              offsetHigh, offsetLow; / * avoid padding * /
      59             : } Group;
      60             :  */
/* Indexes of the uint16_t fields within one group triple. */
enum {
    GROUP_MSB,          /* compared against code>>GROUP_SHIFT to identify the group */
    GROUP_OFFSET_HIGH,  /* upper 16 bits of the 32-bit group offset */
    GROUP_OFFSET_LOW,   /* lower 16 bits of the 32-bit group offset */
    GROUP_LENGTH        /* number of uint16_t per group; used to step between groups */
};
      67             : 
      68             : /*
      69             :  * Get the 32-bit group offset.
      70             :  * @param group (const uint16_t *) pointer to a Group triple of uint16_t
      71             :  * @return group offset (int32_t)
      72             :  */
      73             : #define GET_GROUP_OFFSET(group) ((int32_t)(group)[GROUP_OFFSET_HIGH]<<16|(group)[GROUP_OFFSET_LOW])
      74             : 
      75             : #define NEXT_GROUP(group) ((group)+GROUP_LENGTH)
      76             : #define PREV_GROUP(group) ((group)-GROUP_LENGTH)
      77             : 
/*
 * NOTE(review): the users of this struct are outside the visible part of the
 * file; presumably [start, end] is a code point range and type/variant select
 * the name-generation algorithm - confirm against those users.
 */
typedef struct {
    uint32_t start, end;
    uint8_t type, variant;
    uint16_t size;
} AlgorithmicRange;

/*
 * Index header at the start of the loaded names data.
 * tokenStringOffset, groupsOffset and groupStringOffset are used as byte
 * offsets from the address of this struct (see expandName(), GET_GROUPS and
 * expandGroupName()). algNamesOffset is presumably the same kind of offset;
 * its use is not visible here.
 */
typedef struct {
    uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset;
} UCharNames;
      87             : 
      88             : /*
      89             :  * Get the groups table from a UCharNames struct.
      90             :  * The groups table consists of one uint16_t groupCount followed by
      91             :  * groupCount groups. Each group is a triple of uint16_t, see GROUP_LENGTH
      92             :  * and the comment for the old struct Group above.
      93             :  *
      94             :  * @param names (const UCharNames *) pointer to the UCharNames indexes
      95             :  * @return (const uint16_t *) pointer to the groups table
      96             :  */
      97             : #define GET_GROUPS(names) (const uint16_t *)((const char *)names+names->groupsOffset)
      98             : 
/*
 * NOTE(review): presumably carries the name being searched for and receives
 * the matching code point; its users are outside the visible part of the file.
 */
typedef struct {
    const char *otherName;
    UChar32 code;
} FindName;

/*
 * Value passed in place of an enumerator function; enumGroupNames() compares
 * its fn argument against it to choose between enumerating and finding a name.
 */
#define DO_FIND_NAME NULL

/*
 * Loaded names data and its one-time initialization state.
 * Set by loadCharNames(), cleared by unames_cleanup().
 */
static UDataMemory *uCharNamesData=NULL;
static UCharNames *uCharNames=NULL;
static icu::UInitOnce gCharNamesInitOnce = U_INITONCE_INITIALIZER;

/*
 * Maximum length of character names (regular & 1.0).
 */
static int32_t gMaxNameLength=0;

/*
 * Set of chars used in character names (regular & 1.0).
 * Chars are platform-dependent (can be EBCDIC).
 */
static uint32_t gNameSet[8]={ 0 };
     120             : 
     121             : #define U_NONCHARACTER_CODE_POINT U_CHAR_CATEGORY_COUNT
     122             : #define U_LEAD_SURROGATE U_CHAR_CATEGORY_COUNT + 1
     123             : #define U_TRAIL_SURROGATE U_CHAR_CATEGORY_COUNT + 2
     124             : 
     125             : #define U_CHAR_EXTENDED_CATEGORY_COUNT (U_CHAR_CATEGORY_COUNT + 3)
     126             : 
/*
 * Display names indexed by the value returned from getCharCat():
 * a u_charType() result, or one of the extended categories
 * U_NONCHARACTER_CODE_POINT, U_LEAD_SURROGATE, U_TRAIL_SURROGATE
 * (the last three entries). getCharCatName() falls back to "unknown" for
 * an index beyond this table.
 */
static const char * const charCatNames[U_CHAR_EXTENDED_CATEGORY_COUNT] = {
    "unassigned",
    "uppercase letter",
    "lowercase letter",
    "titlecase letter",
    "modifier letter",
    "other letter",
    "non spacing mark",
    "enclosing mark",
    "combining spacing mark",
    "decimal digit number",
    "letter number",
    "other number",
    "space separator",
    "line separator",
    "paragraph separator",
    "control",
    "format",
    "private use area",
    "surrogate",
    "dash punctuation",   
    "start punctuation",
    "end punctuation",
    "connector punctuation",
    "other punctuation",
    "math symbol",
    "currency symbol",
    "modifier symbol",
    "other symbol",
    "initial punctuation",
    "final punctuation",
    "noncharacter",
    "lead surrogate",
    "trail surrogate"
};
     162             : 
     163             : /* implementation ----------------------------------------------------------- */
     164             : 
     165           0 : static UBool U_CALLCONV unames_cleanup(void)
     166             : {
     167           0 :     if(uCharNamesData) {
     168           0 :         udata_close(uCharNamesData);
     169           0 :         uCharNamesData = NULL;
     170             :     }
     171           0 :     if(uCharNames) {
     172           0 :         uCharNames = NULL;
     173             :     }
     174           0 :     gCharNamesInitOnce.reset();
     175           0 :     gMaxNameLength=0;
     176           0 :     return TRUE;
     177             : }
     178             : 
     179             : static UBool U_CALLCONV
     180           0 : isAcceptable(void * /*context*/,
     181             :              const char * /*type*/, const char * /*name*/,
     182             :              const UDataInfo *pInfo) {
     183             :     return (UBool)(
     184           0 :         pInfo->size>=20 &&
     185           0 :         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
     186           0 :         pInfo->charsetFamily==U_CHARSET_FAMILY &&
     187           0 :         pInfo->dataFormat[0]==0x75 &&   /* dataFormat="unam" */
     188           0 :         pInfo->dataFormat[1]==0x6e &&
     189           0 :         pInfo->dataFormat[2]==0x61 &&
     190           0 :         pInfo->dataFormat[3]==0x6d &&
     191           0 :         pInfo->formatVersion[0]==1);
     192             : }
     193             : 
     194             : static void U_CALLCONV
     195           0 : loadCharNames(UErrorCode &status) {
     196           0 :     U_ASSERT(uCharNamesData == NULL);
     197           0 :     U_ASSERT(uCharNames == NULL);
     198             : 
     199           0 :     uCharNamesData = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &status);
     200           0 :     if(U_FAILURE(status)) {
     201           0 :         uCharNamesData = NULL;
     202             :     } else {
     203           0 :         uCharNames = (UCharNames *)udata_getMemory(uCharNamesData);
     204             :     }
     205           0 :     ucln_common_registerCleanup(UCLN_COMMON_UNAMES, unames_cleanup);
     206           0 : }
     207             : 
     208             : 
     209             : static UBool
     210           0 : isDataLoaded(UErrorCode *pErrorCode) {
     211           0 :     umtx_initOnce(gCharNamesInitOnce, &loadCharNames, *pErrorCode);
     212           0 :     return U_SUCCESS(*pErrorCode);
     213             : }
     214             : 
/*
 * Append one char to an output buffer and count it.
 * The char is stored (advancing buffer and decrementing bufferLength) only
 * while bufferLength>0; bufferPos is incremented in every case, so it ends up
 * as the full length that the output would need.
 * buffer, bufferLength and bufferPos must be modifiable lvalues; they are
 * changed in place.
 */
#define WRITE_CHAR(buffer, bufferLength, bufferPos, c) { \
    if((bufferLength)>0) { \
        *(buffer)++=c; \
        --(bufferLength); \
    } \
    ++(bufferPos); \
}

/*
 * Internal name choice for ISO comments, defined as U_CHAR_NAME_CHOICE_COUNT;
 * expandName() and compareName() map it to field index 2.
 */
#define U_ISO_COMMENT U_CHAR_NAME_CHOICE_COUNT
     224             : 
     225             : /*
     226             :  * Important: expandName() and compareName() are almost the same -
     227             :  * apply fixes to both.
     228             :  *
     229             :  * UnicodeData.txt uses ';' as a field separator, so no
     230             :  * field can contain ';' as part of its contents.
     231             :  * In unames.dat, it is marked as token[';']==-1 only if the
     232             :  * semicolon is used in the data file - which is iff we
     233             :  * have Unicode 1.0 names or ISO comments or aliases.
     234             :  * So, it will be token[';']==-1 if we store U1.0 names/ISO comments/aliases
     235             :  * although we know that it will never be part of a name.
     236             :  */
     237             : static uint16_t
     238           0 : expandName(UCharNames *names,
     239             :            const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
     240             :            char *buffer, uint16_t bufferLength) {
     241           0 :     uint16_t *tokens=(uint16_t *)names+8;
     242           0 :     uint16_t token, tokenCount=*tokens++, bufferPos=0;
     243           0 :     uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
     244             :     uint8_t c;
     245             : 
     246           0 :     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
     247             :         /*
     248             :          * skip the modern name if it is not requested _and_
     249             :          * if the semicolon byte value is a character, not a token number
     250             :          */
     251           0 :         if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
     252           0 :             int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice;
     253           0 :             do {
     254           0 :                 while(nameLength>0) {
     255           0 :                     --nameLength;
     256           0 :                     if(*name++==';') {
     257           0 :                         break;
     258             :                     }
     259             :                 }
     260           0 :             } while(--fieldIndex>0);
     261             :         } else {
     262             :             /*
     263             :              * the semicolon byte value is a token number, therefore
     264             :              * only modern names are stored in unames.dat and there is no
     265             :              * such requested alternate name here
     266             :              */
     267           0 :             nameLength=0;
     268             :         }
     269             :     }
     270             : 
     271             :     /* write each letter directly, and write a token word per token */
     272           0 :     while(nameLength>0) {
     273           0 :         --nameLength;
     274           0 :         c=*name++;
     275             : 
     276           0 :         if(c>=tokenCount) {
     277           0 :             if(c!=';') {
     278             :                 /* implicit letter */
     279           0 :                 WRITE_CHAR(buffer, bufferLength, bufferPos, c);
     280             :             } else {
     281             :                 /* finished */
     282           0 :                 break;
     283             :             }
     284             :         } else {
     285           0 :             token=tokens[c];
     286           0 :             if(token==(uint16_t)(-2)) {
     287             :                 /* this is a lead byte for a double-byte token */
     288           0 :                 token=tokens[c<<8|*name++];
     289           0 :                 --nameLength;
     290             :             }
     291           0 :             if(token==(uint16_t)(-1)) {
     292           0 :                 if(c!=';') {
     293             :                     /* explicit letter */
     294           0 :                     WRITE_CHAR(buffer, bufferLength, bufferPos, c);
     295             :                 } else {
     296             :                     /* stop, but skip the semicolon if we are seeking
     297             :                        extended names and there was no 2.0 name but there
     298             :                        is a 1.0 name. */
     299           0 :                     if(!bufferPos && nameChoice == U_EXTENDED_CHAR_NAME) {
     300           0 :                         if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
     301           0 :                             continue;
     302             :                         }
     303             :                     }
     304             :                     /* finished */
     305           0 :                     break;
     306             :                 }
     307             :             } else {
     308             :                 /* write token word */
     309           0 :                 uint8_t *tokenString=tokenStrings+token;
     310           0 :                 while((c=*tokenString++)!=0) {
     311           0 :                     WRITE_CHAR(buffer, bufferLength, bufferPos, c);
     312             :                 }
     313             :             }
     314             :         }
     315             :     }
     316             : 
     317             :     /* zero-terminate */
     318           0 :     if(bufferLength>0) {
     319           0 :         *buffer=0;
     320             :     }
     321             : 
     322           0 :     return bufferPos;
     323             : }
     324             : 
     325             : /*
     326             :  * compareName() is almost the same as expandName() except that it compares
     327             :  * the currently expanded name to an input name.
     328             :  * It returns the match/no match result as soon as possible.
     329             :  */
     330             : static UBool
     331           0 : compareName(UCharNames *names,
     332             :             const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
     333             :             const char *otherName) {
     334           0 :     uint16_t *tokens=(uint16_t *)names+8;
     335           0 :     uint16_t token, tokenCount=*tokens++;
     336           0 :     uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
     337             :     uint8_t c;
     338           0 :     const char *origOtherName = otherName;
     339             : 
     340           0 :     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
     341             :         /*
     342             :          * skip the modern name if it is not requested _and_
     343             :          * if the semicolon byte value is a character, not a token number
     344             :          */
     345           0 :         if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
     346           0 :             int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice;
     347           0 :             do {
     348           0 :                 while(nameLength>0) {
     349           0 :                     --nameLength;
     350           0 :                     if(*name++==';') {
     351           0 :                         break;
     352             :                     }
     353             :                 }
     354           0 :             } while(--fieldIndex>0);
     355             :         } else {
     356             :             /*
     357             :              * the semicolon byte value is a token number, therefore
     358             :              * only modern names are stored in unames.dat and there is no
     359             :              * such requested alternate name here
     360             :              */
     361           0 :             nameLength=0;
     362             :         }
     363             :     }
     364             : 
     365             :     /* compare each letter directly, and compare a token word per token */
     366           0 :     while(nameLength>0) {
     367           0 :         --nameLength;
     368           0 :         c=*name++;
     369             : 
     370           0 :         if(c>=tokenCount) {
     371           0 :             if(c!=';') {
     372             :                 /* implicit letter */
     373           0 :                 if((char)c!=*otherName++) {
     374           0 :                     return FALSE;
     375             :                 }
     376             :             } else {
     377             :                 /* finished */
     378           0 :                 break;
     379             :             }
     380             :         } else {
     381           0 :             token=tokens[c];
     382           0 :             if(token==(uint16_t)(-2)) {
     383             :                 /* this is a lead byte for a double-byte token */
     384           0 :                 token=tokens[c<<8|*name++];
     385           0 :                 --nameLength;
     386             :             }
     387           0 :             if(token==(uint16_t)(-1)) {
     388           0 :                 if(c!=';') {
     389             :                     /* explicit letter */
     390           0 :                     if((char)c!=*otherName++) {
     391           0 :                         return FALSE;
     392             :                     }
     393             :                 } else {
     394             :                     /* stop, but skip the semicolon if we are seeking
     395             :                        extended names and there was no 2.0 name but there
     396             :                        is a 1.0 name. */
     397           0 :                     if(otherName == origOtherName && nameChoice == U_EXTENDED_CHAR_NAME) {
     398           0 :                         if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
     399           0 :                             continue;
     400             :                         }
     401             :                     }
     402             :                     /* finished */
     403           0 :                     break;
     404             :                 }
     405             :             } else {
     406             :                 /* write token word */
     407           0 :                 uint8_t *tokenString=tokenStrings+token;
     408           0 :                 while((c=*tokenString++)!=0) {
     409           0 :                     if((char)c!=*otherName++) {
     410           0 :                         return FALSE;
     411             :                     }
     412             :                 }
     413             :             }
     414             :         }
     415             :     }
     416             : 
     417             :     /* complete match? */
     418           0 :     return (UBool)(*otherName==0);
     419             : }
     420             : 
     421           0 : static uint8_t getCharCat(UChar32 cp) {
     422             :     uint8_t cat;
     423             : 
     424           0 :     if (U_IS_UNICODE_NONCHAR(cp)) {
     425           0 :         return U_NONCHARACTER_CODE_POINT;
     426             :     }
     427             : 
     428           0 :     if ((cat = u_charType(cp)) == U_SURROGATE) {
     429           0 :         cat = U_IS_LEAD(cp) ? U_LEAD_SURROGATE : U_TRAIL_SURROGATE;
     430             :     }
     431             : 
     432           0 :     return cat;
     433             : }
     434             : 
     435           0 : static const char *getCharCatName(UChar32 cp) {
     436           0 :     uint8_t cat = getCharCat(cp);
     437             : 
     438             :     /* Return unknown if the table of names above is not up to
     439             :        date. */
     440             : 
     441           0 :     if (cat >= UPRV_LENGTHOF(charCatNames)) {
     442           0 :         return "unknown";
     443             :     } else {
     444           0 :         return charCatNames[cat];
     445             :     }
     446             : }
     447             : 
     448           0 : static uint16_t getExtName(uint32_t code, char *buffer, uint16_t bufferLength) {
     449           0 :     const char *catname = getCharCatName(code);
     450           0 :     uint16_t length = 0;
     451             : 
     452             :     UChar32 cp;
     453             :     int ndigits, i;
     454             :     
     455           0 :     WRITE_CHAR(buffer, bufferLength, length, '<');
     456           0 :     while (catname[length - 1]) {
     457           0 :         WRITE_CHAR(buffer, bufferLength, length, catname[length - 1]);
     458             :     }
     459           0 :     WRITE_CHAR(buffer, bufferLength, length, '-');
     460           0 :     for (cp = code, ndigits = 0; cp; ++ndigits, cp >>= 4)
     461             :         ;
     462           0 :     if (ndigits < 4)
     463           0 :         ndigits = 4;
     464           0 :     for (cp = code, i = ndigits; (cp || i > 0) && bufferLength; cp >>= 4, bufferLength--) {
     465           0 :         uint8_t v = (uint8_t)(cp & 0xf);
     466           0 :         buffer[--i] = (v < 10 ? '0' + v : 'A' + v - 10);
     467             :     }
     468           0 :     buffer += ndigits;
     469           0 :     length += ndigits;
     470           0 :     WRITE_CHAR(buffer, bufferLength, length, '>');
     471             : 
     472           0 :     return length;
     473             : }
     474             : 
     475             : /*
     476             :  * getGroup() does a binary search for the group that contains the
     477             :  * Unicode code point "code".
     478             :  * The return value is always a valid Group* that may contain "code"
     479             :  * or else is the highest group before "code".
     480             :  * If the lowest group is after "code", then that one is returned.
     481             :  */
     482             : static const uint16_t *
     483           0 : getGroup(UCharNames *names, uint32_t code) {
     484           0 :     const uint16_t *groups=GET_GROUPS(names);
     485           0 :     uint16_t groupMSB=(uint16_t)(code>>GROUP_SHIFT),
     486           0 :              start=0,
     487           0 :              limit=*groups++,
     488             :              number;
     489             : 
     490             :     /* binary search for the group of names that contains the one for code */
     491           0 :     while(start<limit-1) {
     492           0 :         number=(uint16_t)((start+limit)/2);
     493           0 :         if(groupMSB<groups[number*GROUP_LENGTH+GROUP_MSB]) {
     494           0 :             limit=number;
     495             :         } else {
     496           0 :             start=number;
     497             :         }
     498             :     }
     499             : 
     500             :     /* return this regardless of whether it is an exact match */
     501           0 :     return groups+start*GROUP_LENGTH;
     502             : }
     503             : 
     504             : /*
     505             :  * expandGroupLengths() reads a block of compressed lengths of 32 strings and
     506             :  * expands them into offsets and lengths for each string.
     507             :  * Lengths are stored with a variable-width encoding in consecutive nibbles:
     508             :  * If a nibble<0xc, then it is the length itself (0=empty string).
     509             :  * If a nibble>=0xc, then it forms a length value with the following nibble.
     510             :  * Calculation see below.
     511             :  * The offsets and lengths arrays must be at least 33 (one more) long because
     512             :  * there is no check here at the end if the last nibble is still used.
     513             :  */
     514             : static const uint8_t *
     515           0 : expandGroupLengths(const uint8_t *s,
     516             :                    uint16_t offsets[LINES_PER_GROUP+1], uint16_t lengths[LINES_PER_GROUP+1]) {
     517             :     /* read the lengths of the 32 strings in this group and get each string's offset */
     518           0 :     uint16_t i=0, offset=0, length=0;
     519             :     uint8_t lengthByte;
     520             : 
     521             :     /* all 32 lengths must be read to get the offset of the first group string */
     522           0 :     while(i<LINES_PER_GROUP) {
     523           0 :         lengthByte=*s++;
     524             : 
     525             :         /* read even nibble - MSBs of lengthByte */
     526           0 :         if(length>=12) {
     527             :             /* double-nibble length spread across two bytes */
     528           0 :             length=(uint16_t)(((length&0x3)<<4|lengthByte>>4)+12);
     529           0 :             lengthByte&=0xf;
     530           0 :         } else if((lengthByte /* &0xf0 */)>=0xc0) {
     531             :             /* double-nibble length spread across this one byte */
     532           0 :             length=(uint16_t)((lengthByte&0x3f)+12);
     533             :         } else {
     534             :             /* single-nibble length in MSBs */
     535           0 :             length=(uint16_t)(lengthByte>>4);
     536           0 :             lengthByte&=0xf;
     537             :         }
     538             : 
     539           0 :         *offsets++=offset;
     540           0 :         *lengths++=length;
     541             : 
     542           0 :         offset+=length;
     543           0 :         ++i;
     544             : 
     545             :         /* read odd nibble - LSBs of lengthByte */
     546           0 :         if((lengthByte&0xf0)==0) {
     547             :             /* this nibble was not consumed for a double-nibble length above */
     548           0 :             length=lengthByte;
     549           0 :             if(length<12) {
     550             :                 /* single-nibble length in LSBs */
     551           0 :                 *offsets++=offset;
     552           0 :                 *lengths++=length;
     553             : 
     554           0 :                 offset+=length;
     555           0 :                 ++i;
     556             :             }
     557             :         } else {
     558           0 :             length=0;   /* prevent double-nibble detection in the next iteration */
     559             :         }
     560             :     }
     561             : 
     562             :     /* now, s is at the first group string */
     563           0 :     return s;
     564             : }
     565             : 
     566             : static uint16_t
     567           0 : expandGroupName(UCharNames *names, const uint16_t *group,
     568             :                 uint16_t lineNumber, UCharNameChoice nameChoice,
     569             :                 char *buffer, uint16_t bufferLength) {
     570             :     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
     571           0 :     const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group);
     572           0 :     s=expandGroupLengths(s, offsets, lengths);
     573           0 :     return expandName(names, s+offsets[lineNumber], lengths[lineNumber], nameChoice,
     574           0 :                       buffer, bufferLength);
     575             : }
     576             : 
     577             : static uint16_t
     578           0 : getName(UCharNames *names, uint32_t code, UCharNameChoice nameChoice,
     579             :         char *buffer, uint16_t bufferLength) {
     580           0 :     const uint16_t *group=getGroup(names, code);
     581           0 :     if((uint16_t)(code>>GROUP_SHIFT)==group[GROUP_MSB]) {
     582           0 :         return expandGroupName(names, group, (uint16_t)(code&GROUP_MASK), nameChoice,
     583           0 :                                buffer, bufferLength);
     584             :     } else {
     585             :         /* group not found */
     586             :         /* zero-terminate */
     587           0 :         if(bufferLength>0) {
     588           0 :             *buffer=0;
     589             :         }
     590           0 :         return 0;
     591             :     }
     592             : }
     593             : 
     594             : /*
     595             :  * enumGroupNames() enumerates all the names in a 32-group
     596             :  * and either calls the enumerator function or finds a given input name.
     597             :  */
     598             : static UBool
     599           0 : enumGroupNames(UCharNames *names, const uint16_t *group,
     600             :                UChar32 start, UChar32 end,
     601             :                UEnumCharNamesFn *fn, void *context,
     602             :                UCharNameChoice nameChoice) {
     603             :     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
     604           0 :     const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group);
     605             : 
     606           0 :     s=expandGroupLengths(s, offsets, lengths);
     607           0 :     if(fn!=DO_FIND_NAME) {
     608             :         char buffer[200];
     609             :         uint16_t length;
     610             : 
     611           0 :         while(start<=end) {
     612           0 :             length=expandName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, buffer, sizeof(buffer));
     613           0 :             if (!length && nameChoice == U_EXTENDED_CHAR_NAME) {
     614           0 :                 buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0;
     615             :             }
     616             :             /* here, we assume that the buffer is large enough */
     617           0 :             if(length>0) {
     618           0 :                 if(!fn(context, start, nameChoice, buffer, length)) {
     619           0 :                     return FALSE;
     620             :                 }
     621             :             }
     622           0 :             ++start;
     623             :         }
     624             :     } else {
     625           0 :         const char *otherName=((FindName *)context)->otherName;
     626           0 :         while(start<=end) {
     627           0 :             if(compareName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, otherName)) {
     628           0 :                 ((FindName *)context)->code=start;
     629           0 :                 return FALSE;
     630             :             }
     631           0 :             ++start;
     632             :         }
     633             :     }
     634           0 :     return TRUE;
     635             : }
     636             : 
     637             : /*
     638             :  * enumExtNames enumerate extended names.
     639             :  * It only needs to do it if it is called with a real function and not
     640             :  * with the dummy DO_FIND_NAME, because u_charFromName() does a check
     641             :  * for extended names by itself.
     642             :  */ 
     643             : static UBool
     644           0 : enumExtNames(UChar32 start, UChar32 end,
     645             :              UEnumCharNamesFn *fn, void *context)
     646             : {
     647           0 :     if(fn!=DO_FIND_NAME) {
     648             :         char buffer[200];
     649             :         uint16_t length;
     650             :         
     651           0 :         while(start<=end) {
     652           0 :             buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0;
     653             :             /* here, we assume that the buffer is large enough */
     654           0 :             if(length>0) {
     655           0 :                 if(!fn(context, start, U_EXTENDED_CHAR_NAME, buffer, length)) {
     656           0 :                     return FALSE;
     657             :                 }
     658             :             }
     659           0 :             ++start;
     660             :         }
     661             :     }
     662             : 
     663           0 :     return TRUE;
     664             : }
     665             : 
     666             : static UBool
     667           0 : enumNames(UCharNames *names,
     668             :           UChar32 start, UChar32 limit,
     669             :           UEnumCharNamesFn *fn, void *context,
     670             :           UCharNameChoice nameChoice) {
     671             :     uint16_t startGroupMSB, endGroupMSB, groupCount;
     672             :     const uint16_t *group, *groupLimit;
     673             : 
     674           0 :     startGroupMSB=(uint16_t)(start>>GROUP_SHIFT);
     675           0 :     endGroupMSB=(uint16_t)((limit-1)>>GROUP_SHIFT);
     676             : 
     677             :     /* find the group that contains start, or the highest before it */
     678           0 :     group=getGroup(names, start);
     679             : 
     680           0 :     if(startGroupMSB<group[GROUP_MSB] && nameChoice==U_EXTENDED_CHAR_NAME) {
     681             :         /* enumerate synthetic names between start and the group start */
     682           0 :         UChar32 extLimit=((UChar32)group[GROUP_MSB]<<GROUP_SHIFT);
     683           0 :         if(extLimit>limit) {
     684           0 :             extLimit=limit;
     685             :         }
     686           0 :         if(!enumExtNames(start, extLimit-1, fn, context)) {
     687           0 :             return FALSE;
     688             :         }
     689           0 :         start=extLimit;
     690             :     }
     691             : 
     692           0 :     if(startGroupMSB==endGroupMSB) {
     693           0 :         if(startGroupMSB==group[GROUP_MSB]) {
     694             :             /* if start and limit-1 are in the same group, then enumerate only in that one */
     695           0 :             return enumGroupNames(names, group, start, limit-1, fn, context, nameChoice);
     696             :         }
     697             :     } else {
     698           0 :         const uint16_t *groups=GET_GROUPS(names);
     699           0 :         groupCount=*groups++;
     700           0 :         groupLimit=groups+groupCount*GROUP_LENGTH;
     701             : 
     702           0 :         if(startGroupMSB==group[GROUP_MSB]) {
     703             :             /* enumerate characters in the partial start group */
     704           0 :             if((start&GROUP_MASK)!=0) {
     705           0 :                 if(!enumGroupNames(names, group,
     706           0 :                                    start, ((UChar32)startGroupMSB<<GROUP_SHIFT)+LINES_PER_GROUP-1,
     707             :                                    fn, context, nameChoice)) {
     708           0 :                     return FALSE;
     709             :                 }
     710           0 :                 group=NEXT_GROUP(group); /* continue with the next group */
     711             :             }
     712           0 :         } else if(startGroupMSB>group[GROUP_MSB]) {
     713             :             /* make sure that we start enumerating with the first group after start */
     714           0 :             const uint16_t *nextGroup=NEXT_GROUP(group);
     715           0 :             if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > startGroupMSB && nameChoice == U_EXTENDED_CHAR_NAME) {
     716           0 :                 UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT;
     717           0 :                 if (end > limit) {
     718           0 :                     end = limit;
     719             :                 }
     720           0 :                 if (!enumExtNames(start, end - 1, fn, context)) {
     721           0 :                     return FALSE;
     722             :                 }
     723             :             }
     724           0 :             group=nextGroup;
     725             :         }
     726             : 
     727             :         /* enumerate entire groups between the start- and end-groups */
     728           0 :         while(group<groupLimit && group[GROUP_MSB]<endGroupMSB) {
     729             :             const uint16_t *nextGroup;
     730           0 :             start=(UChar32)group[GROUP_MSB]<<GROUP_SHIFT;
     731           0 :             if(!enumGroupNames(names, group, start, start+LINES_PER_GROUP-1, fn, context, nameChoice)) {
     732           0 :                 return FALSE;
     733             :             }
     734           0 :             nextGroup=NEXT_GROUP(group);
     735           0 :             if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > group[GROUP_MSB] + 1 && nameChoice == U_EXTENDED_CHAR_NAME) {
     736           0 :                 UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT;
     737           0 :                 if (end > limit) {
     738           0 :                     end = limit;
     739             :                 }
     740           0 :                 if (!enumExtNames((group[GROUP_MSB] + 1) << GROUP_SHIFT, end - 1, fn, context)) {
     741           0 :                     return FALSE;
     742             :                 }
     743             :             }
     744           0 :             group=nextGroup;
     745             :         }
     746             : 
     747             :         /* enumerate within the end group (group[GROUP_MSB]==endGroupMSB) */
     748           0 :         if(group<groupLimit && group[GROUP_MSB]==endGroupMSB) {
     749           0 :             return enumGroupNames(names, group, (limit-1)&~GROUP_MASK, limit-1, fn, context, nameChoice);
     750           0 :         } else if (nameChoice == U_EXTENDED_CHAR_NAME && group == groupLimit) {
     751           0 :             UChar32 next = (PREV_GROUP(group)[GROUP_MSB] + 1) << GROUP_SHIFT;
     752           0 :             if (next > start) {
     753           0 :                 start = next;
     754           0 :             }
     755             :         } else {
     756           0 :             return TRUE;
     757             :         }
     758             :     }
     759             : 
     760             :     /* we have not found a group, which means everything is made of
     761             :        extended names. */
     762           0 :     if (nameChoice == U_EXTENDED_CHAR_NAME) {
     763           0 :         if (limit > UCHAR_MAX_VALUE + 1) {
     764           0 :             limit = UCHAR_MAX_VALUE + 1;
     765             :         }
     766           0 :         return enumExtNames(start, limit - 1, fn, context);
     767             :     }
     768             :     
     769           0 :     return TRUE;
     770             : }
     771             : 
     772             : static uint16_t
     773           0 : writeFactorSuffix(const uint16_t *factors, uint16_t count,
     774             :                   const char *s, /* suffix elements */
     775             :                   uint32_t code,
     776             :                   uint16_t indexes[8], /* output fields from here */
     777             :                   const char *elementBases[8], const char *elements[8],
     778             :                   char *buffer, uint16_t bufferLength) {
     779           0 :     uint16_t i, factor, bufferPos=0;
     780             :     char c;
     781             : 
     782             :     /* write elements according to the factors */
     783             : 
     784             :     /*
     785             :      * the factorized elements are determined by modulo arithmetic
     786             :      * with the factors of this algorithm
     787             :      *
     788             :      * note that for fewer operations, count is decremented here
     789             :      */
     790           0 :     --count;
     791           0 :     for(i=count; i>0; --i) {
     792           0 :         factor=factors[i];
     793           0 :         indexes[i]=(uint16_t)(code%factor);
     794           0 :         code/=factor;
     795             :     }
     796             :     /*
     797             :      * we don't need to calculate the last modulus because start<=code<=end
     798             :      * guarantees here that code<=factors[0]
     799             :      */
     800           0 :     indexes[0]=(uint16_t)code;
     801             : 
     802             :     /* write each element */
     803             :     for(;;) {
     804           0 :         if(elementBases!=NULL) {
     805           0 :             *elementBases++=s;
     806             :         }
     807             : 
     808             :         /* skip indexes[i] strings */
     809           0 :         factor=indexes[i];
     810           0 :         while(factor>0) {
     811           0 :             while(*s++!=0) {}
     812           0 :             --factor;
     813             :         }
     814           0 :         if(elements!=NULL) {
     815           0 :             *elements++=s;
     816             :         }
     817             : 
     818             :         /* write element */
     819           0 :         while((c=*s++)!=0) {
     820           0 :             WRITE_CHAR(buffer, bufferLength, bufferPos, c);
     821             :         }
     822             : 
     823             :         /* we do not need to perform the rest of this loop for i==count - break here */
     824           0 :         if(i>=count) {
     825           0 :             break;
     826             :         }
     827             : 
     828             :         /* skip the rest of the strings for this factors[i] */
     829           0 :         factor=(uint16_t)(factors[i]-indexes[i]-1);
     830           0 :         while(factor>0) {
     831           0 :             while(*s++!=0) {}
     832           0 :             --factor;
     833             :         }
     834             : 
     835           0 :         ++i;
     836             :     }
     837             : 
     838             :     /* zero-terminate */
     839           0 :     if(bufferLength>0) {
     840           0 :         *buffer=0;
     841             :     }
     842             : 
     843           0 :     return bufferPos;
     844             : }
     845             : 
     846             : /*
     847             :  * Important:
     848             :  * Parts of findAlgName() are almost the same as some of getAlgName().
     849             :  * Fixes must be applied to both.
     850             :  */
     851             : static uint16_t
     852           0 : getAlgName(AlgorithmicRange *range, uint32_t code, UCharNameChoice nameChoice,
     853             :         char *buffer, uint16_t bufferLength) {
     854           0 :     uint16_t bufferPos=0;
     855             : 
     856             :     /* Only the normative character name can be algorithmic. */
     857           0 :     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
     858             :         /* zero-terminate */
     859           0 :         if(bufferLength>0) {
     860           0 :             *buffer=0;
     861             :         }
     862           0 :         return 0;
     863             :     }
     864             : 
     865           0 :     switch(range->type) {
     866             :     case 0: {
     867             :         /* name = prefix hex-digits */
     868           0 :         const char *s=(const char *)(range+1);
     869             :         char c;
     870             : 
     871             :         uint16_t i, count;
     872             : 
     873             :         /* copy prefix */
     874           0 :         while((c=*s++)!=0) {
     875           0 :             WRITE_CHAR(buffer, bufferLength, bufferPos, c);
     876             :         }
     877             : 
     878             :         /* write hexadecimal code point value */
     879           0 :         count=range->variant;
     880             : 
     881             :         /* zero-terminate */
     882           0 :         if(count<bufferLength) {
     883           0 :             buffer[count]=0;
     884             :         }
     885             : 
     886           0 :         for(i=count; i>0;) {
     887           0 :             if(--i<bufferLength) {
     888           0 :                 c=(char)(code&0xf);
     889           0 :                 if(c<10) {
     890           0 :                     c+='0';
     891             :                 } else {
     892           0 :                     c+='A'-10;
     893             :                 }
     894           0 :                 buffer[i]=c;
     895             :             }
     896           0 :             code>>=4;
     897             :         }
     898             : 
     899           0 :         bufferPos+=count;
     900           0 :         break;
     901             :     }
     902             :     case 1: {
     903             :         /* name = prefix factorized-elements */
     904             :         uint16_t indexes[8];
     905           0 :         const uint16_t *factors=(const uint16_t *)(range+1);
     906           0 :         uint16_t count=range->variant;
     907           0 :         const char *s=(const char *)(factors+count);
     908             :         char c;
     909             : 
     910             :         /* copy prefix */
     911           0 :         while((c=*s++)!=0) {
     912           0 :             WRITE_CHAR(buffer, bufferLength, bufferPos, c);
     913             :         }
     914             : 
     915           0 :         bufferPos+=writeFactorSuffix(factors, count,
     916           0 :                                      s, code-range->start, indexes, NULL, NULL, buffer, bufferLength);
     917           0 :         break;
     918             :     }
     919             :     default:
     920             :         /* undefined type */
     921             :         /* zero-terminate */
     922           0 :         if(bufferLength>0) {
     923           0 :             *buffer=0;
     924             :         }
     925           0 :         break;
     926             :     }
     927             : 
     928           0 :     return bufferPos;
     929             : }
     930             : 
     931             : /*
     932             :  * Important: enumAlgNames() and findAlgName() are almost the same.
     933             :  * Any fix must be applied to both.
     934             :  */
     935             : static UBool
     936           0 : enumAlgNames(AlgorithmicRange *range,
     937             :              UChar32 start, UChar32 limit,
     938             :              UEnumCharNamesFn *fn, void *context,
     939             :              UCharNameChoice nameChoice) {
     940             :     char buffer[200];
     941             :     uint16_t length;
     942             : 
     943           0 :     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
     944           0 :         return TRUE;
     945             :     }
     946             : 
     947           0 :     switch(range->type) {
     948             :     case 0: {
     949             :         char *s, *end;
     950             :         char c;
     951             : 
     952             :         /* get the full name of the start character */
     953           0 :         length=getAlgName(range, (uint32_t)start, nameChoice, buffer, sizeof(buffer));
     954           0 :         if(length<=0) {
     955           0 :             return TRUE;
     956             :         }
     957             : 
     958             :         /* call the enumerator function with this first character */
     959           0 :         if(!fn(context, start, nameChoice, buffer, length)) {
     960           0 :             return FALSE;
     961             :         }
     962             : 
     963             :         /* go to the end of the name; all these names have the same length */
     964           0 :         end=buffer;
     965           0 :         while(*end!=0) {
     966           0 :             ++end;
     967             :         }
     968             : 
     969             :         /* enumerate the rest of the names */
     970           0 :         while(++start<limit) {
     971             :             /* increment the hexadecimal number on a character-basis */
     972           0 :             s=end;
     973             :             for (;;) {
     974           0 :                 c=*--s;
     975           0 :                 if(('0'<=c && c<'9') || ('A'<=c && c<'F')) {
     976           0 :                     *s=(char)(c+1);
     977           0 :                     break;
     978           0 :                 } else if(c=='9') {
     979           0 :                     *s='A';
     980           0 :                     break;
     981           0 :                 } else if(c=='F') {
     982           0 :                     *s='0';
     983             :                 }
     984             :             }
     985             : 
     986           0 :             if(!fn(context, start, nameChoice, buffer, length)) {
     987           0 :                 return FALSE;
     988             :             }
     989             :         }
     990           0 :         break;
     991             :     }
     992             :     case 1: {
     993             :         uint16_t indexes[8];
     994             :         const char *elementBases[8], *elements[8];
     995           0 :         const uint16_t *factors=(const uint16_t *)(range+1);
     996           0 :         uint16_t count=range->variant;
     997           0 :         const char *s=(const char *)(factors+count);
     998             :         char *suffix, *t;
     999             :         uint16_t prefixLength, i, idx;
    1000             : 
    1001             :         char c;
    1002             : 
    1003             :         /* name = prefix factorized-elements */
    1004             : 
    1005             :         /* copy prefix */
    1006           0 :         suffix=buffer;
    1007           0 :         prefixLength=0;
    1008           0 :         while((c=*s++)!=0) {
    1009           0 :             *suffix++=c;
    1010           0 :             ++prefixLength;
    1011             :         }
    1012             : 
    1013             :         /* append the suffix of the start character */
    1014           0 :         length=(uint16_t)(prefixLength+writeFactorSuffix(factors, count,
    1015           0 :                                               s, (uint32_t)start-range->start,
    1016             :                                               indexes, elementBases, elements,
    1017           0 :                                               suffix, (uint16_t)(sizeof(buffer)-prefixLength)));
    1018             : 
    1019             :         /* call the enumerator function with this first character */
    1020           0 :         if(!fn(context, start, nameChoice, buffer, length)) {
    1021           0 :             return FALSE;
    1022             :         }
    1023             : 
    1024             :         /* enumerate the rest of the names */
    1025           0 :         while(++start<limit) {
    1026             :             /* increment the indexes in lexical order bound by the factors */
    1027           0 :             i=count;
    1028             :             for (;;) {
    1029           0 :                 idx=(uint16_t)(indexes[--i]+1);
    1030           0 :                 if(idx<factors[i]) {
    1031             :                     /* skip one index and its element string */
    1032           0 :                     indexes[i]=idx;
    1033           0 :                     s=elements[i];
    1034           0 :                     while(*s++!=0) {
    1035             :                     }
    1036           0 :                     elements[i]=s;
    1037           0 :                     break;
    1038             :                 } else {
    1039             :                     /* reset this index to 0 and its element string to the first one */
    1040           0 :                     indexes[i]=0;
    1041           0 :                     elements[i]=elementBases[i];
    1042             :                 }
    1043             :             }
    1044             : 
    1045             :             /* to make matters a little easier, just append all elements to the suffix */
    1046           0 :             t=suffix;
    1047           0 :             length=prefixLength;
    1048           0 :             for(i=0; i<count; ++i) {
    1049           0 :                 s=elements[i];
    1050           0 :                 while((c=*s++)!=0) {
    1051           0 :                     *t++=c;
    1052           0 :                     ++length;
    1053             :                 }
    1054             :             }
    1055             :             /* zero-terminate */
    1056           0 :             *t=0;
    1057             : 
    1058           0 :             if(!fn(context, start, nameChoice, buffer, length)) {
    1059           0 :                 return FALSE;
    1060             :             }
    1061             :         }
    1062           0 :         break;
    1063             :     }
    1064             :     default:
    1065             :         /* undefined type */
    1066           0 :         break;
    1067             :     }
    1068             : 
    1069           0 :     return TRUE;
    1070             : }
    1071             : 
    1072             : /*
    1073             :  * findAlgName() is almost the same as enumAlgNames() except that it
    1074             :  * returns the code point for a name if it fits into the range.
    1075             :  * It returns 0xffff otherwise.
    1076             :  */
    1077             : static UChar32
    1078           0 : findAlgName(AlgorithmicRange *range, UCharNameChoice nameChoice, const char *otherName) {
    1079             :     UChar32 code;
    1080             : 
    1081           0 :     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
    1082           0 :         return 0xffff;
    1083             :     }
    1084             : 
    1085           0 :     switch(range->type) {
    1086             :     case 0: {
    1087             :         /* name = prefix hex-digits */
    1088           0 :         const char *s=(const char *)(range+1);
    1089             :         char c;
    1090             : 
    1091             :         uint16_t i, count;
    1092             : 
    1093             :         /* compare prefix */
    1094           0 :         while((c=*s++)!=0) {
    1095           0 :             if((char)c!=*otherName++) {
    1096           0 :                 return 0xffff;
    1097             :             }
    1098             :         }
    1099             : 
    1100             :         /* read hexadecimal code point value */
    1101           0 :         count=range->variant;
    1102           0 :         code=0;
    1103           0 :         for(i=0; i<count; ++i) {
    1104           0 :             c=*otherName++;
    1105           0 :             if('0'<=c && c<='9') {
    1106           0 :                 code=(code<<4)|(c-'0');
    1107           0 :             } else if('A'<=c && c<='F') {
    1108           0 :                 code=(code<<4)|(c-'A'+10);
    1109             :             } else {
    1110           0 :                 return 0xffff;
    1111             :             }
    1112             :         }
    1113             : 
    1114             :         /* does it fit into the range? */
    1115           0 :         if(*otherName==0 && range->start<=(uint32_t)code && (uint32_t)code<=range->end) {
    1116           0 :             return code;
    1117             :         }
    1118           0 :         break;
    1119             :     }
    1120             :     case 1: {
    1121             :         char buffer[64];
    1122             :         uint16_t indexes[8];
    1123             :         const char *elementBases[8], *elements[8];
    1124           0 :         const uint16_t *factors=(const uint16_t *)(range+1);
    1125           0 :         uint16_t count=range->variant;
    1126           0 :         const char *s=(const char *)(factors+count), *t;
    1127             :         UChar32 start, limit;
    1128             :         uint16_t i, idx;
    1129             : 
    1130             :         char c;
    1131             : 
    1132             :         /* name = prefix factorized-elements */
    1133             : 
    1134             :         /* compare prefix */
    1135           0 :         while((c=*s++)!=0) {
    1136           0 :             if((char)c!=*otherName++) {
    1137           0 :                 return 0xffff;
    1138             :             }
    1139             :         }
    1140             : 
    1141           0 :         start=(UChar32)range->start;
    1142           0 :         limit=(UChar32)(range->end+1);
    1143             : 
    1144             :         /* initialize the suffix elements for enumeration; indexes should all be set to 0 */
    1145           0 :         writeFactorSuffix(factors, count, s, 0,
    1146           0 :                           indexes, elementBases, elements, buffer, sizeof(buffer));
    1147             : 
    1148             :         /* compare the first suffix */
    1149           0 :         if(0==uprv_strcmp(otherName, buffer)) {
    1150           0 :             return start;
    1151             :         }
    1152             : 
    1153             :         /* enumerate and compare the rest of the suffixes */
    1154           0 :         while(++start<limit) {
    1155             :             /* increment the indexes in lexical order bound by the factors */
    1156           0 :             i=count;
    1157             :             for (;;) {
    1158           0 :                 idx=(uint16_t)(indexes[--i]+1);
    1159           0 :                 if(idx<factors[i]) {
    1160             :                     /* skip one index and its element string */
    1161           0 :                     indexes[i]=idx;
    1162           0 :                     s=elements[i];
    1163           0 :                     while(*s++!=0) {}
    1164           0 :                     elements[i]=s;
    1165           0 :                     break;
    1166             :                 } else {
    1167             :                     /* reset this index to 0 and its element string to the first one */
    1168           0 :                     indexes[i]=0;
    1169           0 :                     elements[i]=elementBases[i];
    1170             :                 }
    1171             :             }
    1172             : 
    1173             :             /* to make matters a little easier, just compare all elements of the suffix */
    1174           0 :             t=otherName;
    1175           0 :             for(i=0; i<count; ++i) {
    1176           0 :                 s=elements[i];
    1177           0 :                 while((c=*s++)!=0) {
    1178           0 :                     if(c!=*t++) {
    1179           0 :                         s=""; /* does not match */
    1180           0 :                         i=99;
    1181             :                     }
    1182             :                 }
    1183             :             }
    1184           0 :             if(i<99 && *t==0) {
    1185           0 :                 return start;
    1186             :             }
    1187             :         }
    1188           0 :         break;
    1189             :     }
    1190             :     default:
    1191             :         /* undefined type */
    1192           0 :         break;
    1193             :     }
    1194             : 
    1195           0 :     return 0xffff;
    1196             : }
    1197             : 
    1198             : /* sets of name characters, maximum name lengths ---------------------------- */
    1199             : 
    1200             : #define SET_ADD(set, c) ((set)[(uint8_t)c>>5]|=((uint32_t)1<<((uint8_t)c&0x1f)))
    1201             : #define SET_CONTAINS(set, c) (((set)[(uint8_t)c>>5]&((uint32_t)1<<((uint8_t)c&0x1f)))!=0)
    1202             : 
    1203             : static int32_t
    1204           0 : calcStringSetLength(uint32_t set[8], const char *s) {
    1205           0 :     int32_t length=0;
    1206             :     char c;
    1207             : 
    1208           0 :     while((c=*s++)!=0) {
    1209           0 :         SET_ADD(set, c);
    1210           0 :         ++length;
    1211             :     }
    1212           0 :     return length;
    1213             : }
    1214             : 
    1215             : static int32_t
    1216           0 : calcAlgNameSetsLengths(int32_t maxNameLength) {
    1217             :     AlgorithmicRange *range;
    1218             :     uint32_t *p;
    1219             :     uint32_t rangeCount;
    1220             :     int32_t length;
    1221             : 
    1222             :     /* enumerate algorithmic ranges */
    1223           0 :     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
    1224           0 :     rangeCount=*p;
    1225           0 :     range=(AlgorithmicRange *)(p+1);
    1226           0 :     while(rangeCount>0) {
    1227           0 :         switch(range->type) {
    1228             :         case 0:
    1229             :             /* name = prefix + (range->variant times) hex-digits */
    1230             :             /* prefix */
    1231           0 :             length=calcStringSetLength(gNameSet, (const char *)(range+1))+range->variant;
    1232           0 :             if(length>maxNameLength) {
    1233           0 :                 maxNameLength=length;
    1234             :             }
    1235           0 :             break;
    1236             :         case 1: {
    1237             :             /* name = prefix factorized-elements */
    1238           0 :             const uint16_t *factors=(const uint16_t *)(range+1);
    1239             :             const char *s;
    1240           0 :             int32_t i, count=range->variant, factor, factorLength, maxFactorLength;
    1241             : 
    1242             :             /* prefix length */
    1243           0 :             s=(const char *)(factors+count);
    1244           0 :             length=calcStringSetLength(gNameSet, s);
    1245           0 :             s+=length+1; /* start of factor suffixes */
    1246             : 
    1247             :             /* get the set and maximum factor suffix length for each factor */
    1248           0 :             for(i=0; i<count; ++i) {
    1249           0 :                 maxFactorLength=0;
    1250           0 :                 for(factor=factors[i]; factor>0; --factor) {
    1251           0 :                     factorLength=calcStringSetLength(gNameSet, s);
    1252           0 :                     s+=factorLength+1;
    1253           0 :                     if(factorLength>maxFactorLength) {
    1254           0 :                         maxFactorLength=factorLength;
    1255             :                     }
    1256             :                 }
    1257           0 :                 length+=maxFactorLength;
    1258             :             }
    1259             : 
    1260           0 :             if(length>maxNameLength) {
    1261           0 :                 maxNameLength=length;
    1262             :             }
    1263           0 :             break;
    1264             :         }
    1265             :         default:
    1266             :             /* unknown type */
    1267           0 :             break;
    1268             :         }
    1269             : 
    1270           0 :         range=(AlgorithmicRange *)((uint8_t *)range+range->size);
    1271           0 :         --rangeCount;
    1272             :     }
    1273           0 :     return maxNameLength;
    1274             : }
    1275             : 
    1276             : static int32_t
    1277           0 : calcExtNameSetsLengths(int32_t maxNameLength) {
    1278             :     int32_t i, length;
    1279             : 
    1280           0 :     for(i=0; i<UPRV_LENGTHOF(charCatNames); ++i) {
    1281             :         /*
    1282             :          * for each category, count the length of the category name
    1283             :          * plus 9=
    1284             :          * 2 for <>
    1285             :          * 1 for -
    1286             :          * 6 for most hex digits per code point
    1287             :          */
    1288           0 :         length=9+calcStringSetLength(gNameSet, charCatNames[i]);
    1289           0 :         if(length>maxNameLength) {
    1290           0 :             maxNameLength=length;
    1291             :         }
    1292             :     }
    1293           0 :     return maxNameLength;
    1294             : }
    1295             : 
    1296             : static int32_t
    1297           0 : calcNameSetLength(const uint16_t *tokens, uint16_t tokenCount, const uint8_t *tokenStrings, int8_t *tokenLengths,
    1298             :                   uint32_t set[8],
    1299             :                   const uint8_t **pLine, const uint8_t *lineLimit) {
    1300           0 :     const uint8_t *line=*pLine;
    1301           0 :     int32_t length=0, tokenLength;
    1302             :     uint16_t c, token;
    1303             : 
    1304           0 :     while(line!=lineLimit && (c=*line++)!=(uint8_t)';') {
    1305           0 :         if(c>=tokenCount) {
    1306             :             /* implicit letter */
    1307           0 :             SET_ADD(set, c);
    1308           0 :             ++length;
    1309             :         } else {
    1310           0 :             token=tokens[c];
    1311           0 :             if(token==(uint16_t)(-2)) {
    1312             :                 /* this is a lead byte for a double-byte token */
    1313           0 :                 c=c<<8|*line++;
    1314           0 :                 token=tokens[c];
    1315             :             }
    1316           0 :             if(token==(uint16_t)(-1)) {
    1317             :                 /* explicit letter */
    1318           0 :                 SET_ADD(set, c);
    1319           0 :                 ++length;
    1320             :             } else {
    1321             :                 /* count token word */
    1322           0 :                 if(tokenLengths!=NULL) {
    1323             :                     /* use cached token length */
    1324           0 :                     tokenLength=tokenLengths[c];
    1325           0 :                     if(tokenLength==0) {
    1326           0 :                         tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token);
    1327           0 :                         tokenLengths[c]=(int8_t)tokenLength;
    1328             :                     }
    1329             :                 } else {
    1330           0 :                     tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token);
    1331             :                 }
    1332           0 :                 length+=tokenLength;
    1333             :             }
    1334             :         }
    1335             :     }
    1336             : 
    1337           0 :     *pLine=line;
    1338           0 :     return length;
    1339             : }
    1340             : 
    1341             : static void
    1342           0 : calcGroupNameSetsLengths(int32_t maxNameLength) {
    1343             :     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
    1344             : 
    1345           0 :     uint16_t *tokens=(uint16_t *)uCharNames+8;
    1346           0 :     uint16_t tokenCount=*tokens++;
    1347           0 :     uint8_t *tokenStrings=(uint8_t *)uCharNames+uCharNames->tokenStringOffset;
    1348             : 
    1349             :     int8_t *tokenLengths;
    1350             : 
    1351             :     const uint16_t *group;
    1352             :     const uint8_t *s, *line, *lineLimit;
    1353             : 
    1354             :     int32_t groupCount, lineNumber, length;
    1355             : 
    1356           0 :     tokenLengths=(int8_t *)uprv_malloc(tokenCount);
    1357           0 :     if(tokenLengths!=NULL) {
    1358           0 :         uprv_memset(tokenLengths, 0, tokenCount);
    1359             :     }
    1360             : 
    1361           0 :     group=GET_GROUPS(uCharNames);
    1362           0 :     groupCount=*group++;
    1363             : 
    1364             :     /* enumerate all groups */
    1365           0 :     while(groupCount>0) {
    1366           0 :         s=(uint8_t *)uCharNames+uCharNames->groupStringOffset+GET_GROUP_OFFSET(group);
    1367           0 :         s=expandGroupLengths(s, offsets, lengths);
    1368             : 
    1369             :         /* enumerate all lines in each group */
    1370           0 :         for(lineNumber=0; lineNumber<LINES_PER_GROUP; ++lineNumber) {
    1371           0 :             line=s+offsets[lineNumber];
    1372           0 :             length=lengths[lineNumber];
    1373           0 :             if(length==0) {
    1374           0 :                 continue;
    1375             :             }
    1376             : 
    1377           0 :             lineLimit=line+length;
    1378             : 
    1379             :             /* read regular name */
    1380           0 :             length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
    1381           0 :             if(length>maxNameLength) {
    1382           0 :                 maxNameLength=length;
    1383             :             }
    1384           0 :             if(line==lineLimit) {
    1385           0 :                 continue;
    1386             :             }
    1387             : 
    1388             :             /* read Unicode 1.0 name */
    1389           0 :             length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
    1390           0 :             if(length>maxNameLength) {
    1391           0 :                 maxNameLength=length;
    1392             :             }
    1393           0 :             if(line==lineLimit) {
    1394           0 :                 continue;
    1395             :             }
    1396             : 
    1397             :             /* read ISO comment */
    1398             :             /*length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gISOCommentSet, &line, lineLimit);*/
    1399             :         }
    1400             : 
    1401           0 :         group=NEXT_GROUP(group);
    1402           0 :         --groupCount;
    1403             :     }
    1404             : 
    1405           0 :     if(tokenLengths!=NULL) {
    1406           0 :         uprv_free(tokenLengths);
    1407             :     }
    1408             : 
    1409             :     /* set gMax... - name length last for threading */
    1410           0 :     gMaxNameLength=maxNameLength;
    1411           0 : }
    1412             : 
    1413             : static UBool
    1414           0 : calcNameSetsLengths(UErrorCode *pErrorCode) {
    1415             :     static const char extChars[]="0123456789ABCDEF<>-";
    1416             :     int32_t i, maxNameLength;
    1417             : 
    1418           0 :     if(gMaxNameLength!=0) {
    1419           0 :         return TRUE;
    1420             :     }
    1421             : 
    1422           0 :     if(!isDataLoaded(pErrorCode)) {
    1423           0 :         return FALSE;
    1424             :     }
    1425             : 
    1426             :     /* set hex digits, used in various names, and <>-, used in extended names */
    1427           0 :     for(i=0; i<(int32_t)sizeof(extChars)-1; ++i) {
    1428           0 :         SET_ADD(gNameSet, extChars[i]);
    1429             :     }
    1430             : 
    1431             :     /* set sets and lengths from algorithmic names */
    1432           0 :     maxNameLength=calcAlgNameSetsLengths(0);
    1433             : 
    1434             :     /* set sets and lengths from extended names */
    1435           0 :     maxNameLength=calcExtNameSetsLengths(maxNameLength);
    1436             : 
    1437             :     /* set sets and lengths from group names, set global maximum values */
    1438           0 :     calcGroupNameSetsLengths(maxNameLength);
    1439             : 
    1440           0 :     return TRUE;
    1441             : }
    1442             : 
    1443             : U_NAMESPACE_END
    1444             : 
    1445             : /* public API --------------------------------------------------------------- */
    1446             : 
    1447             : U_NAMESPACE_USE
    1448             : 
    1449             : U_CAPI int32_t U_EXPORT2
    1450           0 : u_charName(UChar32 code, UCharNameChoice nameChoice,
    1451             :            char *buffer, int32_t bufferLength,
    1452             :            UErrorCode *pErrorCode) {
    1453             :      AlgorithmicRange *algRange;
    1454             :     uint32_t *p;
    1455             :     uint32_t i;
    1456             :     int32_t length;
    1457             : 
    1458             :     /* check the argument values */
    1459           0 :     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    1460           0 :         return 0;
    1461           0 :     } else if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT ||
    1462           0 :               bufferLength<0 || (bufferLength>0 && buffer==NULL)
    1463             :     ) {
    1464           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    1465           0 :         return 0;
    1466             :     }
    1467             : 
    1468           0 :     if((uint32_t)code>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) {
    1469           0 :         return u_terminateChars(buffer, bufferLength, 0, pErrorCode);
    1470             :     }
    1471             : 
    1472           0 :     length=0;
    1473             : 
    1474             :     /* try algorithmic names first */
    1475           0 :     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
    1476           0 :     i=*p;
    1477           0 :     algRange=(AlgorithmicRange *)(p+1);
    1478           0 :     while(i>0) {
    1479           0 :         if(algRange->start<=(uint32_t)code && (uint32_t)code<=algRange->end) {
    1480           0 :             length=getAlgName(algRange, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
    1481           0 :             break;
    1482             :         }
    1483           0 :         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
    1484           0 :         --i;
    1485             :     }
    1486             : 
    1487           0 :     if(i==0) {
    1488           0 :         if (nameChoice == U_EXTENDED_CHAR_NAME) {
    1489           0 :             length = getName(uCharNames, (uint32_t )code, U_EXTENDED_CHAR_NAME, buffer, (uint16_t) bufferLength);
    1490           0 :             if (!length) {
    1491             :                 /* extended character name */
    1492           0 :                 length = getExtName((uint32_t) code, buffer, (uint16_t) bufferLength);
    1493             :             }
    1494             :         } else {
    1495             :             /* normal character name */
    1496           0 :             length=getName(uCharNames, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
    1497             :         }
    1498             :     }
    1499             : 
    1500           0 :     return u_terminateChars(buffer, bufferLength, length, pErrorCode);
    1501             : }
    1502             : 
    1503             : U_CAPI int32_t U_EXPORT2
    1504           0 : u_getISOComment(UChar32 /*c*/,
    1505             :                 char *dest, int32_t destCapacity,
    1506             :                 UErrorCode *pErrorCode) {
    1507             :     /* check the argument values */
    1508           0 :     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    1509           0 :         return 0;
    1510           0 :     } else if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
    1511           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    1512           0 :         return 0;
    1513             :     }
    1514             : 
    1515           0 :     return u_terminateChars(dest, destCapacity, 0, pErrorCode);
    1516             : }
    1517             : 
    1518             : U_CAPI UChar32 U_EXPORT2
    1519           0 : u_charFromName(UCharNameChoice nameChoice,
    1520             :                const char *name,
    1521             :                UErrorCode *pErrorCode) {
    1522             :     char upper[120], lower[120];
    1523             :     FindName findName;
    1524             :     AlgorithmicRange *algRange;
    1525             :     uint32_t *p;
    1526             :     uint32_t i;
    1527           0 :     UChar32 cp = 0;
    1528             :     char c0;
    1529           0 :     UChar32 error = 0xffff;     /* Undefined, but use this for backwards compatibility. */
    1530             : 
    1531           0 :     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    1532           0 :         return error;
    1533             :     }
    1534             : 
    1535           0 :     if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || name==NULL || *name==0) {
    1536           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    1537           0 :         return error;
    1538             :     }
    1539             : 
    1540           0 :     if(!isDataLoaded(pErrorCode)) {
    1541           0 :         return error;
    1542             :     }
    1543             : 
    1544             :     /* construct the uppercase and lowercase of the name first */
    1545           0 :     for(i=0; i<sizeof(upper); ++i) {
    1546           0 :         if((c0=*name++)!=0) {
    1547           0 :             upper[i]=uprv_toupper(c0);
    1548           0 :             lower[i]=uprv_tolower(c0);
    1549             :         } else {
    1550           0 :             upper[i]=lower[i]=0;
    1551           0 :             break;
    1552             :         }
    1553             :     }
    1554           0 :     if(i==sizeof(upper)) {
    1555             :         /* name too long, there is no such character */
    1556           0 :         *pErrorCode = U_ILLEGAL_CHAR_FOUND;
    1557           0 :         return error;
    1558             :     }
    1559             :     // i==strlen(name)==strlen(lower)==strlen(upper)
    1560             : 
    1561             :     /* try extended names first */
    1562           0 :     if (lower[0] == '<') {
    1563           0 :         if (nameChoice == U_EXTENDED_CHAR_NAME) {
    1564             :             // Parse a string like "<category-HHHH>" where HHHH is a hex code point.
    1565           0 :             if (lower[--i] == '>' && i >= 3 && lower[--i] != '-') {
    1566           0 :                 while (i >= 3 && lower[--i] != '-') {}
    1567             : 
    1568           0 :                 if (i >= 2 && lower[i] == '-') {
    1569             :                     uint32_t cIdx;
    1570             : 
    1571           0 :                     lower[i] = 0;
    1572             : 
    1573           0 :                     for (++i; lower[i] != '>'; ++i) {
    1574           0 :                         if (lower[i] >= '0' && lower[i] <= '9') {
    1575           0 :                             cp = (cp << 4) + lower[i] - '0';
    1576           0 :                         } else if (lower[i] >= 'a' && lower[i] <= 'f') {
    1577           0 :                             cp = (cp << 4) + lower[i] - 'a' + 10;
    1578             :                         } else {
    1579           0 :                             *pErrorCode = U_ILLEGAL_CHAR_FOUND;
    1580           0 :                             return error;
    1581             :                         }
    1582             :                     }
    1583             : 
    1584             :                     /* Now validate the category name.
    1585             :                        We could use a binary search, or a trie, if
    1586             :                        we really wanted to. */
    1587             : 
    1588           0 :                     for (lower[i] = 0, cIdx = 0; cIdx < UPRV_LENGTHOF(charCatNames); ++cIdx) {
    1589             : 
    1590           0 :                         if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) {
    1591           0 :                             if (getCharCat(cp) == cIdx) {
    1592           0 :                                 return cp;
    1593             :                             }
    1594           0 :                             break;
    1595             :                         }
    1596             :                     }
    1597             :                 }
    1598             :             }
    1599             :         }
    1600             : 
    1601           0 :         *pErrorCode = U_ILLEGAL_CHAR_FOUND;
    1602           0 :         return error;
    1603             :     }
    1604             : 
    1605             :     /* try algorithmic names now */
    1606           0 :     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
    1607           0 :     i=*p;
    1608           0 :     algRange=(AlgorithmicRange *)(p+1);
    1609           0 :     while(i>0) {
    1610           0 :         if((cp=findAlgName(algRange, nameChoice, upper))!=0xffff) {
    1611           0 :             return cp;
    1612             :         }
    1613           0 :         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
    1614           0 :         --i;
    1615             :     }
    1616             : 
    1617             :     /* normal character name */
    1618           0 :     findName.otherName=upper;
    1619           0 :     findName.code=error;
    1620           0 :     enumNames(uCharNames, 0, UCHAR_MAX_VALUE + 1, DO_FIND_NAME, &findName, nameChoice);
    1621           0 :     if (findName.code == error) {
    1622           0 :          *pErrorCode = U_ILLEGAL_CHAR_FOUND;
    1623             :     }
    1624           0 :     return findName.code;
    1625             : }
    1626             : 
    1627             : U_CAPI void U_EXPORT2
    1628           0 : u_enumCharNames(UChar32 start, UChar32 limit,
    1629             :                 UEnumCharNamesFn *fn,
    1630             :                 void *context,
    1631             :                 UCharNameChoice nameChoice,
    1632             :                 UErrorCode *pErrorCode) {
    1633             :     AlgorithmicRange *algRange;
    1634             :     uint32_t *p;
    1635             :     uint32_t i;
    1636             : 
    1637           0 :     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    1638           0 :         return;
    1639             :     }
    1640             : 
    1641           0 :     if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || fn==NULL) {
    1642           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    1643           0 :         return;
    1644             :     }
    1645             : 
    1646           0 :     if((uint32_t) limit > UCHAR_MAX_VALUE + 1) {
    1647           0 :         limit = UCHAR_MAX_VALUE + 1;
    1648             :     }
    1649           0 :     if((uint32_t)start>=(uint32_t)limit) {
    1650           0 :         return;
    1651             :     }
    1652             : 
    1653           0 :     if(!isDataLoaded(pErrorCode)) {
    1654           0 :         return;
    1655             :     }
    1656             : 
    1657             :     /* interleave the data-driven ones with the algorithmic ones */
    1658             :     /* iterate over all algorithmic ranges; assume that they are in ascending order */
    1659           0 :     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
    1660           0 :     i=*p;
    1661           0 :     algRange=(AlgorithmicRange *)(p+1);
    1662           0 :     while(i>0) {
    1663             :         /* enumerate the character names before the current algorithmic range */
    1664             :         /* here: start<limit */
    1665           0 :         if((uint32_t)start<algRange->start) {
    1666           0 :             if((uint32_t)limit<=algRange->start) {
    1667           0 :                 enumNames(uCharNames, start, limit, fn, context, nameChoice);
    1668           0 :                 return;
    1669             :             }
    1670           0 :             if(!enumNames(uCharNames, start, (UChar32)algRange->start, fn, context, nameChoice)) {
    1671           0 :                 return;
    1672             :             }
    1673           0 :             start=(UChar32)algRange->start;
    1674             :         }
    1675             :         /* enumerate the character names in the current algorithmic range */
    1676             :         /* here: algRange->start<=start<limit */
    1677           0 :         if((uint32_t)start<=algRange->end) {
    1678           0 :             if((uint32_t)limit<=(algRange->end+1)) {
    1679           0 :                 enumAlgNames(algRange, start, limit, fn, context, nameChoice);
    1680           0 :                 return;
    1681             :             }
    1682           0 :             if(!enumAlgNames(algRange, start, (UChar32)algRange->end+1, fn, context, nameChoice)) {
    1683           0 :                 return;
    1684             :             }
    1685           0 :             start=(UChar32)algRange->end+1;
    1686             :         }
    1687             :         /* continue to the next algorithmic range (here: start<limit) */
    1688           0 :         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
    1689           0 :         --i;
    1690             :     }
    1691             :     /* enumerate the character names after the last algorithmic range */
    1692           0 :     enumNames(uCharNames, start, limit, fn, context, nameChoice);
    1693             : }
    1694             : 
    1695             : U_CAPI int32_t U_EXPORT2
    1696           0 : uprv_getMaxCharNameLength() {
    1697           0 :     UErrorCode errorCode=U_ZERO_ERROR;
    1698           0 :     if(calcNameSetsLengths(&errorCode)) {
    1699           0 :         return gMaxNameLength;
    1700             :     } else {
    1701           0 :         return 0;
    1702             :     }
    1703             : }
    1704             : 
    1705             : /**
    1706             :  * Converts the char set cset into a Unicode set uset.
    1707             :  * @param cset Set of 256 bit flags corresponding to a set of chars.
    1708             :  * @param uset USet to receive characters. Existing contents are deleted.
    1709             :  */
    1710             : static void
    1711           0 : charSetToUSet(uint32_t cset[8], const USetAdder *sa) {
    1712             :     UChar us[256];
    1713             :     char cs[256];
    1714             : 
    1715             :     int32_t i, length;
    1716             :     UErrorCode errorCode;
    1717             : 
    1718           0 :     errorCode=U_ZERO_ERROR;
    1719             : 
    1720           0 :     if(!calcNameSetsLengths(&errorCode)) {
    1721           0 :         return;
    1722             :     }
    1723             : 
    1724             :     /* build a char string with all chars that are used in character names */
    1725           0 :     length=0;
    1726           0 :     for(i=0; i<256; ++i) {
    1727           0 :         if(SET_CONTAINS(cset, i)) {
    1728           0 :             cs[length++]=(char)i;
    1729             :         }
    1730             :     }
    1731             : 
    1732             :     /* convert the char string to a UChar string */
    1733           0 :     u_charsToUChars(cs, us, length);
    1734             : 
    1735             :     /* add each UChar to the USet */
    1736           0 :     for(i=0; i<length; ++i) {
    1737           0 :         if(us[i]!=0 || cs[i]==0) { /* non-invariant chars become (UChar)0 */
    1738           0 :             sa->add(sa->set, us[i]);
    1739             :         }
    1740             :     }
    1741             : }
    1742             : 
    1743             : /**
    1744             :  * Fills set with characters that are used in Unicode character names.
    1745             :  * @param set USet to receive characters.
    1746             :  */
    1747             : U_CAPI void U_EXPORT2
    1748           0 : uprv_getCharNameCharacters(const USetAdder *sa) {
    1749           0 :     charSetToUSet(gNameSet, sa);
    1750           0 : }
    1751             : 
    1752             : /* data swapping ------------------------------------------------------------ */
    1753             : 
    1754             : /*
    1755             :  * The token table contains non-negative entries for token bytes,
    1756             :  * and -1 for bytes that represent themselves in the data file's charset.
    1757             :  * -2 entries are used for lead bytes.
    1758             :  *
    1759             :  * Direct bytes (-1 entries) must be translated from the input charset family
    1760             :  * to the output charset family.
    1761             :  * makeTokenMap() writes a permutation mapping for this.
    1762             :  * Use it once for single-/lead-byte tokens and once more for all trail byte
    1763             :  * tokens. (';' is an unused trail byte marked with -1.)
                     :  *
                     :  * ds          supplies inCharset, outCharset and swapInvChars()
                     :  * tokens      token table entries; only tokens[1..min(tokenCount,256)-1]
                     :  *             are inspected, and only when the charset families differ
                     :  * tokenCount  number of usable entries in tokens[]
                     :  * map         receives the permutation; when the charset families are equal
                     :  *             all 256 entries are set to the identity
                     :  * pErrorCode  in/out error code; the function returns immediately if it
                     :  *             already indicates a failure, and returns early (after
                     :  *             udata_printError()) if swapInvChars() reports one
    1764             :  */
    1765             : static void
    1766           0 : makeTokenMap(const UDataSwapper *ds,
    1767             :              int16_t tokens[], uint16_t tokenCount,
    1768             :              uint8_t map[256],
    1769             :              UErrorCode *pErrorCode) {
    1770             :     UBool usedOutChar[256];
    1771             :     uint16_t i, j;
    1772             :     uint8_t c1, c2;
    1773             : 
    1774           0 :     if(U_FAILURE(*pErrorCode)) {
    1775           0 :         return;
    1776             :     }
    1777             : 
    1778           0 :     if(ds->inCharset==ds->outCharset) {
    1779             :         /* Same charset family: identity permutation */
    1780           0 :         for(i=0; i<256; ++i) {
    1781           0 :             map[i]=(uint8_t)i;
    1782             :         }
    1783             :     } else {
    1784           0 :         uprv_memset(map, 0, 256); /* 0 doubles as the "not assigned yet" marker tested below */
    1785           0 :         uprv_memset(usedOutChar, 0, 256); /* NOTE(review): clears 256 bytes; covers the whole array only if sizeof(UBool)==1 -- confirm */
    1786             : 
    1787           0 :         if(tokenCount>256) { /* map[] has only 256 entries */
    1788           0 :             tokenCount=256;
    1789             :         }
    1790             : 
    1791             :         /* set the direct bytes (byte 0 always maps to itself) */
    1792           0 :         for(i=1; i<tokenCount; ++i) {
    1793           0 :             if(tokens[i]==-1) {
    1794             :                 /* convert the direct byte character */
    1795           0 :                 c1=(uint8_t)i;
    1796           0 :                 ds->swapInvChars(ds, &c1, 1, &c2, pErrorCode);
    1797           0 :                 if(U_FAILURE(*pErrorCode)) {
    1798           0 :                     udata_printError(ds, "unames/makeTokenMap() finds variant character 0x%02x used (input charset family %d)\n",
    1799           0 :                                      i, ds->inCharset);
    1800           0 :                     return;
    1801             :                 }
    1802             : 
    1803             :                 /* enter the converted character into the map and mark it used */
    1804           0 :                 map[c1]=c2;
    1805           0 :                 usedOutChar[c2]=TRUE;
    1806             :             }
    1807             :         }
    1808             : 
    1809             :         /* set the mappings for the rest of the permutation */
    1810           0 :         for(i=j=1; i<tokenCount; ++i) { /* j walks the candidate output byte values */
    1811             :             /* set mappings that were not set for direct bytes */
    1812           0 :             if(map[i]==0) {
    1813             :                 /* set an output byte value that was not used as an output byte above */
    1814           0 :                 while(usedOutChar[j]) { /* NOTE(review): no explicit j<256 bound; relies on enough unused output values remaining -- confirm */
    1815           0 :                     ++j;
    1816             :                 }
    1817           0 :                 map[i]=(uint8_t)j++;
    1818             :             }
    1819             :         }
    1820             : 
    1821             :         /*
    1822             :          * leave mappings at tokenCount and above unset if tokenCount<256
    1823             :          * because they won't be used
    1824             :          */
    1825             :     }
    1826             : }
    1827             : 
    1828             : U_CAPI int32_t U_EXPORT2
    1829           0 : uchar_swapNames(const UDataSwapper *ds,
    1830             :                 const void *inData, int32_t length, void *outData,
    1831             :                 UErrorCode *pErrorCode) {
    1832             :     const UDataInfo *pInfo;
    1833             :     int32_t headerSize;
    1834             : 
    1835             :     const uint8_t *inBytes;
    1836             :     uint8_t *outBytes;
    1837             : 
    1838             :     uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset,
    1839             :              offset, i, count, stringsCount;
    1840             : 
    1841             :     const AlgorithmicRange *inRange;
    1842             :     AlgorithmicRange *outRange;
    1843             : 
    1844             :     /*
                     :      * Overview of this function:
                     :      * Swaps the body of a data item whose dataFormat is "unam" with
                     :      * formatVersion[0]==1 from inData to outData, using the callbacks in ds.
                     :      *
                     :      * length<0 selects preflighting: only the algorithmic ranges are walked
                     :      * to find the end of the data, and this function itself writes nothing
                     :      * through outData. Otherwise length is the number of input bytes
                     :      * including the header.
                     :      *
                     :      * Returns headerSize plus the offset just past the last algorithmic
                     :      * range. Returns 0 if pErrorCode is NULL or already a failure, or after
                     :      * setting *pErrorCode for unrecognized data, too few bytes, an allocation
                     :      * failure, or an unknown algorithmic range type.
                     :      *
                     :      * Body layout as read below: four uint32_t offsets in bytes 0..15
                     :      * (token strings, groups, group strings, algorithmic names), then a
                     :      * uint16_t tokenCount at byte 16 followed by the 16-bit tokens.
                     :      *
                     :      * udata_swapDataHeader checks the arguments
                     :      */
    1845           0 :     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    1846           0 :     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    1847           0 :         return 0;
    1848             :     }
    1849             : 
    1850             :     /* check data format and format version */
    1851           0 :     pInfo=(const UDataInfo *)((const char *)inData+4);
    1852           0 :     if(!(
    1853           0 :         pInfo->dataFormat[0]==0x75 &&   /* dataFormat="unam" */
    1854           0 :         pInfo->dataFormat[1]==0x6e &&
    1855           0 :         pInfo->dataFormat[2]==0x61 &&
    1856           0 :         pInfo->dataFormat[3]==0x6d &&
    1857           0 :         pInfo->formatVersion[0]==1
    1858             :     )) {
    1859           0 :         udata_printError(ds, "uchar_swapNames(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unames.icu\n",
    1860           0 :                          pInfo->dataFormat[0], pInfo->dataFormat[1],
    1861           0 :                          pInfo->dataFormat[2], pInfo->dataFormat[3],
    1862           0 :                          pInfo->formatVersion[0]);
    1863           0 :         *pErrorCode=U_UNSUPPORTED_ERROR;
    1864           0 :         return 0;
    1865             :     }
    1866             : 
    1867           0 :     inBytes=(const uint8_t *)inData+headerSize;
    1868           0 :     outBytes=(uint8_t *)outData+headerSize;
    1869           0 :     if(length<0) {
    1870           0 :         algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]); /* fourth of the initial offsets */
    1871             :     } else {
    1872           0 :         length-=headerSize; /* from here on, length counts body bytes only */
    1873           0 :         if( length<20 ||
    1874           0 :             (uint32_t)length<(algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]))
    1875             :         ) {
    1876             :             udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu\n",
    1877           0 :                              length);
    1878           0 :             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    1879           0 :             return 0;
    1880             :         }
    1881             :     }
    1882             : 
    1883           0 :     if(length<0) {
    1884             :         /* preflighting: iterate through algorithmic ranges */
    1885           0 :         offset=algNamesOffset;
    1886           0 :         count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));
    1887           0 :         offset+=4;
    1888             : 
    1889           0 :         for(i=0; i<count; ++i) {
    1890           0 :             inRange=(const AlgorithmicRange *)(inBytes+offset);
    1891           0 :             offset+=ds->readUInt16(inRange->size); /* each range records its own total size */
    1892             :         }
    1893             :     } else {
    1894             :         /* swap data */
    1895             :         const uint16_t *p;
    1896             :         uint16_t *q, *temp;
    1897             : 
    1898             :         int16_t tokens[512]; /* [0..255] feed map[], [256..511] feed trailMap[] (see the makeTokenMap() calls) */
    1899             :         uint16_t tokenCount;
    1900             : 
    1901             :         uint8_t map[256], trailMap[256];
    1902             : 
    1903             :         /* copy the data for inaccessible bytes */
    1904           0 :         if(inBytes!=outBytes) {
    1905           0 :             uprv_memcpy(outBytes, inBytes, length);
    1906             :         }
    1907             : 
    1908             :         /* the initial 4 offsets first */
    1909           0 :         tokenStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[0]);
    1910           0 :         groupsOffset=ds->readUInt32(((const uint32_t *)inBytes)[1]);
    1911           0 :         groupStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[2]);
    1912           0 :         ds->swapArray32(ds, inBytes, 16, outBytes, pErrorCode);
    1913             : 
    1914             :         /*
    1915             :          * now the tokens table
    1916             :          * it needs to be permuted along with the compressed name strings
    1917             :          */
    1918           0 :         p=(const uint16_t *)(inBytes+16);
    1919           0 :         q=(uint16_t *)(outBytes+16);
    1920             : 
    1921             :         /* read and swap the tokenCount */
    1922           0 :         tokenCount=ds->readUInt16(*p);
    1923           0 :         ds->swapArray16(ds, p, 2, q, pErrorCode);
    1924           0 :         ++p;
    1925           0 :         ++q;
    1926             : 
    1927             :         /* read the first 512 tokens and make the token maps */
    1928           0 :         if(tokenCount<=512) {
    1929           0 :             count=tokenCount;
    1930             :         } else {
    1931           0 :             count=512;
    1932             :         }
    1933           0 :         for(i=0; i<count; ++i) {
    1934           0 :             tokens[i]=udata_readInt16(ds, p[i]);
    1935             :         }
    1936           0 :         for(; i<512; ++i) {
    1937           0 :             tokens[i]=0; /* fill the rest of the tokens array if tokenCount<512 */
    1938             :         }
    1939           0 :         makeTokenMap(ds, tokens, tokenCount, map, pErrorCode);
    1940           0 :         makeTokenMap(ds, tokens+256, (uint16_t)(tokenCount>256 ? tokenCount-256 : 0), trailMap, pErrorCode);
    1941           0 :         if(U_FAILURE(*pErrorCode)) {
    1942           0 :             return 0;
    1943             :         }
    1944             : 
    1945             :         /*
    1946             :          * swap and permute the tokens
    1947             :          * go through a temporary array to support in-place swapping
                     :          *
                     :          * NOTE(review): temp holds tokenCount 16-bit units, but the destination
                     :          * indexes map[i] and (i&0xffffff00)+trailMap[i&0xff] used below are not
                     :          * checked against tokenCount in this function; presumably well-formed
                     :          * data keeps them in range -- confirm.
    1948             :          */
    1949           0 :         temp=(uint16_t *)uprv_malloc(tokenCount*2);
    1950           0 :         if(temp==NULL) {
    1951           0 :             udata_printError(ds, "out of memory swapping %u unames.icu tokens\n",
    1952           0 :                              tokenCount);
    1953           0 :             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    1954           0 :             return 0;
    1955             :         }
    1956             : 
    1957             :         /* swap and permute single-/lead-byte tokens */
    1958           0 :         for(i=0; i<tokenCount && i<256; ++i) {
    1959           0 :             ds->swapArray16(ds, p+i, 2, temp+map[i], pErrorCode);
    1960             :         }
    1961             : 
    1962             :         /* swap and permute trail-byte tokens */
    1963           0 :         for(; i<tokenCount; ++i) {
    1964           0 :             ds->swapArray16(ds, p+i, 2, temp+(i&0xffffff00)+trailMap[i&0xff], pErrorCode); /* keep the 256-token block, permute the low byte */
    1965             :         }
    1966             : 
    1967             :         /* copy the result into the output and free the temporary array */
    1968           0 :         uprv_memcpy(q, temp, tokenCount*2);
    1969           0 :         uprv_free(temp);
    1970             : 
    1971             :         /*
    1972             :          * swap the token strings but not a possible padding byte after
    1973             :          * the terminating NUL of the last string
    1974             :          */
    1975           0 :         udata_swapInvStringBlock(ds, inBytes+tokenStringOffset, (int32_t)(groupsOffset-tokenStringOffset),
    1976           0 :                                     outBytes+tokenStringOffset, pErrorCode);
    1977           0 :         if(U_FAILURE(*pErrorCode)) {
    1978           0 :             udata_printError(ds, "uchar_swapNames(token strings) failed\n");
    1979           0 :             return 0;
    1980             :         }
    1981             : 
    1982             :         /* swap the group table */
    1983           0 :         count=ds->readUInt16(*((const uint16_t *)(inBytes+groupsOffset)));
    1984           0 :         ds->swapArray16(ds, inBytes+groupsOffset, (int32_t)((1+count*3)*2), /* the count itself plus three 16-bit units per group */
    1985           0 :                            outBytes+groupsOffset, pErrorCode);
    1986             : 
    1987             :         /*
    1988             :          * swap the group strings
    1989             :          * swap the string bytes but not the nibble-encoded string lengths
    1990             :          */
    1991           0 :         if(ds->inCharset!=ds->outCharset) {
    1992             :             uint16_t offsets[LINES_PER_GROUP+1], lengths[LINES_PER_GROUP+1];
    1993             : 
    1994             :             const uint8_t *inStrings, *nextInStrings;
    1995             :             uint8_t *outStrings;
    1996             : 
    1997             :             uint8_t c;
    1998             : 
    1999           0 :             inStrings=inBytes+groupStringOffset;
    2000           0 :             outStrings=outBytes+groupStringOffset;
    2001             : 
    2002           0 :             stringsCount=algNamesOffset-groupStringOffset;
    2003             : 
    2004             :             /* iterate through string groups until only a few padding bytes are left */
    2005           0 :             while(stringsCount>32) {
    2006           0 :                 nextInStrings=expandGroupLengths(inStrings, offsets, lengths);
    2007             : 
    2008             :                 /* move past the length bytes */
    2009           0 :                 stringsCount-=(uint32_t)(nextInStrings-inStrings);
    2010           0 :                 outStrings+=nextInStrings-inStrings;
    2011           0 :                 inStrings=nextInStrings;
    2012             : 
    2013           0 :                 count=offsets[31]+lengths[31]; /* total number of string bytes in this group; index 31 is presumably LINES_PER_GROUP-1 -- confirm */
    2014           0 :                 stringsCount-=count;
    2015             : 
    2016             :                 /* swap the string bytes using map[] and trailMap[] */
    2017           0 :                 while(count>0) {
    2018           0 :                     c=*inStrings++;
    2019           0 :                     *outStrings++=map[c];
    2020           0 :                     if(tokens[c]!=-2) {
    2021           0 :                         --count;
    2022             :                     } else {
    2023             :                         /* token lead byte: swap the trail byte, too */
    2024           0 :                         *outStrings++=trailMap[*inStrings++];
    2025           0 :                         count-=2; /* NOTE(review): count is unsigned; would wrap if a lead byte were the last byte of a group -- confirm this cannot occur */
    2026             :                     }
    2027             :                 }
    2028             :             }
    2029             :         }
    2030             : 
    2031             :         /* swap the algorithmic ranges */
    2032           0 :         offset=algNamesOffset;
    2033           0 :         count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));
    2034           0 :         ds->swapArray32(ds, inBytes+offset, 4, outBytes+offset, pErrorCode);
    2035           0 :         offset+=4;
    2036             : 
    2037           0 :         for(i=0; i<count; ++i) {
    2038           0 :             if(offset>(uint32_t)length) { /* NOTE(review): only the start offset of the range is tested, not its full extent */
    2039             :                 udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu algorithmic range %u\n",
    2040           0 :                                  length, i);
    2041           0 :                 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    2042           0 :                 return 0;
    2043             :             }
    2044             : 
    2045           0 :             inRange=(const AlgorithmicRange *)(inBytes+offset);
    2046           0 :             outRange=(AlgorithmicRange *)(outBytes+offset);
    2047           0 :             offset+=ds->readUInt16(inRange->size); /* offset now points just past this range */
    2048             : 
    2049           0 :             ds->swapArray32(ds, inRange, 8, outRange, pErrorCode); /* the first 8 bytes are swapped as two 32-bit units */
    2050           0 :             ds->swapArray16(ds, &inRange->size, 2, &outRange->size, pErrorCode);
    2051           0 :             switch(inRange->type) {
    2052             :             case 0:
    2053             :                 /* swap prefix string */
    2054           0 :                 ds->swapInvChars(ds, inRange+1, (int32_t)uprv_strlen((const char *)(inRange+1)), /* length excludes the terminating NUL */
    2055           0 :                                     outRange+1, pErrorCode);
    2056           0 :                 if(U_FAILURE(*pErrorCode)) {
    2057             :                     udata_printError(ds, "uchar_swapNames(prefix string of algorithmic range %u) failed\n",
    2058           0 :                                      i);
    2059           0 :                     return 0;
    2060             :                 }
    2061           0 :                 break;
    2062             :             case 1:
    2063             :                 {
    2064             :                     /* swap factors and the prefix and factor strings */
    2065             :                     uint32_t factorsCount;
    2066             : 
    2067           0 :                     factorsCount=inRange->variant; /* for type 1 the variant field is used as the number of 16-bit factors */
    2068           0 :                     p=(const uint16_t *)(inRange+1);
    2069           0 :                     q=(uint16_t *)(outRange+1);
    2070           0 :                     ds->swapArray16(ds, p, (int32_t)(factorsCount*2), q, pErrorCode);
    2071             : 
    2072             :                     /* swap the strings, up to the last terminating NUL */
    2073           0 :                     p+=factorsCount;
    2074           0 :                     q+=factorsCount;
    2075           0 :                     stringsCount=(uint32_t)((inBytes+offset)-(const uint8_t *)p); /* bytes from the end of the factors to the end of this range */
    2076           0 :                     while(stringsCount>0 && ((const uint8_t *)p)[stringsCount-1]!=0) { /* drop trailing non-NUL bytes after the last NUL */
    2077           0 :                         --stringsCount;
    2078             :                     }
    2079           0 :                     ds->swapInvChars(ds, p, (int32_t)stringsCount, q, pErrorCode);
    2080             :                 }
    2081           0 :                 break;
    2082             :             default:
    2083           0 :                 udata_printError(ds, "uchar_swapNames(): unknown type %u of algorithmic range %u\n",
    2084           0 :                                  inRange->type, i);
    2085           0 :                 *pErrorCode=U_UNSUPPORTED_ERROR;
    2086           0 :                 return 0;
    2087             :             }
    2088             :         }
    2089             :     }
    2090             : 
    2091           0 :     return headerSize+(int32_t)offset;
    2092             : }
    2093             : 
    2094             : /*
    2095             :  * Hey, Emacs, please set the following:
    2096             :  *
    2097             :  * Local Variables:
    2098             :  * indent-tabs-mode: nil
    2099             :  * End:
    2100             :  *
    2101             :  */

Generated by: LCOV version 1.13