LCOV - code coverage report
Current view: top level - intl/icu/source/common - ucnv_io.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 56 513 10.9 %
Date: 2017-07-14 16:53:18 Functions: 4 37 10.8 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : ******************************************************************************
       5             : *
       6             : *   Copyright (C) 1999-2015, International Business Machines
       7             : *   Corporation and others.  All Rights Reserved.
       8             : *
       9             : ******************************************************************************
      10             : *
      11             : *
      12             : *  ucnv_io.cpp:
      13             : *  initializes global variables and defines functions pertaining to converter 
      14             : *  name resolution aspect of the conversion code.
      15             : *
      16             : *   new implementation:
      17             : *
      18             : *   created on: 1999nov22
      19             : *   created by: Markus W. Scherer
      20             : *
      21             : *   Use the binary cnvalias.icu (created from convrtrs.txt) to work
      22             : *   with aliases for converter names.
      23             : *
      24             : *   Date        Name        Description
      25             : *   11/22/1999  markus      Created
      26             : *   06/28/2002  grhoten     Major overhaul of the converter alias design.
      27             : *                           Now an alias can map to different converters
      28             : *                           depending on the specified standard.
      29             : *******************************************************************************
      30             : */
      31             : 
      32             : #include "unicode/utypes.h"
      33             : 
      34             : #if !UCONFIG_NO_CONVERSION
      35             : 
      36             : #include "unicode/ucnv.h"
      37             : #include "unicode/udata.h"
      38             : 
      39             : #include "umutex.h"
      40             : #include "uarrsort.h"
      41             : #include "uassert.h"
      42             : #include "udataswp.h"
      43             : #include "cstring.h"
      44             : #include "cmemory.h"
      45             : #include "ucnv_io.h"
      46             : #include "uenumimp.h"
      47             : #include "ucln_cmn.h"
      48             : 
      49             : /* Format of cnvalias.icu -----------------------------------------------------
      50             :  *
      51             :  * cnvalias.icu is a binary, memory-mappable form of convrtrs.txt.
      52             :  * This binary form contains several tables. All indexes are to uint16_t
      53             :  * units, and not to the bytes (uint8_t units). Addressing everything on
      54             :  * 16-bit boundaries allows us to store more information with small index
      55             :  * numbers, which are also 16-bit in size. The majority of the tables (except
      56             :  * the string table) consist of 16-bit numbers.
      57             :  *
      58             :  * First there is the size of the Table of Contents (TOC). The TOC
      59             :  * entries contain the size of each section. To find the offset of a
      60             :  * section, sum up the sizes of all the sections that precede it.
      61             :  * The TOC length and entries are an array of uint32_t values.
      62             :  * The first section after the TOC starts immediately after the TOC.
      63             :  *
      64             :  * 1) This section contains a list of converters. This list contains indexes
      65             :  * into the string table for the converter name. The index of this list is
      66             :  * also used by other sections, which are mentioned later on.
      67             :  * This list is not sorted.
      68             :  *
      69             :  * 2) This section contains a list of tags. This list contains indexes
      70             :  * into the string table for the tag name. The index of this list is
      71             :  * also used by other sections, which are mentioned later on.
      72             :  * This list is in priority order of standards.
      73             :  *
      74             :  * 3) This section contains a list of sorted unique aliases. This
      75             :  * list contains indexes into the string table for the alias name. The
      76             :  * index of this list is also used by other sections, like the 4th section.
      77             :  * The index for the 3rd and 4th section is used to get the
      78             :  * alias -> converter name mapping. Section 3 and 4 form a two column table.
      79             :  * Some of the most significant bits of each index may contain other
      80             :  * information (see findConverter for details).
      81             :  *
      82             :  * 4) This section contains a list of mapped converter names. Consider this
      83             :  * as a table that maps the 3rd section to the 1st section. This list contains
      84             :  * indexes into the 1st section. The index of this list is the same index in
      85             :  * the 3rd section. There is also some extra information in the high bits of
      86             :  * each converter index in this table. Currently it's only used to say that
      87             :  * an alias mapped to this converter is ambiguous. See UCNV_CONVERTER_INDEX_MASK
      88             :  * and UCNV_AMBIGUOUS_ALIAS_MAP_BIT for more information. This section is
      89             :  * the predigested form of the 5th section so that an alias lookup can be fast.
      90             :  *
      91             :  * 5) This section contains a 2D array with indexes to the 6th section. This
      92             :  * section is the full form of all alias mappings. The column index is the
      93             :  * index into the converter list (column header). The row index is the index
      94             :  * to the tag list (row header). This 2D array is the top part of a 3D array. The
      95             :  * third dimension is in the 6th section.
      96             :  *
      97             :  * 6) This is a blob of variable-length arrays. Each array starts with a size,
      98             :  * and is followed by indexes to alias names in the string table. This is
      99             :  * the third dimension of section 5. No other section should reference
     100             :  * this section.
     101             :  *
     102             :  * 7) Starting in ICU 3.6, this can be a UConverterAliasOptions struct. Its
     103             :  * presence indicates that a section 9 exists. UConverterAliasOptions specifies
     104             :  * what type of string normalization is used among other potential things in the
     105             :  * future.
     106             :  *
     107             :  * 8) This is the string table. All strings are indexed on an even address.
     108             :  * There are two reasons for this. First many chip architectures locate strings
     109             :  * faster on even address boundaries. Second, since all indexes are 16-bit
     110             :  * numbers, this string table can be 128KB in size instead of 64KB when we
     111             :  * only have strings starting on an even address.
     112             :  *
     113             :  * 9) When present this is a set of prenormalized strings from section 8. This
     114             :  * table contains normalized strings with the dashes and spaces stripped out,
     115             :  * and all strings lowercased. In the future, the options in section 7 may state
     116             :  * other types of normalization.
     117             :  *
     118             :  * Here is the concept of sections 5 and 6. It's a 3D cube. Each tag
     119             :  * has a unique alias among all converters. That same alias can
     120             :  * be mentioned in other standards on different converters,
     121             :  * but only one alias per tag can be unique.
     122             :  *
     123             :  *
     124             :  *              Converter Names (Usually in TR22 form)
     125             :  *           -------------------------------------------.
     126             :  *     T    /                                          /|
     127             :  *     a   /                                          / |
     128             :  *     g  /                                          /  |
     129             :  *     s /                                          /   |
     130             :  *      /                                          /    |
     131             :  *      ------------------------------------------/     |
     132             :  *    A |                                         |     |
     133             :  *    l |                                         |     |
     134             :  *    i |                                         |    /
     135             :  *    a |                                         |   /
     136             :  *    s |                                         |  /
     137             :  *    e |                                         | /
     138             :  *    s |                                         |/
     139             :  *      -------------------------------------------
     140             :  *
     141             :  *
     142             :  *
     143             :  * Here is what it really looks like. It's like swiss cheese.
     144             :  * There are holes. Some converters aren't recognized by
     145             :  * a standard, or they are really old converters that the
     146             :  * standard doesn't recognize anymore.
     147             :  *
     148             :  *              Converter Names (Usually in TR22 form)
     149             :  *           -------------------------------------------.
     150             :  *     T    /##########################################/|
     151             :  *     a   /     #            #                       /#
     152             :  *     g  /  #      ##     ##     ### # ### ### ### #/
     153             :  *     s / #             #####  ####        ##  ## #/#
     154             :  *      / ### # # ##  #  #   #          ### # #   #/##
     155             :  *      ------------------------------------------/# #
     156             :  *    A |### # # ##  #  #   #          ### # #   #|# #
     157             :  *    l |# # #    #     #               ## #     #|# #
     158             :  *    i |# # #    #     #                #       #|#
     159             :  *    a |#                                       #|#
     160             :  *    s |                                        #|#
     161             :  *    e
     162             :  *    s
     163             :  *
     164             :  */
     165             : 
     166             : /**
     167             :  * Context record used by the UEnumeration API (the code that reads it is not in this part of the file).
     168             :  */
     169             : typedef struct UAliasContext {
     170             :     uint32_t listOffset; /* NOTE(review): presumably the offset of an alias list in section 6 -- confirm against the enumeration code */
     171             :     uint32_t listIdx; /* NOTE(review): presumably the current position within that list -- confirm */
     172             : } UAliasContext;
     173             : 
     174             : static const char DATA_NAME[] = "cnvalias"; /* item name passed to udata_openChoice() in initAliasData() */
     175             : static const char DATA_TYPE[] = "icu"; /* item type passed to udata_openChoice() in initAliasData() */
     176             : 
     177             : static UDataMemory *gAliasData=NULL; /* the opened data item; set by initAliasData(), closed by ucnv_io_cleanup() */
     178             : static icu::UInitOnce gAliasDataInitOnce = U_INITONCE_INITIALIZER; /* guards initAliasData(); used by umtx_initOnce() in haveAliasData() */
     179             : 
     180             : enum { /* positions of the uint32_t entries in the table of contents (TOC) at the start of cnvalias.icu */
     181             :     tocLengthIndex=0, /* the TOC length itself */
     182             :     converterListIndex=1, /* size of section 1, the converter list */
     183             :     tagListIndex=2, /* size of section 2, the tag list */
     184             :     aliasListIndex=3, /* size of section 3, the sorted alias list */
     185             :     untaggedConvArrayIndex=4, /* size of section 4, the alias -> converter mapping */
     186             :     taggedAliasArrayIndex=5, /* size of section 5, the tag x converter 2D array */
     187             :     taggedAliasListsIndex=6, /* size of section 6, the variable-length alias lists */
     188             :     tableOptionsIndex=7, /* size of section 7, the UConverterAliasOptions */
     189             :     stringTableIndex=8, /* size of section 8, the string table */
     190             :     normalizedStringTableIndex=9, /* size of section 9, the normalized string table (only read when the TOC length exceeds 8) */
     191             :     offsetsCount,    /* length of the swapper's temporary offsets[] */
     192             :     minTocLength=8 /* min. tocLength in the file, does not count the tocLengthIndex! */
     193             : };
     194             : 
     195             : static const UConverterAliasOptions defaultTableOptions = { /* fallback used by initAliasData() when the data has no usable options section */
     196             :     UCNV_IO_UNNORMALIZED,
     197             :     0 /* containsCnvOptionInfo */
     198             : };
     199             : static UConverterAlias gMainTable; /* section sizes and section pointers of the loaded data; filled in by initAliasData() */
     200             : 
     201             : #define GET_STRING(idx) (const char *)(gMainTable.stringTable + (idx)) /* string that starts idx table units into the string table */
     202             : #define GET_NORMALIZED_STRING(idx) (const char *)(gMainTable.normalizedStringTable + (idx)) /* same lookup in the normalized string table */
     203             : 
     204             : static UBool U_CALLCONV /* acceptance callback handed to udata_openChoice(); true only if every header check below passes */
     205           3 : isAcceptable(void * /*context*/,
     206             :              const char * /*type*/, const char * /*name*/,
     207             :              const UDataInfo *pInfo) {
     208             :     return (UBool)(
     209           6 :         pInfo->size>=20 &&
     210           6 :         pInfo->isBigEndian==U_IS_BIG_ENDIAN && /* byte order must match this build */
     211           6 :         pInfo->charsetFamily==U_CHARSET_FAMILY && /* charset family must match this build */
     212           6 :         pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CvAl" */
     213           6 :         pInfo->dataFormat[1]==0x76 &&
     214           6 :         pInfo->dataFormat[2]==0x41 &&
     215           9 :         pInfo->dataFormat[3]==0x6c &&
     216           6 :         pInfo->formatVersion[0]==3); /* only major format version 3 is accepted */
     217             : }
     218             : 
     219           0 : static UBool U_CALLCONV ucnv_io_cleanup(void) /* cleanup callback registered by initAliasData(); always returns TRUE */
     220             : {
     221           0 :     if (gAliasData) {
     222           0 :         udata_close(gAliasData);
     223           0 :         gAliasData = NULL;
     224             :     }
     225           0 :     gAliasDataInitOnce.reset(); /* reset the init-once guard used by haveAliasData() */
     226             : 
     227           0 :     uprv_memset(&gMainTable, 0, sizeof(gMainTable)); /* clear every section size and section pointer */
     228             : 
     229           0 :     return TRUE;                   /* Everything was cleaned up */
     230             : }
     231             : 
     232           3 : static void U_CALLCONV initAliasData(UErrorCode &errCode) { /* opens the alias data and fills gMainTable; on failure sets errCode and leaves gAliasData NULL */
     233             :     UDataMemory *data;
     234             :     const uint16_t *table; /* the data viewed as 16-bit units; section pointers are computed from it */
     235             :     const uint32_t *sectionSizes; /* the same memory viewed as the uint32_t table of contents */
     236             :     uint32_t tableStart; /* TOC length, read from sectionSizes[0] */
     237             :     uint32_t currOffset; /* running offset of the next section, in 16-bit units from table */
     238             : 
     239           3 :     ucln_common_registerCleanup(UCLN_COMMON_UCNV_IO, ucnv_io_cleanup);
     240             : 
     241           3 :     U_ASSERT(gAliasData == NULL);
     242           3 :     data = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &errCode);
     243           3 :     if(U_FAILURE(errCode)) {
     244           0 :         return;
     245             :     }
     246             : 
     247           3 :     sectionSizes = (const uint32_t *)udata_getMemory(data);
     248           3 :     table = (const uint16_t *)sectionSizes;
     249             : 
     250           3 :     tableStart      = sectionSizes[0];
     251           3 :     if (tableStart < minTocLength) { /* TOC too short to hold the eight required section sizes: reject and release the data */
     252           0 :         errCode = U_INVALID_FORMAT_ERROR;
     253           0 :         udata_close(data);
     254           0 :         return;
     255             :     }
     256           3 :     gAliasData = data; /* keep the data open; ucnv_io_cleanup() closes it */
     257             :     /* copy the section sizes from TOC entries 1..8 */
     258           3 :     gMainTable.converterListSize      = sectionSizes[1];
     259           3 :     gMainTable.tagListSize            = sectionSizes[2];
     260           3 :     gMainTable.aliasListSize          = sectionSizes[3];
     261           3 :     gMainTable.untaggedConvArraySize  = sectionSizes[4];
     262           3 :     gMainTable.taggedAliasArraySize   = sectionSizes[5];
     263           3 :     gMainTable.taggedAliasListsSize   = sectionSizes[6];
     264           3 :     gMainTable.optionTableSize        = sectionSizes[7];
     265           3 :     gMainTable.stringTableSize        = sectionSizes[8];
     266             : 
     267           3 :     if (tableStart > 8) { /* entry 9 is read only when the TOC is longer than 8; otherwise the size field is not assigned here */
     268           3 :         gMainTable.normalizedStringTableSize = sectionSizes[9];
     269             :     }
     270             :     /* skip the TOC: tableStart entries plus the length word, each uint32_t being two 16-bit units */
     271           3 :     currOffset = tableStart * (sizeof(uint32_t)/sizeof(uint16_t)) + (sizeof(uint32_t)/sizeof(uint16_t));
     272           3 :     gMainTable.converterList = table + currOffset;
     273             :     /* each following section starts where the previous one ends */
     274           3 :     currOffset += gMainTable.converterListSize;
     275           3 :     gMainTable.tagList = table + currOffset;
     276             : 
     277           3 :     currOffset += gMainTable.tagListSize;
     278           3 :     gMainTable.aliasList = table + currOffset;
     279             : 
     280           3 :     currOffset += gMainTable.aliasListSize;
     281           3 :     gMainTable.untaggedConvArray = table + currOffset;
     282             : 
     283           3 :     currOffset += gMainTable.untaggedConvArraySize;
     284           3 :     gMainTable.taggedAliasArray = table + currOffset;
     285             : 
     286             :     /* aliasLists is a 1-based array, but it has a padding character */
     287           3 :     currOffset += gMainTable.taggedAliasArraySize;
     288           3 :     gMainTable.taggedAliasLists = table + currOffset;
     289             : 
     290           3 :     currOffset += gMainTable.taggedAliasListsSize;
     291           3 :     if (gMainTable.optionTableSize > 0 /* use the options stored in the data only if the section is present ... */
     292           3 :         && ((const UConverterAliasOptions *)(table + currOffset))->stringNormalizationType < UCNV_IO_NORM_TYPE_COUNT) /* ... and its normalization type is a known one */
     293             :     {
     294             :         /* Faster table */
     295           3 :         gMainTable.optionTable = (const UConverterAliasOptions *)(table + currOffset);
     296             :     }
     297             :     else {
     298             :         /* Smaller table, or I can't handle this normalization mode!
     299             :         Use the original slower table lookup. */
     300           0 :         gMainTable.optionTable = &defaultTableOptions;
     301             :     }
     302             : 
     303           3 :     currOffset += gMainTable.optionTableSize;
     304           3 :     gMainTable.stringTable = table + currOffset;
     305             :     /* without normalization the "normalized" table is the string table itself; otherwise it follows the string table */
     306           3 :     currOffset += gMainTable.stringTableSize;
     307           6 :     gMainTable.normalizedStringTable = ((gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED)
     308           3 :         ? gMainTable.stringTable : (table + currOffset));
     309             : }
     310             : 
     311             : 
     312             : static UBool /* runs initAliasData() through the init-once guard and reports whether *pErrorCode is a success code */
     313           3 : haveAliasData(UErrorCode *pErrorCode) {
     314           3 :     umtx_initOnce(gAliasDataInitOnce, &initAliasData, *pErrorCode);
     315           3 :     return U_SUCCESS(*pErrorCode);
     316             : }
     317             : 
     318             : static inline UBool /* argument check: true only for a non-NULL, non-empty name */
     319           0 : isAlias(const char *alias, UErrorCode *pErrorCode) {
     320           0 :     if(alias==NULL) { /* NULL is the only case that sets an error code */
     321           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
     322           0 :         return FALSE;
     323             :     }
     324           0 :     return (UBool)(*alias!=0); /* an empty string yields FALSE without touching *pErrorCode */
     325             : }
     326             : 
     327           0 : static uint32_t getTagNumber(const char *tagname) { /* index of tagname in the tag list, or UINT32_MAX if it is absent or no tag list is set */
     328           0 :     if (gMainTable.tagList) {
     329             :         uint32_t tagNum;
     330           0 :         for (tagNum = 0; tagNum < gMainTable.tagListSize; tagNum++) { /* linear scan over the tag list */
     331           0 :             if (!uprv_stricmp(GET_STRING(gMainTable.tagList[tagNum]), tagname)) { /* each tagList entry is a string-table index */
     332           0 :                 return tagNum;
     333             :             }
     334             :         }
     335             :     }
     336             : 
     337           0 :     return UINT32_MAX;
     338             : }
     339             : 
     340             : /* character types relevant for ucnv_compareNames() */
     341             : enum {
     342             :     UIGNORE, /* character is skipped by the comparison and stripping code */
     343             :     ZERO, /* the digit zero */
     344             :     NONZERO, /* the digits one to nine */
     345             :     MINLETTER /* any values from here on are lowercase letter mappings */
     346             : };
     347             : 
     348             : /* character types for ASCII 00..7F */
     349             : static const uint8_t asciiTypes[128] = {
     350             :     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 00..0F */
     351             :     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 10..1F */
     352             :     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 20..2F */
     353             :     ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0, /* 30..3F: digits */
     354             :     0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 40..4F: uppercase letters map to the lowercase codes */
     355             :     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0, /* 50..5F */
     356             :     0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 60..6F: lowercase letters map to themselves */
     357             :     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0 /* 70..7F */
     358             : };
     359             : /* bytes 00..7F are looked up in asciiTypes, all others are UIGNORE; note that c is evaluated twice */
     360             : #define GET_ASCII_TYPE(c) ((int8_t)(c) >= 0 ? asciiTypes[(uint8_t)c] : (uint8_t)UIGNORE)
     361             : 
     362             : /* character types for EBCDIC 80..FF */
     363             : static const uint8_t ebcdicTypes[128] = {
     364             :     0,    0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0, /* 80..8F: map to themselves */
     365             :     0,    0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0, /* 90..9F: map to themselves */
     366             :     0,    0,    0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0, /* A0..AF: map to themselves */
     367             :     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* B0..BF */
     368             :     0,    0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0, /* C0..CF: map to the codes of row 80..8F */
     369             :     0,    0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0, /* D0..DF: map to the codes of row 90..9F */
     370             :     0,    0,    0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0, /* E0..EF: map to the codes of row A0..AF */
     371             :     ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0 /* F0..FF: digits */
     372             : };
     373             : /* bytes 80..FF are looked up in ebcdicTypes (index is the low 7 bits), all others are UIGNORE; c is evaluated twice */
     374             : #define GET_EBCDIC_TYPE(c) ((int8_t)(c) < 0 ? ebcdicTypes[(c)&0x7f] : (uint8_t)UIGNORE)
     375             : 
     376             : #if U_CHARSET_FAMILY==U_ASCII_FAMILY /* GET_CHAR_TYPE is the classifier for this build's charset family */
     377             : #   define GET_CHAR_TYPE(c) GET_ASCII_TYPE(c)
     378             : #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
     379             : #   define GET_CHAR_TYPE(c) GET_EBCDIC_TYPE(c)
     380             : #else
     381             : #   error U_CHARSET_FAMILY is not valid
     382             : #endif
     383             : 
     384             : 
     385             : /* Copies name to dst keeping only ASCII letters (lowercased) and digits, without leading zeros that precede another digit; NUL-terminates and returns dst. Writes at most strlen(name)+1 chars; dst is not bounds-checked. @see ucnv_compareNames */
     386             : U_CAPI char * U_CALLCONV
     387           0 : ucnv_io_stripASCIIForCompare(char *dst, const char *name) {
     388           0 :     char *dstItr = dst;
     389             :     uint8_t type, nextType;
     390             :     char c1;
     391           0 :     UBool afterDigit = FALSE; /* TRUE while inside a digit run that has already produced a nonzero digit */
     392             : 
     393           0 :     while ((c1 = *name++) != 0) {
     394           0 :         type = GET_ASCII_TYPE(c1);
     395           0 :         switch (type) {
     396             :         case UIGNORE:
     397           0 :             afterDigit = FALSE;
     398           0 :             continue; /* ignore all but letters and digits */
     399             :         case ZERO:
     400           0 :             if (!afterDigit) {
     401           0 :                 nextType = GET_ASCII_TYPE(*name); /* name already points at the following character */
     402           0 :                 if (nextType == ZERO || nextType == NONZERO) {
     403           0 :                     continue; /* ignore leading zero before another digit */
     404             :                 }
     405             :             }
     406           0 :             break; /* keep this zero: it follows a nonzero digit or is the last digit of its run */
     407             :         case NONZERO:
     408           0 :             afterDigit = TRUE;
     409           0 :             break;
     410             :         default:
     411           0 :             c1 = (char)type; /* lowercased letter */
     412           0 :             afterDigit = FALSE;
     413           0 :             break;
     414             :         }
     415           0 :         *dstItr++ = c1;
     416             :     }
     417           0 :     *dstItr = 0;
     418           0 :     return dst;
     419             : }
     420             : 
     421             : U_CAPI char * U_CALLCONV /* EBCDIC counterpart of ucnv_io_stripASCIIForCompare(): same algorithm, classifying with GET_EBCDIC_TYPE; returns dst */
     422           0 : ucnv_io_stripEBCDICForCompare(char *dst, const char *name) {
     423           0 :     char *dstItr = dst;
     424             :     uint8_t type, nextType;
     425             :     char c1;
     426           0 :     UBool afterDigit = FALSE; /* TRUE while inside a digit run that has already produced a nonzero digit */
     427             : 
     428           0 :     while ((c1 = *name++) != 0) {
     429           0 :         type = GET_EBCDIC_TYPE(c1);
     430           0 :         switch (type) {
     431             :         case UIGNORE:
     432           0 :             afterDigit = FALSE;
     433           0 :             continue; /* ignore all but letters and digits */
     434             :         case ZERO:
     435           0 :             if (!afterDigit) {
     436           0 :                 nextType = GET_EBCDIC_TYPE(*name); /* name already points at the following character */
     437           0 :                 if (nextType == ZERO || nextType == NONZERO) {
     438           0 :                     continue; /* ignore leading zero before another digit */
     439             :                 }
     440             :             }
     441           0 :             break; /* keep this zero: it follows a nonzero digit or is the last digit of its run */
     442             :         case NONZERO:
     443           0 :             afterDigit = TRUE;
     444           0 :             break;
     445             :         default:
     446           0 :             c1 = (char)type; /* lowercased letter */
     447           0 :             afterDigit = FALSE;
     448           0 :             break;
     449             :         }
     450           0 :         *dstItr++ = c1; /* writes at most one char per input char; dst is not bounds-checked here */
     451             :     }
     452           0 :     *dstItr = 0;
     453           0 :     return dst;
     454             : }
     455             : 
     456             : /**
     457             :  * Do a fuzzy compare of two converter/alias names.
     458             :  * The comparison is case-insensitive, ignores leading zeroes when they are
     459             :  * followed by further digits, and ignores all but letters and digits.
     460             :  * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent.
     461             :  * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22
     462             :  * at http://www.unicode.org/reports/tr22/
     463             :  *
     464             :  * Swapping the arguments only flips the sign of the result, so equality
     465             :  * does not depend on argument order.  This is an important property for sorting the
     466             :  * list (when the list is preprocessed into binary form) and for
     467             :  * performing binary searches on it at run time.
     468             :  *
     469             :  * @param name1 a converter name or alias, zero-terminated
     470             :  * @param name2 a converter name or alias, zero-terminated
     471             :  * @return 0 if the names match, or a negative value if the name1
     472             :  * lexically precedes name2, or a positive value if the name1
     473             :  * lexically follows name2.
     474             :  *
     475             :  * @see ucnv_io_stripForCompare
     476             :  */
     477             : U_CAPI int U_EXPORT2
     478           0 : ucnv_compareNames(const char *name1, const char *name2) {
     479             :     int rc;
     480             :     uint8_t type, nextType;
     481             :     char c1, c2;
     482           0 :     UBool afterDigit1 = FALSE, afterDigit2 = FALSE; /* per-name flag: inside a digit run that already produced a nonzero digit */
     483             : 
     484           0 :     for (;;) { /* each pass takes the next significant character from each name and compares them */
     485           0 :         while ((c1 = *name1++) != 0) { /* leaves c1 == 0 when name1 is exhausted */
     486           0 :             type = GET_CHAR_TYPE(c1);
     487           0 :             switch (type) {
     488             :             case UIGNORE:
     489           0 :                 afterDigit1 = FALSE;
     490           0 :                 continue; /* ignore all but letters and digits */
     491             :             case ZERO:
     492           0 :                 if (!afterDigit1) {
     493           0 :                     nextType = GET_CHAR_TYPE(*name1);
     494           0 :                     if (nextType == ZERO || nextType == NONZERO) {
     495           0 :                         continue; /* ignore leading zero before another digit */
     496             :                     }
     497             :                 }
     498           0 :                 break;
     499             :             case NONZERO:
     500           0 :                 afterDigit1 = TRUE;
     501           0 :                 break;
     502             :             default:
     503           0 :                 c1 = (char)type; /* lowercased letter */
     504           0 :                 afterDigit1 = FALSE;
     505           0 :                 break;
     506             :             }
     507           0 :             break; /* deliver c1 */
     508             :         }
     509           0 :         while ((c2 = *name2++) != 0) { /* same scan for name2; leaves c2 == 0 when name2 is exhausted */
     510           0 :             type = GET_CHAR_TYPE(c2);
     511           0 :             switch (type) {
     512             :             case UIGNORE:
     513           0 :                 afterDigit2 = FALSE;
     514           0 :                 continue; /* ignore all but letters and digits */
     515             :             case ZERO:
     516           0 :                 if (!afterDigit2) {
     517           0 :                     nextType = GET_CHAR_TYPE(*name2);
     518           0 :                     if (nextType == ZERO || nextType == NONZERO) {
     519           0 :                         continue; /* ignore leading zero before another digit */
     520             :                     }
     521             :                 }
     522           0 :                 break;
     523             :             case NONZERO:
     524           0 :                 afterDigit2 = TRUE;
     525           0 :                 break;
     526             :             default:
     527           0 :                 c2 = (char)type; /* lowercased letter */
     528           0 :                 afterDigit2 = FALSE;
     529           0 :                 break;
     530             :             }
     531           0 :             break; /* deliver c2 */
     532             :         }
     533             : 
     534             :         /* If we reach the ends of both strings then they match */
     535           0 :         if ((c1|c2)==0) {
     536           0 :             return 0;
     537             :         }
     538             : 
     539             :         /* Case-insensitive comparison */
     540           0 :         rc = (int)(unsigned char)c1 - (int)(unsigned char)c2; /* letters were already lowercased above; if only one name has ended, its 0 makes rc nonzero */
     541           0 :         if (rc != 0) {
     542           0 :             return rc;
     543             :         }
     544             :     }
     545             : }
     546             : 
     547             : /*
     548             :  * Binary-search gMainTable.aliasList for an alias.
     549             :  * Return the converter number, i.e. the index into gMainTable.converterList.
                        *
                        * alias           Name to look up. Unless the table is marked
                        *                 UCNV_IO_UNNORMALIZED, the name is first passed through
                        *                 ucnv_io_stripForCompare() into a local buffer and then
                        *                 compared with uprv_strcmp() against the normalized
                        *                 strings; otherwise ucnv_compareNames() is used on the
                        *                 plain strings.
                        * containsOption  May be NULL. On a match it receives TRUE when the table
                        *                 carries no option info at all, or when the matched
                        *                 entry has UCNV_CONTAINS_OPTION_BIT set.
                        * pErrorCode      Written only: U_BUFFER_OVERFLOW_ERROR when the alias
                        *                 does not fit into UCNV_MAX_CONVERTER_NAME_LENGTH,
                        *                 U_AMBIGUOUS_ALIAS_WARNING when the matched entry has
                        *                 UCNV_AMBIGUOUS_ALIAS_MAP_BIT set.
                        *
                        * Returns UINT32_MAX when the alias is too long or is not found.
     550             :  */
     551             : static inline uint32_t
     552           0 : findConverter(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
     553             :     uint32_t mid, start, limit;
     554             :     uint32_t lastMid;
     555             :     int result;
     556           0 :     int isUnnormalized = (gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED);
     557             :     char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH];
     558             : 
     559           0 :     if (!isUnnormalized) {
                               /* The stripped copy goes into strippedName, so reject anything
                                  that would not fit including the terminator. */
     560           0 :         if (uprv_strlen(alias) >= UCNV_MAX_CONVERTER_NAME_LENGTH) {
     561           0 :             *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
     562           0 :             return UINT32_MAX;
     563             :         }
     564             : 
     565             :         /* Lower case and remove ignorable characters. */
     566           0 :         ucnv_io_stripForCompare(strippedName, alias);
     567           0 :         alias = strippedName;
     568             :     }
     569             : 
     570             :     /* do a binary search for the alias */
     571           0 :     start = 0;
     572           0 :     limit = gMainTable.untaggedConvArraySize;
     573           0 :     mid = limit; /* dead store: mid is recomputed at the top of the loop */
     574           0 :     lastMid = UINT32_MAX;
     575             : 
                           /*
                            * The search range is [start, limit). Because start is set to mid
                            * (not mid + 1), the loop ends when the midpoint stops changing.
                            * NOTE(review): with untaggedConvArraySize == 0 the first pass still
                            * reads aliasList[0]; presumably the table is never empty - confirm.
                            */
     576             :     for (;;) {
     577           0 :         mid = (uint32_t)((start + limit) / 2);
     578           0 :         if (lastMid == mid) {   /* Have we moved? */
     579           0 :             break;  /* We haven't moved, and it wasn't found. */
     580             :         }
     581           0 :         lastMid = mid;
     582           0 :         if (isUnnormalized) {
     583           0 :             result = ucnv_compareNames(alias, GET_STRING(gMainTable.aliasList[mid]));
     584             :         }
     585             :         else {
     586           0 :             result = uprv_strcmp(alias, GET_NORMALIZED_STRING(gMainTable.aliasList[mid]));
     587             :         }
     588             : 
     589           0 :         if (result < 0) {
     590           0 :             limit = mid;
     591           0 :         } else if (result > 0) {
     592           0 :             start = mid;
     593             :         } else {
                                   /* aliasList and untaggedConvArray are read with the same index. */
     594             :             /* Since the gencnval tool folds duplicates into one entry,
     595             :              * this alias in gAliasList is unique, but different standards
     596             :              * may map an alias to different converters.
     597             :              */
     598           0 :             if (gMainTable.untaggedConvArray[mid] & UCNV_AMBIGUOUS_ALIAS_MAP_BIT) {
     599           0 :                 *pErrorCode = U_AMBIGUOUS_ALIAS_WARNING;
     600             :             }
     601             :             /* State whether the canonical converter name contains an option.
     602             :             This information is contained in this list in order to maintain backward & forward compatibility. */
     603           0 :             if (containsOption) {
     604           0 :                 UBool containsCnvOptionInfo = (UBool)gMainTable.optionTable->containsCnvOptionInfo;
                                       /* Without option info in the table the answer defaults to TRUE. */
     605           0 :                 *containsOption = (UBool)((containsCnvOptionInfo
     606           0 :                     && ((gMainTable.untaggedConvArray[mid] & UCNV_CONTAINS_OPTION_BIT) != 0))
     607           0 :                     || !containsCnvOptionInfo);
     608             :             }
                                   /* Mask off the flag bits; what is left is the converter number. */
     609           0 :             return gMainTable.untaggedConvArray[mid] & UCNV_CONVERTER_INDEX_MASK;
     610             :         }
     611           0 :     }
     612             : 
     613           0 :     return UINT32_MAX;
     614             : }
     615             : 
     616             : /*
     617             :  * Is this alias in this list?
     618             :  * alias should be non-NULL. A listOffset of 0 means "no list" and yields FALSE.
                        *
                        * The list starts at gMainTable.taggedAliasLists[listOffset]: the first
                        * element is the entry count, followed by that many string indexes.
                        * Entries equal to 0 are skipped; the others are compared to alias
                        * with ucnv_compareNames().
     619             :  */
     620             : static inline UBool
     621           0 : isAliasInList(const char *alias, uint32_t listOffset) {
     622           0 :     if (listOffset) {
     623             :         uint32_t currAlias;
     624           0 :         uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
     625             :         /* +1 to skip listCount */
     626           0 :         const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
     627           0 :         for (currAlias = 0; currAlias < listCount; currAlias++) {
     628           0 :             if (currList[currAlias]
     629           0 :                 && ucnv_compareNames(alias, GET_STRING(currList[currAlias]))==0)
     630             :             {
     631           0 :                 return TRUE;
     632             :             }
     633             :         }
     634             :     }
     635           0 :     return FALSE;
     636             : }
     637             : 
     638             : /*
     639             :  * Search for the standard name of an alias (what is the default name
     640             :  * that this standard uses?)
     641             :  * Return the listOffset for gMainTable.taggedAliasLists. If it is 0,
     642             :  * then it could not be found, but the parameters are valid.
                        * Return UINT32_MAX when the tag number obtained from getTagNumber() is
                        * not a visible tag, or findConverter() did not yield a valid converter.
                        *
                        * Any error or warning set by findConverter() is copied to *pErrorCode.
     643             :  */
     644             : static uint32_t
     645           0 : findTaggedAliasListsOffset(const char *alias, const char *standard, UErrorCode *pErrorCode) {
     646             :     uint32_t idx;
     647             :     uint32_t listOffset;
     648             :     uint32_t convNum;
     649           0 :     UErrorCode myErr = U_ZERO_ERROR;
     650           0 :     uint32_t tagNum = getTagNumber(standard);
     651             : 
     652             :     /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
     653           0 :     convNum = findConverter(alias, NULL, &myErr);
     654           0 :     if (myErr != U_ZERO_ERROR) {
     655           0 :         *pErrorCode = myErr;
     656             :     }
     657             : 
     658           0 :     if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
                               /* taggedAliasArray is addressed as row = tag, column = converter. */
     659           0 :         listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
                               /* listOffset + 1 is the first entry after the count; a 0 there means
                                  the list has no default name. */
     660           0 :         if (listOffset && gMainTable.taggedAliasLists[listOffset + 1]) {
     661           0 :             return listOffset;
     662             :         }
     663           0 :         if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
     664             :             /* Uh Oh! They used an ambiguous alias.
     665             :                We have to search the whole swiss cheese starting
     666             :                at the highest standard affinity.
     667             :                This may take a while.
     668             :             */
     669           0 :             for (idx = 0; idx < gMainTable.taggedAliasArraySize; idx++) {
     670           0 :                 listOffset = gMainTable.taggedAliasArray[idx];
     671           0 :                 if (listOffset && isAliasInList(alias, listOffset)) {
                                           /* Recover the converter column of this cell, then look at the
                                              same column in the row of the requested tag. */
     672           0 :                     uint32_t currTagNum = idx/gMainTable.converterListSize;
     673           0 :                     uint32_t currConvNum = (idx - currTagNum*gMainTable.converterListSize);
     674           0 :                     uint32_t tempListOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + currConvNum];
     675           0 :                     if (tempListOffset && gMainTable.taggedAliasLists[tempListOffset + 1]) {
     676           0 :                         return tempListOffset;
     677             :                     }
     678             :                     /* else keep on looking */
     679             :                     /* We could speed this up by starting on the next row
     680             :                        because an alias is unique per row, right now.
     681             :                        This would change if alias versioning appears. */
     682             :                 }
     683             :             }
     684             :             /* The standard doesn't know about the alias */
     685             :         }
     686             :         /* else no default name */
     687           0 :         return 0;
     688             :     }
     689             :     /* else converter or tag not found */
     690             : 
     691           0 :     return UINT32_MAX;
     692             : }
     693             : 
     694             : /* Return the converter number (index into gMainTable.converterList) of the canonical name */
                       /*
                        * The alias is first resolved with findConverter(); the result is accepted
                        * only if the list for (standard, converter) actually contains the alias.
                        * If findConverter() reported U_AMBIGUOUS_ALIAS_WARNING, every converter
                        * in the row of the requested standard is checked instead.
                        *
                        * Any error or warning set by findConverter() is copied to *pErrorCode.
                        * Returns UINT32_MAX when nothing matches.
                        */
     695             : static uint32_t
     696           0 : findTaggedConverterNum(const char *alias, const char *standard, UErrorCode *pErrorCode) {
     697             :     uint32_t idx;
     698             :     uint32_t listOffset;
     699             :     uint32_t convNum;
     700           0 :     UErrorCode myErr = U_ZERO_ERROR;
     701           0 :     uint32_t tagNum = getTagNumber(standard);
     702             : 
     703             :     /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
     704           0 :     convNum = findConverter(alias, NULL, &myErr);
     705           0 :     if (myErr != U_ZERO_ERROR) {
     706           0 :         *pErrorCode = myErr;
     707             :     }
     708             : 
     709           0 :     if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
     710           0 :         listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
     711           0 :         if (listOffset && isAliasInList(alias, listOffset)) {
     712           0 :             return convNum;
     713             :         }
     714           0 :         if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
     715             :             /* Uh Oh! They used an ambiguous alias.
     716             :                We have to search one slice of the swiss cheese.
     717             :                We search only in the requested tag, not the whole thing.
     718             :                This may take a while.
     719             :             */
                                   /* [convStart, convLimit) is the row of tagNum in taggedAliasArray. */
     720           0 :             uint32_t convStart = (tagNum)*gMainTable.converterListSize;
     721           0 :             uint32_t convLimit = (tagNum+1)*gMainTable.converterListSize;
     722           0 :             for (idx = convStart; idx < convLimit; idx++) {
     723           0 :                 listOffset = gMainTable.taggedAliasArray[idx];
     724           0 :                 if (listOffset && isAliasInList(alias, listOffset)) {
                                           /* Position within the row is the converter number. */
     725           0 :                     return idx-convStart;
     726             :                 }
     727             :             }
     728             :             /* The standard doesn't know about the alias */
     729             :         }
     730             :         /* else no canonical name */
     731             :     }
     732             :     /* else converter or tag not found */
     733             : 
     734           0 :     return UINT32_MAX;
     735             : }
     736             : 
                       /*
                        * Resolve an alias to the converter name stored in gMainTable.converterList.
                        *
                        * The lookup is tried with the alias as given and, if that finds nothing
                        * and the alias starts with "x-", once more without that prefix.
                        * containsOption and pErrorCode are handed through to findConverter().
                        * Returns NULL when haveAliasData() or isAlias() fails, or when no
                        * attempt finds a converter.
                        */
     737             : U_CAPI const char *
     738           0 : ucnv_io_getConverterName(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
     739           0 :     const char *aliasTmp = alias;
     740           0 :     int32_t i = 0;
     741           0 :     for (i = 0; i < 2; i++) {
     742           0 :         if (i == 1) {
     743             :             /*
     744             :              * After the first unsuccessful converter lookup, check to see if
     745             :              * the name begins with 'x-'. If it does, strip it off and try
     746             :              * again.  This behaviour is similar to how ICU4J does it.
     747             :              */
     748           0 :             if (aliasTmp[0] == 'x' && aliasTmp[1] == '-') {
     749           0 :                 aliasTmp = aliasTmp+2;
     750             :             } else {
     751             :                 break;
     752             :             }
     753             :         }
     754           0 :         if(haveAliasData(pErrorCode) && isAlias(aliasTmp, pErrorCode)) {
     755           0 :             uint32_t convNum = findConverter(aliasTmp, containsOption, pErrorCode);
     756           0 :             if (convNum < gMainTable.converterListSize) {
     757           0 :                 return GET_STRING(gMainTable.converterList[convNum]);
     758             :             }
     759             :             /* else converter not found */
     760             :         } else {
     761           0 :             break;
     762             :         }
     763             :     }
     764             : 
     765           0 :     return NULL;
     766             : }
     767             : 
     768             : U_CDECL_BEGIN
     769             : 
     770             : 
                       /*
                        * Count callback used in gEnumAliases.
                        * Returns the entry count stored at taggedAliasLists[listOffset] of the
                        * enumeration context, or 0 when the context has listOffset 0.
                        * The error code parameter is unused.
                        */
     771             : static int32_t U_CALLCONV
     772           0 : ucnv_io_countStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
     773           0 :     int32_t value = 0;
     774           0 :     UAliasContext *myContext = (UAliasContext *)(enumerator->context);
     775           0 :     uint32_t listOffset = myContext->listOffset;
     776             : 
     777           0 :     if (listOffset) {
     778           0 :         value = gMainTable.taggedAliasLists[listOffset];
     779             :     }
     780           0 :     return value;
     781             : }
     782             : 
                       /*
                        * Next callback used in gEnumAliases.
                        * Returns the string for the entry at the context's listIdx and advances
                        * listIdx. When resultLength is non-NULL it receives the string length.
                        * At the end of the list, or when listOffset is 0, returns NULL and
                        * stores 0 in *resultLength. The error code parameter is unused.
                        */
     783             : static const char * U_CALLCONV
     784           0 : ucnv_io_nextStandardAliases(UEnumeration *enumerator,
     785             :                             int32_t* resultLength,
     786             :                             UErrorCode * /*pErrorCode*/)
     787             : {
     788           0 :     UAliasContext *myContext = (UAliasContext *)(enumerator->context);
     789           0 :     uint32_t listOffset = myContext->listOffset;
     790             : 
     791           0 :     if (listOffset) {
     792           0 :         uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
                               /* +1 to skip the count stored in front of the entries */
     793           0 :         const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
     794             : 
     795           0 :         if (myContext->listIdx < listCount) {
     796           0 :             const char *myStr = GET_STRING(currList[myContext->listIdx++]);
     797           0 :             if (resultLength) {
     798           0 :                 *resultLength = (int32_t)uprv_strlen(myStr);
     799             :             }
     800           0 :             return myStr;
     801             :         }
     802             :     }
     803             :     /* Either we accessed a zero length list, or we enumerated too far. */
     804           0 :     if (resultLength) {
     805           0 :         *resultLength = 0;
     806             :     }
     807           0 :     return NULL;
     808             : }
     809             : 
                       /*
                        * Reset callback used in gEnumAliases: rewinds the context's listIdx to 0
                        * so that the next call starts at the first entry again.
                        * The error code parameter is unused.
                        */
     810             : static void U_CALLCONV
     811           0 : ucnv_io_resetStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
     812           0 :     ((UAliasContext *)(enumerator->context))->listIdx = 0;
     813           0 : }
     814             : 
                       /*
                        * Close callback shared by gEnumAliases and gEnumAllConverters.
                        * Releases the context first and then the enumeration object itself,
                        * both with uprv_free().
                        */
     815             : static void U_CALLCONV
     816           0 : ucnv_io_closeUEnumeration(UEnumeration *enumerator) {
     817           0 :     uprv_free(enumerator->context);
     818           0 :     uprv_free(enumerator);
     819           0 : }
     820             : 
     821             : U_CDECL_END
     822             : 
     823             : /* Enumerate the aliases for the specified converter and standard tag */
                       /*
                        * Template object: ucnv_openStandardNames() copies it with uprv_memcpy()
                        * and then sets the context member of the copy.
                        */
     824             : static const UEnumeration gEnumAliases = {
     825             :     NULL,
     826             :     NULL,
     827             :     ucnv_io_closeUEnumeration,
     828             :     ucnv_io_countStandardAliases,
     829             :     uenum_unextDefault,
     830             :     ucnv_io_nextStandardAliases,
     831             :     ucnv_io_resetStandardAliases
     832             : };
     833             : 
                       /*
                        * Create an enumeration over the alias list that belongs to convName
                        * under the given standard.
                        *
                        * Returns NULL when haveAliasData() or isAlias() fails, when
                        * findTaggedAliasListsOffset() returns a value that is not below
                        * gMainTable.taggedAliasListsSize, or when an allocation fails (in that
                        * case *pErrorCode is set to U_MEMORY_ALLOCATION_ERROR).
                        * The enumeration and its context are allocated with uprv_malloc() and
                        * released by ucnv_io_closeUEnumeration().
                        */
     834             : U_CAPI UEnumeration * U_EXPORT2
     835           0 : ucnv_openStandardNames(const char *convName,
     836             :                        const char *standard,
     837             :                        UErrorCode *pErrorCode)
     838             : {
     839           0 :     UEnumeration *myEnum = NULL;
     840           0 :     if (haveAliasData(pErrorCode) && isAlias(convName, pErrorCode)) {
     841           0 :         uint32_t listOffset = findTaggedAliasListsOffset(convName, standard, pErrorCode);
     842             : 
     843             :         /* When listOffset == 0, we want to acknowledge that the
     844             :            converter name and standard are okay, but there
     845             :            is nothing to enumerate. */
     846           0 :         if (listOffset < gMainTable.taggedAliasListsSize) {
     847             :             UAliasContext *myContext;
     848             : 
     849           0 :             myEnum = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)));
     850           0 :             if (myEnum == NULL) {
     851           0 :                 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
     852           0 :                 return NULL;
     853             :             }
     854           0 :             uprv_memcpy(myEnum, &gEnumAliases, sizeof(UEnumeration));
     855           0 :             myContext = static_cast<UAliasContext *>(uprv_malloc(sizeof(UAliasContext)));
     856           0 :             if (myContext == NULL) {
     857           0 :                 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
                                       /* do not leak the enumeration allocated above */
     858           0 :                 uprv_free(myEnum);
     859           0 :                 return NULL;
     860             :             }
     861           0 :             myContext->listOffset = listOffset;
     862           0 :             myContext->listIdx = 0;
     863           0 :             myEnum->context = myContext;
     864             :         }
     865             :         /* else converter or tag not found */
     866             :     }
     867           0 :     return myEnum;
     868             : }
     869             : 
                       /*
                        * Return the number of entries in the alias list of the converter that
                        * alias resolves to, taken from the last tag row of taggedAliasArray.
                        * Returns 0 when haveAliasData() or isAlias() fails, when the converter
                        * is not found, or when that row has no list for the converter.
                        */
     870             : static uint16_t
     871           0 : ucnv_io_countAliases(const char *alias, UErrorCode *pErrorCode) {
     872           0 :     if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
     873           0 :         uint32_t convNum = findConverter(alias, NULL, pErrorCode);
     874           0 :         if (convNum < gMainTable.converterListSize) {
     875             :             /* tagListNum - 1 is the ALL tag */
     876           0 :             int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
     877             : 
     878           0 :             if (listOffset) {
                                       /* the element at listOffset is the count of the list */
     879           0 :                 return gMainTable.taggedAliasLists[listOffset];
     880             :             }
     881             :             /* else this shouldn't happen. internal program error */
     882             :         }
     883             :         /* else converter not found */
     884             :     }
     885           0 :     return 0;
     886             : }
     887             : 
                       /*
                        * Store pointers to the alias strings of the converter that alias
                        * resolves to into aliases[], for list positions start .. listCount-1.
                        * Each string is written at its own list position, so aliases[] is
                        * indexed from 0 regardless of start.
                        *
                        * No bounds check is done on aliases[]; the caller has to provide room
                        * for listCount entries, the value that ucnv_io_countAliases() returns
                        * for the same alias.
                        * NOTE(review): every path returns 0, so the return value carries no
                        * information; the caller ucnv_getAliases() ignores it.
                        */
     888             : static uint16_t
     889           0 : ucnv_io_getAliases(const char *alias, uint16_t start, const char **aliases, UErrorCode *pErrorCode) {
     890           0 :     if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
     891             :         uint32_t currAlias;
     892           0 :         uint32_t convNum = findConverter(alias, NULL, pErrorCode);
     893           0 :         if (convNum < gMainTable.converterListSize) {
     894             :             /* tagListNum - 1 is the ALL tag */
     895           0 :             int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
     896             : 
     897           0 :             if (listOffset) {
     898           0 :                 uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
     899             :                 /* +1 to skip listCount */
     900           0 :                 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
     901             : 
     902           0 :                 for (currAlias = start; currAlias < listCount; currAlias++) {
     903           0 :                     aliases[currAlias] = GET_STRING(currList[currAlias]);
     904             :                 }
     905             :             }
     906             :             /* else this shouldn't happen. internal program error */
     907             :         }
     908             :         /* else converter not found */
     909             :     }
     910           0 :     return 0;
     911             : }
     912             : 
                       /*
                        * Return the n-th (0-based) alias string of the converter that alias
                        * resolves to, taken from the last tag row of taggedAliasArray.
                        * Sets *pErrorCode to U_INDEX_OUTOFBOUNDS_ERROR and returns NULL when n
                        * is not below the list count. Also returns NULL when haveAliasData()
                        * or isAlias() fails, the converter is not found, or there is no list.
                        */
     913             : static const char *
     914           0 : ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) {
     915           0 :     if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
     916           0 :         uint32_t convNum = findConverter(alias, NULL, pErrorCode);
     917           0 :         if (convNum < gMainTable.converterListSize) {
     918             :             /* tagListNum - 1 is the ALL tag */
     919           0 :             int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
     920             : 
     921           0 :             if (listOffset) {
     922           0 :                 uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
     923             :                 /* +1 to skip listCount */
     924           0 :                 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
     925             : 
     926           0 :                 if (n < listCount)  {
     927           0 :                     return GET_STRING(currList[n]);
     928             :                 }
     929           0 :                 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
     930             :             }
     931             :             /* else this shouldn't happen. internal program error */
     932             :         }
     933             :         /* else converter not found */
     934             :     }
     935           0 :     return NULL;
     936             : }
     937             : 
                       /*
                        * Return the number of visible standards (tags), i.e.
                        * gMainTable.tagListSize minus UCNV_NUM_HIDDEN_TAGS,
                        * or 0 when haveAliasData() fails.
                        */
     938             : static uint16_t
     939           0 : ucnv_io_countStandards(UErrorCode *pErrorCode) {
     940           0 :     if (haveAliasData(pErrorCode)) {
     941             :         /* Don't include the empty list */
     942           0 :         return (uint16_t)(gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS);
     943             :     }
     944             : 
     945           0 :     return 0;
     946             : }
     947             : 
                       /*
                        * Return the name of the n-th (0-based) standard from gMainTable.tagList.
                        * Only indexes below tagListSize - UCNV_NUM_HIDDEN_TAGS are accepted;
                        * otherwise *pErrorCode is set to U_INDEX_OUTOFBOUNDS_ERROR.
                        * Returns NULL in that case and when haveAliasData() fails.
                        */
     948             : U_CAPI const char * U_EXPORT2
     949           0 : ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) {
     950           0 :     if (haveAliasData(pErrorCode)) {
     951           0 :         if (n < gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) {
     952           0 :             return GET_STRING(gMainTable.tagList[n]);
     953             :         }
     954           0 :         *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
     955             :     }
     956             : 
     957           0 :     return NULL;
     958             : }
     959             : 
                       /*
                        * Return the name that the given standard prefers for alias: the first
                        * entry of the list found by findTaggedAliasListsOffset().
                        * Returns NULL when haveAliasData() or isAlias() fails, when the offset
                        * is 0 or not below gMainTable.taggedAliasListsSize, or when the first
                        * entry of the list is 0.
                        */
     960             : U_CAPI const char * U_EXPORT2
     961           0 : ucnv_getStandardName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
     962           0 :     if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
     963           0 :         uint32_t listOffset = findTaggedAliasListsOffset(alias, standard, pErrorCode);
     964             : 
                               /* 0 means "no default name"; UINT32_MAX (not found) fails the upper bound */
     965           0 :         if (0 < listOffset && listOffset < gMainTable.taggedAliasListsSize) {
                                   /* +1 to skip the count stored in front of the entries */
     966           0 :             const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
     967             : 
     968             :             /* Get the preferred name from this list */
     969           0 :             if (currList[0]) {
     970           0 :                 return GET_STRING(currList[0]);
     971             :             }
     972             :             /* else someone screwed up the alias table. */
     973             :             /* *pErrorCode = U_INVALID_FORMAT_ERROR */
     974             :         }
     975             :     }
     976             : 
     977           0 :     return NULL;
     978             : }
     979             : 
                       /* Exported wrapper: forwards unchanged to ucnv_io_countAliases(). */
     980             : U_CAPI uint16_t U_EXPORT2
     981           0 : ucnv_countAliases(const char *alias, UErrorCode *pErrorCode)
     982             : {
     983           0 :     return ucnv_io_countAliases(alias, pErrorCode);
     984             : }
     985             : 
     986             : 
                       /* Exported wrapper: forwards unchanged to ucnv_io_getAlias(). */
     987             : U_CAPI const char* U_EXPORT2
     988           0 : ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode)
     989             : {
     990           0 :     return ucnv_io_getAlias(alias, n, pErrorCode);
     991             : }
     992             : 
                       /*
                        * Exported wrapper: calls ucnv_io_getAliases() with start 0 and discards
                        * its return value. ucnv_io_getAliases() does not check the size of
                        * aliases[], so the array must be large enough for the whole list.
                        */
     993             : U_CAPI void U_EXPORT2
     994           0 : ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode)
     995             : {
     996           0 :     ucnv_io_getAliases(alias, 0, aliases, pErrorCode);
     997           0 : }
     998             : 
                       /*
                        * Exported wrapper around ucnv_io_countStandards().
                        * It takes no error parameter: a local UErrorCode is used and dropped,
                        * so a failure is only visible as a return value of 0.
                        */
     999             : U_CAPI uint16_t U_EXPORT2
    1000           0 : ucnv_countStandards(void)
    1001             : {
    1002           0 :     UErrorCode err = U_ZERO_ERROR;
    1003           0 :     return ucnv_io_countStandards(&err);
    1004             : }
    1005             : 
                       /*
                        * Return the converter name from gMainTable.converterList for the
                        * converter number that findTaggedConverterNum() finds for alias under
                        * the given standard.
                        * Returns NULL when haveAliasData() or isAlias() fails or when no valid
                        * converter number is found.
                        */
    1006             : U_CAPI const char * U_EXPORT2
    1007           0 : ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
    1008           0 :     if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
    1009           0 :         uint32_t convNum = findTaggedConverterNum(alias, standard, pErrorCode);
    1010             : 
    1011           0 :         if (convNum < gMainTable.converterListSize) {
    1012           0 :             return GET_STRING(gMainTable.converterList[convNum]);
    1013             :         }
    1014             :     }
    1015             : 
    1016           0 :     return NULL;
    1017             : }
    1018             : 
    1019             : U_CDECL_BEGIN
    1020             : 
    1021             : 
                       /*
                        * Count callback used in gEnumAllConverters.
                        * Returns gMainTable.converterListSize directly; both parameters are
                        * unused and haveAliasData() is not called here.
                        */
    1022             : static int32_t U_CALLCONV
    1023           0 : ucnv_io_countAllConverters(UEnumeration * /*enumerator*/, UErrorCode * /*pErrorCode*/) {
    1024           0 :     return gMainTable.converterListSize;
    1025             : }
    1026             : 
                       /*
                        * Next callback used in gEnumAllConverters.
                        * The context is a single uint16_t holding the current position in
                        * gMainTable.converterList. Returns the string at that position and
                        * advances it; when resultLength is non-NULL it receives the length.
                        * Past the end returns NULL and stores 0 in *resultLength.
                        * The error code parameter is unused.
                        */
    1027             : static const char * U_CALLCONV
    1028           0 : ucnv_io_nextAllConverters(UEnumeration *enumerator,
    1029             :                             int32_t* resultLength,
    1030             :                             UErrorCode * /*pErrorCode*/)
    1031             : {
    1032           0 :     uint16_t *myContext = (uint16_t *)(enumerator->context);
    1033             : 
    1034           0 :     if (*myContext < gMainTable.converterListSize) {
    1035           0 :         const char *myStr = GET_STRING(gMainTable.converterList[(*myContext)++]);
    1036           0 :         if (resultLength) {
    1037           0 :             *resultLength = (int32_t)uprv_strlen(myStr);
    1038             :         }
    1039           0 :         return myStr;
    1040             :     }
    1041             :     /* Either we accessed a zero length list, or we enumerated too far. */
    1042           0 :     if (resultLength) {
    1043           0 :         *resultLength = 0;
    1044             :     }
    1045           0 :     return NULL;
    1046             : }
    1047             : 
                       /*
                        * Reset callback used in gEnumAllConverters: sets the uint16_t position
                        * stored in the context back to 0. The error code parameter is unused.
                        */
    1048             : static void U_CALLCONV
    1049           0 : ucnv_io_resetAllConverters(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
    1050           0 :     *((uint16_t *)(enumerator->context)) = 0;
    1051           0 : }
    1052             : U_CDECL_END
    1053             : static const UEnumeration gEnumAllConverters = {
                           /*
                            * Template object for enumerating all converter names:
                            * ucnv_openAllNames() copies it with uprv_memcpy() and then sets the
                            * context member of the copy.
                            */
    1054             :     NULL,
    1055             :     NULL,
    1056             :     ucnv_io_closeUEnumeration,
    1057             :     ucnv_io_countAllConverters,
    1058             :     uenum_unextDefault,
    1059             :     ucnv_io_nextAllConverters,
    1060             :     ucnv_io_resetAllConverters
    1061             : };
    1062             : 
                       /*
                        * Create an enumeration over all names in gMainTable.converterList.
                        *
                        * Returns NULL when haveAliasData() fails or when an allocation fails
                        * (in that case *pErrorCode is set to U_MEMORY_ALLOCATION_ERROR).
                        * The enumeration and its uint16_t position context are allocated with
                        * uprv_malloc() and released by ucnv_io_closeUEnumeration().
                        */
    1063             : U_CAPI UEnumeration * U_EXPORT2
    1064           0 : ucnv_openAllNames(UErrorCode *pErrorCode) {
    1065           0 :     UEnumeration *myEnum = NULL;
    1066           0 :     if (haveAliasData(pErrorCode)) {
    1067             :         uint16_t *myContext;
    1068             : 
    1069           0 :         myEnum = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)));
    1070           0 :         if (myEnum == NULL) {
    1071           0 :             *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
    1072           0 :             return NULL;
    1073             :         }
    1074           0 :         uprv_memcpy(myEnum, &gEnumAllConverters, sizeof(UEnumeration));
    1075           0 :         myContext = static_cast<uint16_t *>(uprv_malloc(sizeof(uint16_t)));
    1076           0 :         if (myContext == NULL) {
    1077           0 :             *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
                                   /* do not leak the enumeration allocated above */
    1078           0 :             uprv_free(myEnum);
    1079           0 :             return NULL;
    1080             :         }
                               /* enumeration starts at the first converter */
    1081           0 :         *myContext = 0;
    1082           0 :         myEnum->context = myContext;
    1083             :     }
    1084           0 :     return myEnum;
    1085             : }
    1086             : 
                       /*
                        * Return gMainTable.converterListSize truncated to uint16_t,
                        * or 0 when haveAliasData() fails.
                        */
    1087             : U_CAPI uint16_t
    1088           3 : ucnv_io_countKnownConverters(UErrorCode *pErrorCode) {
    1089           3 :     if (haveAliasData(pErrorCode)) {
    1090           3 :         return (uint16_t)gMainTable.converterListSize;
    1091             :     }
    1092           0 :     return 0;
    1093             : }
    1094             : 
    1095             : /* alias table swapping ----------------------------------------------------- */
    1096             : 
    1097             : U_CDECL_BEGIN
    1098             : /* Signature of the name-stripping functions selected in ucnv_swapAliases(): writes a comparable form of name into dst; the returned pointer is what io_compareRows() hands to uprv_strcmp(). */
    1099             : typedef char * U_CALLCONV StripForCompareFn(char *dst, const char *name);
    1100             : U_CDECL_END
    1101             : 
    1102             : 
    1103             : /*
    1104             :  * One row of the temporary array that ucnv_swapAliases() sorts.
    1105             :  *
    1106             :  * Each row gets a platform-endian charset string index and a sorting index;
    1107             :  * after sorting this array by strings, the actual arrays are permuted
    1108             :  * according to the sorting indexes.
    1109             :  */
    1110             : typedef struct TempRow {
    1111             :     uint16_t strIndex, sortIndex; /* strIndex: string position in 16-bit units (doubled to a byte offset when compared); sortIndex: the row's position before sorting */
    1112             : } TempRow;
    1113             : /* Working state for re-sorting the alias list in ucnv_swapAliases(); also passed to io_compareRows() as the sort context. */
    1114             : typedef struct TempAliasTable {
    1115             :     const char *chars; /* base of the string table that row string indexes refer to */
    1116             :     TempRow *rows; /* rows being sorted: stack array or heap block */
    1117             :     uint16_t *resort; /* scratch array used only when the permutation is done in place */
    1118             :     StripForCompareFn *stripForCompare; /* ASCII or EBCDIC stripping function, chosen by output charset */
    1119             : } TempAliasTable;
    1120             : /* Alias counts up to this value are sorted in stack arrays inside ucnv_swapAliases(); larger counts use uprv_malloc(). */
    1121             : enum {
    1122             :     STACK_ROW_CAPACITY=500
    1123             : };
    1124             : /* Comparator for uprv_sortArray(): orders two TempRow entries by uprv_strcmp() of their names after stripForCompare(); context is the TempAliasTable. */
    1125             : static int32_t U_CALLCONV
    1126           0 : io_compareRows(const void *context, const void *left, const void *right) {
    1127             :     char strippedLeft[UCNV_MAX_CONVERTER_NAME_LENGTH],
    1128             :          strippedRight[UCNV_MAX_CONVERTER_NAME_LENGTH];
    1129             : 
    1130           0 :     TempAliasTable *tempTable=(TempAliasTable *)context;
    1131           0 :     const char *chars=tempTable->chars;
    1132             :     /* strIndex is doubled because chars is addressed in bytes while the index counts 16-bit units */
    1133           0 :     return (int32_t)uprv_strcmp(tempTable->stripForCompare(strippedLeft, chars+2*((const TempRow *)left)->strIndex),
    1134             :                                 tempTable->stripForCompare(strippedRight, chars+2*((const TempRow *)right)->strIndex));
    1135             : }
    1136             : /* Swaps a converter alias table (dataFormat "CvAl", formatVersion 3) using the conversions in ds. Returns header size plus table size in bytes, or 0 with *pErrorCode set on failure; with a negative length nothing is swapped and only the size is returned. */
    1137             : U_CAPI int32_t U_EXPORT2
    1138           0 : ucnv_swapAliases(const UDataSwapper *ds,
    1139             :                  const void *inData, int32_t length, void *outData,
    1140             :                  UErrorCode *pErrorCode) {
    1141             :     const UDataInfo *pInfo;
    1142             :     int32_t headerSize;
    1143             :     /* toc[] holds the section sizes read from the table of contents; offsets[] holds where each section starts */
    1144             :     const uint16_t *inTable;
    1145             :     const uint32_t *inSectionSizes;
    1146             :     uint32_t toc[offsetsCount];
    1147             :     uint32_t offsets[offsetsCount]; /* 16-bit-addressed offsets from inTable/outTable */
    1148             :     uint32_t i, count, tocLength, topOffset;
    1149             :     /* stack storage for sorting, used when the alias count is at most STACK_ROW_CAPACITY */
    1150             :     TempRow rows[STACK_ROW_CAPACITY];
    1151             :     uint16_t resort[STACK_ROW_CAPACITY];
    1152             :     TempAliasTable tempTable;
    1153             : 
    1154             :     /* udata_swapDataHeader checks the arguments */
    1155           0 :     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    1156           0 :     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    1157           0 :         return 0;
    1158             :     }
    1159             : 
    1160             :     /* check data format and format version */
    1161           0 :     pInfo=(const UDataInfo *)((const char *)inData+4);
    1162           0 :     if(!(
    1163           0 :         pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CvAl" */
    1164           0 :         pInfo->dataFormat[1]==0x76 &&
    1165           0 :         pInfo->dataFormat[2]==0x41 &&
    1166           0 :         pInfo->dataFormat[3]==0x6c &&
    1167           0 :         pInfo->formatVersion[0]==3
    1168             :     )) {
    1169           0 :         udata_printError(ds, "ucnv_swapAliases(): data format %02x.%02x.%02x.%02x (format version %02x) is not an alias table\n",
    1170           0 :                          pInfo->dataFormat[0], pInfo->dataFormat[1],
    1171           0 :                          pInfo->dataFormat[2], pInfo->dataFormat[3],
    1172           0 :                          pInfo->formatVersion[0]);
    1173           0 :         *pErrorCode=U_UNSUPPORTED_ERROR;
    1174           0 :         return 0;
    1175             :     }
    1176             : 
    1177             :     /* an alias table must contain at least the table of contents array */
    1178           0 :     if(length>=0 && (length-headerSize)<4*(1+minTocLength)) {
    1179           0 :         udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
    1180           0 :                          length-headerSize);
    1181           0 :         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    1182           0 :         return 0;
    1183             :     }
    1184             : 
    1185           0 :     inSectionSizes=(const uint32_t *)((const char *)inData+headerSize);
    1186           0 :     inTable=(const uint16_t *)inSectionSizes;
    1187           0 :     uprv_memset(toc, 0, sizeof(toc));
    1188           0 :     toc[tocLengthIndex]=tocLength=ds->readUInt32(inSectionSizes[tocLengthIndex]);
    1189           0 :     if(tocLength<minTocLength || offsetsCount<=tocLength) { /* tocLength must also be a valid index into toc[] and offsets[] */
    1190           0 :         udata_printError(ds, "ucnv_swapAliases(): table of contents contains unsupported number of sections (%u sections)\n", tocLength);
    1191           0 :         *pErrorCode=U_INVALID_FORMAT_ERROR;
    1192           0 :         return 0;
    1193             :     }
    1194             : 
    1195             :     /* read the known part of the table of contents; NOTE(review): only 1+minTocLength words were length-checked above but 1+tocLength are read here -- confirm a truncated input cannot be over-read */
    1196           0 :     for(i=converterListIndex; i<=tocLength; ++i) {
    1197           0 :         toc[i]=ds->readUInt32(inSectionSizes[i]);
    1198             :     }
    1199             : 
    1200             :     /* compute offsets */
    1201           0 :     uprv_memset(offsets, 0, sizeof(offsets));
    1202           0 :     offsets[converterListIndex]=2*(1+tocLength); /* count two 16-bit units per toc entry */
    1203           0 :     for(i=tagListIndex; i<=tocLength; ++i) { /* each section starts where the previous one ends */
    1204           0 :         offsets[i]=offsets[i-1]+toc[i-1];
    1205             :     }
    1206             : 
    1207             :     /* compute the overall size of the after-header data, in numbers of 16-bit units */
    1208           0 :     topOffset=offsets[i-1]+toc[i-1]; /* i==tocLength+1 after the loop, so this is the end of the last section */
    1209             : 
    1210           0 :     if(length>=0) {
    1211             :         uint16_t *outTable;
    1212             :         const uint16_t *p, *p2;
    1213             :         uint16_t *q, *q2;
    1214             :         uint16_t oldIndex;
    1215             : 
    1216           0 :         if((length-headerSize)<(2*(int32_t)topOffset)) {
    1217           0 :             udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
    1218           0 :                              length-headerSize);
    1219           0 :             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    1220           0 :             return 0;
    1221             :         }
    1222             : 
    1223           0 :         outTable=(uint16_t *)((char *)outData+headerSize);
    1224             : 
    1225             :         /* swap the entire table of contents */
    1226           0 :         ds->swapArray32(ds, inTable, 4*(1+tocLength), outTable, pErrorCode);
    1227             : 
    1228             :         /* swap unnormalized strings & normalized strings, handled as one contiguous run */
    1229           0 :         ds->swapInvChars(ds, inTable+offsets[stringTableIndex], 2*(int32_t)(toc[stringTableIndex]+toc[normalizedStringTableIndex]),
    1230           0 :                              outTable+offsets[stringTableIndex], pErrorCode);
    1231           0 :         if(U_FAILURE(*pErrorCode)) {
    1232           0 :             udata_printError(ds, "ucnv_swapAliases().swapInvChars(charset names) failed\n");
    1233           0 :             return 0;
    1234             :         }
    1235             : 
    1236           0 :         if(ds->inCharset==ds->outCharset) {
    1237             :             /* no need to sort, just swap all 16-bit values together */
    1238           0 :             ds->swapArray16(ds,
    1239           0 :                             inTable+offsets[converterListIndex],
    1240           0 :                             2*(int32_t)(offsets[stringTableIndex]-offsets[converterListIndex]),
    1241           0 :                             outTable+offsets[converterListIndex],
    1242           0 :                             pErrorCode);
    1243             :         } else {
    1244             :             /* allocate the temporary table for sorting */
    1245           0 :             count=toc[aliasListIndex];
    1246             : 
    1247           0 :             tempTable.chars=(const char *)(outTable+offsets[stringTableIndex]); /* sort by outCharset */
    1248             : 
    1249           0 :             if(count<=STACK_ROW_CAPACITY) {
    1250           0 :                 tempTable.rows=rows;
    1251           0 :                 tempTable.resort=resort;
    1252             :             } else {
    1253           0 :                 tempTable.rows=(TempRow *)uprv_malloc(count*sizeof(TempRow)+count*2); /* one block: count rows followed by count 2-byte resort slots */
    1254           0 :                 if(tempTable.rows==NULL) {
    1255             :                     udata_printError(ds, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n",
    1256           0 :                                      count);
    1257           0 :                     *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    1258           0 :                     return 0;
    1259             :                 }
    1260           0 :                 tempTable.resort=(uint16_t *)(tempTable.rows+count); /* resort starts right after the last row in the same block */
    1261             :             }
    1262             : 
    1263           0 :             if(ds->outCharset==U_ASCII_FAMILY) {
    1264           0 :                 tempTable.stripForCompare=ucnv_io_stripASCIIForCompare;
    1265             :             } else /* U_EBCDIC_FAMILY */ {
    1266           0 :                 tempTable.stripForCompare=ucnv_io_stripEBCDICForCompare;
    1267             :             }
    1268             : 
    1269             :             /*
    1270             :              * Sort unique aliases+mapped names.
    1271             :              *
    1272             :              * We need to sort the list again by outCharset strings because they
    1273             :              * sort differently for different charset families.
    1274             :              * First we set up a temporary table with the string indexes and
    1275             :              * sorting indexes and sort that.
    1276             :              * Then we permute and copy/swap the actual values.
    1277             :              */
    1278           0 :             p=inTable+offsets[aliasListIndex];
    1279           0 :             q=outTable+offsets[aliasListIndex];
    1280             : 
    1281           0 :             p2=inTable+offsets[untaggedConvArrayIndex];
    1282           0 :             q2=outTable+offsets[untaggedConvArrayIndex];
    1283             : 
    1284           0 :             for(i=0; i<count; ++i) {
    1285           0 :                 tempTable.rows[i].strIndex=ds->readUInt16(p[i]);
    1286           0 :                 tempTable.rows[i].sortIndex=(uint16_t)i; /* NOTE(review): count is 32-bit; an alias count above 0xffff would truncate here -- confirm the format rules that out */
    1287             :             }
    1288             : 
    1289           0 :             uprv_sortArray(tempTable.rows, (int32_t)count, sizeof(TempRow),
    1290             :                            io_compareRows, &tempTable,
    1291           0 :                            FALSE, pErrorCode);
    1292             : 
    1293           0 :             if(U_SUCCESS(*pErrorCode)) {
    1294             :                 /* copy/swap/permute items; the alias list and the untagged converter array are reordered with the same permutation */
    1295           0 :                 if(p!=q) {
    1296           0 :                     for(i=0; i<count; ++i) {
    1297           0 :                         oldIndex=tempTable.rows[i].sortIndex;
    1298           0 :                         ds->swapArray16(ds, p+oldIndex, 2, q+i, pErrorCode);
    1299           0 :                         ds->swapArray16(ds, p2+oldIndex, 2, q2+i, pErrorCode);
    1300             :                     }
    1301             :                 } else {
    1302             :                     /*
    1303             :                      * If we swap in-place, then the permutation must use another
    1304             :                      * temporary array (tempTable.resort)
    1305             :                      * before the results are copied to the outBundle.
    1306             :                      */
    1307           0 :                     uint16_t *r=tempTable.resort;
    1308             : 
    1309           0 :                     for(i=0; i<count; ++i) {
    1310           0 :                         oldIndex=tempTable.rows[i].sortIndex;
    1311           0 :                         ds->swapArray16(ds, p+oldIndex, 2, r+i, pErrorCode);
    1312             :                     }
    1313           0 :                     uprv_memcpy(q, r, 2*(size_t)count);
    1314             : 
    1315           0 :                     for(i=0; i<count; ++i) {
    1316           0 :                         oldIndex=tempTable.rows[i].sortIndex;
    1317           0 :                         ds->swapArray16(ds, p2+oldIndex, 2, r+i, pErrorCode);
    1318             :                     }
    1319           0 :                     uprv_memcpy(q2, r, 2*(size_t)count);
    1320             :                 }
    1321             :             }
    1322             :             /* heap storage, if it was used, is released before the error code is examined, so the failure return below cannot leak it */
    1323           0 :             if(tempTable.rows!=rows) {
    1324           0 :                 uprv_free(tempTable.rows);
    1325             :             }
    1326             : 
    1327           0 :             if(U_FAILURE(*pErrorCode)) {
    1328             :                 udata_printError(ds, "ucnv_swapAliases().uprv_sortArray(%u items) failed\n",
    1329           0 :                                  count);
    1330           0 :                 return 0;
    1331             :             }
    1332             : 
    1333             :             /* swap remaining 16-bit values: the sections before the alias list, then those from the tagged alias array up to the string table */
    1334           0 :             ds->swapArray16(ds,
    1335           0 :                             inTable+offsets[converterListIndex],
    1336           0 :                             2*(int32_t)(offsets[aliasListIndex]-offsets[converterListIndex]),
    1337           0 :                             outTable+offsets[converterListIndex],
    1338           0 :                             pErrorCode);
    1339           0 :             ds->swapArray16(ds,
    1340           0 :                             inTable+offsets[taggedAliasArrayIndex],
    1341           0 :                             2*(int32_t)(offsets[stringTableIndex]-offsets[taggedAliasArrayIndex]),
    1342           0 :                             outTable+offsets[taggedAliasArrayIndex],
    1343           0 :                             pErrorCode);
    1344             :         }
    1345             :     }
    1346             :     /* topOffset counts 16-bit units, so it is doubled to give bytes */
    1347           0 :     return headerSize+2*(int32_t)topOffset;
    1348             : }
    1349             : 
    1350             : #endif
    1351             : 
    1352             : 
    1353             : /*
    1354             :  * Hey, Emacs, please set the following:
    1355             :  *
    1356             :  * Local Variables:
    1357             :  * indent-tabs-mode: nil
    1358             :  * End:
    1359             :  *
    1360             :  */

Generated by: LCOV version 1.13