LCOV - code coverage report
Current view: top level - intl/icu/source/common - ucnvsel.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 0 393 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 15 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : *******************************************************************************
       5             : *
       6             : *   Copyright (C) 2008-2011, International Business Machines
       7             : *   Corporation, Google and others.  All Rights Reserved.
       8             : *
       9             : *******************************************************************************
      10             : */
      11             : // Author : eldawy@google.com (Mohamed Eldawy)
      12             : // ucnvsel.cpp
      13             : //
      14             : // Purpose: To generate a list of encodings capable of handling
      15             : // a given Unicode text
      16             : //
      17             : // Started 09-April-2008
      18             : 
      19             : /**
      20             :  * \file
      21             :  *
      22             :  * This is an implementation of an encoding selector.
      23             :  * The goal is, given a unicode string, find the encodings
      24             :  * this string can be mapped to. To make processing faster
      25             :  * a trie is built when you call ucnvsel_open() that
      26             :  * stores all encodings a codepoint can map to
      27             :  */
      28             : 
      29             : #include "unicode/ucnvsel.h"
      30             : 
      31             : #if !UCONFIG_NO_CONVERSION
      32             : 
      33             : #include <string.h>
      34             : 
      35             : #include "unicode/uchar.h"
      36             : #include "unicode/uniset.h"
      37             : #include "unicode/ucnv.h"
      38             : #include "unicode/ustring.h"
      39             : #include "unicode/uchriter.h"
      40             : #include "utrie2.h"
      41             : #include "propsvec.h"
      42             : #include "uassert.h"
      43             : #include "ucmndata.h"
      44             : #include "uenumimp.h"
      45             : #include "cmemory.h"
      46             : #include "cstring.h"
      47             : 
      48             : U_NAMESPACE_USE
      49             : 
      50             : struct UConverterSelector {
      51             :   UTrie2 *trie;              // 16 bit trie containing offsets into pv
      52             :   uint32_t* pv;              // table of bits!
      53             :   int32_t pvCount;
      54             :   char** encodings;          // which encodings did user ask to use?
      55             :   int32_t encodingsCount;
      56             :   int32_t encodingStrLength;
      57             :   uint8_t* swapped;
      58             :   UBool ownPv, ownEncodingStrings;
      59             : };
      60             : 
      61           0 : static void generateSelectorData(UConverterSelector* result,
      62             :                                  UPropsVectors *upvec,
      63             :                                  const USet* excludedCodePoints,
      64             :                                  const UConverterUnicodeSet whichSet,
      65             :                                  UErrorCode* status) {
      66           0 :   if (U_FAILURE(*status)) {
      67           0 :     return;
      68             :   }
      69             : 
      70           0 :   int32_t columns = (result->encodingsCount+31)/32;
      71             : 
      72             :   // set errorValue to all-ones
      73           0 :   for (int32_t col = 0; col < columns; col++) {
      74             :     upvec_setValue(upvec, UPVEC_ERROR_VALUE_CP, UPVEC_ERROR_VALUE_CP,
      75           0 :                    col, ~0, ~0, status);
      76             :   }
      77             : 
      78           0 :   for (int32_t i = 0; i < result->encodingsCount; ++i) {
      79             :     uint32_t mask;
      80             :     uint32_t column;
      81             :     int32_t item_count;
      82             :     int32_t j;
      83           0 :     UConverter* test_converter = ucnv_open(result->encodings[i], status);
      84           0 :     if (U_FAILURE(*status)) {
      85           0 :       return;
      86             :     }
      87             :     USet* unicode_point_set;
      88           0 :     unicode_point_set = uset_open(1, 0);  // empty set
      89             : 
      90             :     ucnv_getUnicodeSet(test_converter, unicode_point_set,
      91           0 :                        whichSet, status);
      92           0 :     if (U_FAILURE(*status)) {
      93           0 :       ucnv_close(test_converter);
      94           0 :       return;
      95             :     }
      96             : 
      97           0 :     column = i / 32;
      98           0 :     mask = 1 << (i%32);
      99             :     // now iterate over intervals on set i!
     100           0 :     item_count = uset_getItemCount(unicode_point_set);
     101             : 
     102           0 :     for (j = 0; j < item_count; ++j) {
     103             :       UChar32 start_char;
     104             :       UChar32 end_char;
     105           0 :       UErrorCode smallStatus = U_ZERO_ERROR;
     106             :       uset_getItem(unicode_point_set, j, &start_char, &end_char, NULL, 0,
     107           0 :                    &smallStatus);
     108           0 :       if (U_FAILURE(smallStatus)) {
     109             :         // this will be reached for the converters that fill the set with
     110             :         // strings. Those should be ignored by our system
     111             :       } else {
     112           0 :         upvec_setValue(upvec, start_char, end_char, column, ~0, mask,
     113           0 :                        status);
     114             :       }
     115             :     }
     116           0 :     ucnv_close(test_converter);
     117           0 :     uset_close(unicode_point_set);
     118           0 :     if (U_FAILURE(*status)) {
     119           0 :       return;
     120             :     }
     121             :   }
     122             : 
     123             :   // handle excluded encodings! Simply set their values to all 1's in the upvec
     124           0 :   if (excludedCodePoints) {
     125           0 :     int32_t item_count = uset_getItemCount(excludedCodePoints);
     126           0 :     for (int32_t j = 0; j < item_count; ++j) {
     127             :       UChar32 start_char;
     128             :       UChar32 end_char;
     129             : 
     130             :       uset_getItem(excludedCodePoints, j, &start_char, &end_char, NULL, 0,
     131           0 :                    status);
     132           0 :       for (int32_t col = 0; col < columns; col++) {
     133             :         upvec_setValue(upvec, start_char, end_char, col, ~0, ~0,
     134           0 :                       status);
     135             :       }
     136             :     }
     137             :   }
     138             : 
     139             :   // alright. Now, let's put things in the same exact form you'd get when you
     140             :   // unserialize things.
     141           0 :   result->trie = upvec_compactToUTrie2WithRowIndexes(upvec, status);
     142           0 :   result->pv = upvec_cloneArray(upvec, &result->pvCount, NULL, status);
     143           0 :   result->pvCount *= columns;  // number of uint32_t = rows * columns
     144           0 :   result->ownPv = TRUE;
     145             : }
     146             : 
     147             : /* open a selector. If converterListSize is 0, build for all converters.
     148             :    If excludedCodePoints is NULL, don't exclude any codepoints */
     149             : U_CAPI UConverterSelector* U_EXPORT2
     150           0 : ucnvsel_open(const char* const*  converterList, int32_t converterListSize,
     151             :              const USet* excludedCodePoints,
     152             :              const UConverterUnicodeSet whichSet, UErrorCode* status) {
     153             :   // check if already failed
     154           0 :   if (U_FAILURE(*status)) {
     155           0 :     return NULL;
     156             :   }
     157             :   // ensure args make sense!
     158           0 :   if (converterListSize < 0 || (converterList == NULL && converterListSize != 0)) {
     159           0 :     *status = U_ILLEGAL_ARGUMENT_ERROR;
     160           0 :     return NULL;
     161             :   }
     162             : 
     163             :   // allocate a new converter
     164             :   LocalUConverterSelectorPointer newSelector(
     165           0 :     (UConverterSelector*)uprv_malloc(sizeof(UConverterSelector)));
     166           0 :   if (newSelector.isNull()) {
     167           0 :     *status = U_MEMORY_ALLOCATION_ERROR;
     168           0 :     return NULL;
     169             :   }
     170           0 :   uprv_memset(newSelector.getAlias(), 0, sizeof(UConverterSelector));
     171             : 
     172           0 :   if (converterListSize == 0) {
     173           0 :     converterList = NULL;
     174           0 :     converterListSize = ucnv_countAvailable();
     175             :   }
     176           0 :   newSelector->encodings =
     177           0 :     (char**)uprv_malloc(converterListSize * sizeof(char*));
     178           0 :   if (!newSelector->encodings) {
     179           0 :     *status = U_MEMORY_ALLOCATION_ERROR;
     180           0 :     return NULL;
     181             :   }
     182           0 :   newSelector->encodings[0] = NULL;  // now we can call ucnvsel_close()
     183             : 
     184             :   // make a backup copy of the list of converters
     185           0 :   int32_t totalSize = 0;
     186             :   int32_t i;
     187           0 :   for (i = 0; i < converterListSize; i++) {
     188           0 :     totalSize +=
     189           0 :       (int32_t)uprv_strlen(converterList != NULL ? converterList[i] : ucnv_getAvailableName(i)) + 1;
     190             :   }
     191             :   // 4-align the totalSize to 4-align the size of the serialized form
     192           0 :   int32_t encodingStrPadding = totalSize & 3;
     193           0 :   if (encodingStrPadding != 0) {
     194           0 :     encodingStrPadding = 4 - encodingStrPadding;
     195             :   }
     196           0 :   newSelector->encodingStrLength = totalSize += encodingStrPadding;
     197           0 :   char* allStrings = (char*) uprv_malloc(totalSize);
     198           0 :   if (!allStrings) {
     199           0 :     *status = U_MEMORY_ALLOCATION_ERROR;
     200           0 :     return NULL;
     201             :   }
     202             : 
     203           0 :   for (i = 0; i < converterListSize; i++) {
     204           0 :     newSelector->encodings[i] = allStrings;
     205           0 :     uprv_strcpy(newSelector->encodings[i],
     206           0 :                 converterList != NULL ? converterList[i] : ucnv_getAvailableName(i));
     207           0 :     allStrings += uprv_strlen(newSelector->encodings[i]) + 1;
     208             :   }
     209           0 :   while (encodingStrPadding > 0) {
     210           0 :     *allStrings++ = 0;
     211           0 :     --encodingStrPadding;
     212             :   }
     213             : 
     214           0 :   newSelector->ownEncodingStrings = TRUE;
     215           0 :   newSelector->encodingsCount = converterListSize;
     216           0 :   UPropsVectors *upvec = upvec_open((converterListSize+31)/32, status);
     217           0 :   generateSelectorData(newSelector.getAlias(), upvec, excludedCodePoints, whichSet, status);
     218           0 :   upvec_close(upvec);
     219             : 
     220           0 :   if (U_FAILURE(*status)) {
     221           0 :     return NULL;
     222             :   }
     223             : 
     224           0 :   return newSelector.orphan();
     225             : }
     226             : 
     227             : /* close opened selector */
     228             : U_CAPI void U_EXPORT2
     229           0 : ucnvsel_close(UConverterSelector *sel) {
     230           0 :   if (!sel) {
     231           0 :     return;
     232             :   }
     233           0 :   if (sel->ownEncodingStrings) {
     234           0 :     uprv_free(sel->encodings[0]);
     235             :   }
     236           0 :   uprv_free(sel->encodings);
     237           0 :   if (sel->ownPv) {
     238           0 :     uprv_free(sel->pv);
     239             :   }
     240           0 :   utrie2_close(sel->trie);
     241           0 :   uprv_free(sel->swapped);
     242           0 :   uprv_free(sel);
     243             : }
     244             : 
     245             : static const UDataInfo dataInfo = {
     246             :   sizeof(UDataInfo),
     247             :   0,
     248             : 
     249             :   U_IS_BIG_ENDIAN,
     250             :   U_CHARSET_FAMILY,
     251             :   U_SIZEOF_UCHAR,
     252             :   0,
     253             : 
     254             :   { 0x43, 0x53, 0x65, 0x6c },   /* dataFormat="CSel" */
     255             :   { 1, 0, 0, 0 },               /* formatVersion */
     256             :   { 0, 0, 0, 0 }                /* dataVersion */
     257             : };
     258             : 
     // Slots of the int32_t indexes[] array that follows the DataHeader in the
     // serialized form. Slots 4..14 are not assigned.
     enum {
       UCNVSEL_INDEX_TRIE_SIZE = 0,     // trie size in bytes
       UCNVSEL_INDEX_PV_COUNT = 1,      // number of uint32_t in the bit vectors
       UCNVSEL_INDEX_NAMES_COUNT = 2,   // number of encoding names
       UCNVSEL_INDEX_NAMES_LENGTH = 3,  // number of encoding name bytes including padding
       UCNVSEL_INDEX_SIZE = 15,         // bytes following the DataHeader
       UCNVSEL_INDEX_COUNT = 16         // number of slots in indexes[]
     };
     267             : 
     268             : /*
     269             :  * Serialized form of a UConverterSelector, formatVersion 1:
     270             :  *
     271             :  * The serialized form begins with a standard ICU DataHeader with a UDataInfo
     272             :  * as the template above.
     273             :  * This is followed by:
     274             :  *   int32_t indexes[UCNVSEL_INDEX_COUNT];          // see index entry constants above
     275             :  *   serialized UTrie2;                             // indexes[UCNVSEL_INDEX_TRIE_SIZE] bytes
     276             :  *   uint32_t pv[indexes[UCNVSEL_INDEX_PV_COUNT]];  // bit vectors
     277             :  *   char encodingNames[indexes[UCNVSEL_INDEX_NAMES_LENGTH]];   // NUL-terminated strings + padding
     278             :  */
     279             : 
     280             : /* serialize a selector */
     281             : U_CAPI int32_t U_EXPORT2
     282           0 : ucnvsel_serialize(const UConverterSelector* sel,
     283             :                   void* buffer, int32_t bufferCapacity, UErrorCode* status) {
     284             :   // check if already failed
     285           0 :   if (U_FAILURE(*status)) {
     286           0 :     return 0;
     287             :   }
     288             :   // ensure args make sense!
     289           0 :   uint8_t *p = (uint8_t *)buffer;
     290           0 :   if (bufferCapacity < 0 ||
     291           0 :       (bufferCapacity > 0 && (p == NULL || (U_POINTER_MASK_LSB(p, 3) != 0)))
     292             :   ) {
     293           0 :     *status = U_ILLEGAL_ARGUMENT_ERROR;
     294           0 :     return 0;
     295             :   }
     296             :   // add up the size of the serialized form
     297           0 :   int32_t serializedTrieSize = utrie2_serialize(sel->trie, NULL, 0, status);
     298           0 :   if (*status != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(*status)) {
     299           0 :     return 0;
     300             :   }
     301           0 :   *status = U_ZERO_ERROR;
     302             : 
     303             :   DataHeader header;
     304           0 :   uprv_memset(&header, 0, sizeof(header));
     305           0 :   header.dataHeader.headerSize = (uint16_t)((sizeof(header) + 15) & ~15);
     306           0 :   header.dataHeader.magic1 = 0xda;
     307           0 :   header.dataHeader.magic2 = 0x27;
     308           0 :   uprv_memcpy(&header.info, &dataInfo, sizeof(dataInfo));
     309             : 
     310             :   int32_t indexes[UCNVSEL_INDEX_COUNT] = {
     311             :     serializedTrieSize,
     312           0 :     sel->pvCount,
     313           0 :     sel->encodingsCount,
     314           0 :     sel->encodingStrLength
     315           0 :   };
     316             : 
     317             :   int32_t totalSize =
     318           0 :     header.dataHeader.headerSize +
     319           0 :     (int32_t)sizeof(indexes) +
     320           0 :     serializedTrieSize +
     321           0 :     sel->pvCount * 4 +
     322           0 :     sel->encodingStrLength;
     323           0 :   indexes[UCNVSEL_INDEX_SIZE] = totalSize - header.dataHeader.headerSize;
     324           0 :   if (totalSize > bufferCapacity) {
     325           0 :     *status = U_BUFFER_OVERFLOW_ERROR;
     326           0 :     return totalSize;
     327             :   }
     328             :   // ok, save!
     329           0 :   int32_t length = header.dataHeader.headerSize;
     330           0 :   uprv_memcpy(p, &header, sizeof(header));
     331           0 :   uprv_memset(p + sizeof(header), 0, length - sizeof(header));
     332           0 :   p += length;
     333             : 
     334           0 :   length = (int32_t)sizeof(indexes);
     335           0 :   uprv_memcpy(p, indexes, length);
     336           0 :   p += length;
     337             : 
     338           0 :   utrie2_serialize(sel->trie, p, serializedTrieSize, status);
     339           0 :   p += serializedTrieSize;
     340             : 
     341           0 :   length = sel->pvCount * 4;
     342           0 :   uprv_memcpy(p, sel->pv, length);
     343           0 :   p += length;
     344             : 
     345           0 :   uprv_memcpy(p, sel->encodings[0], sel->encodingStrLength);
     346           0 :   p += sel->encodingStrLength;
     347             : 
     348           0 :   return totalSize;
     349             : }
     350             : 
     351             : /**
     352             :  * swap a selector into the desired Endianness and Asciiness of
     353             :  * the system. Just as FYI, selectors are always saved in the format
     354             :  * of the system that created them. They are only converted if used
     355             :  * on another system. In other words, selectors created on different
     356             :  * system can be different even if the params are identical (endianness
     357             :  * and Asciiness differences only)
     358             :  *
     359             :  * @param ds pointer to data swapper containing swapping info
     360             :  * @param inData pointer to incoming data
     361             :  * @param length length of inData in bytes
     362             :  * @param outData pointer to output data. Capacity should
     363             :  *                be at least equal to capacity of inData
     364             :  * @param status an in/out ICU UErrorCode
     365             :  * @return 0 on failure, number of bytes swapped on success
     366             :  *         number of bytes swapped can be smaller than length
     367             :  */
     368             : static int32_t
     369           0 : ucnvsel_swap(const UDataSwapper *ds,
     370             :              const void *inData, int32_t length,
     371             :              void *outData, UErrorCode *status) {
     372             :   /* udata_swapDataHeader checks the arguments */
     373           0 :   int32_t headerSize = udata_swapDataHeader(ds, inData, length, outData, status);
     374           0 :   if(U_FAILURE(*status)) {
     375           0 :     return 0;
     376             :   }
     377             : 
     378             :   /* check data format and format version */
     379           0 :   const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData + 4);
     380           0 :   if(!(
     381           0 :     pInfo->dataFormat[0] == 0x43 &&  /* dataFormat="CSel" */
     382           0 :     pInfo->dataFormat[1] == 0x53 &&
     383           0 :     pInfo->dataFormat[2] == 0x65 &&
     384           0 :     pInfo->dataFormat[3] == 0x6c
     385             :   )) {
     386           0 :     udata_printError(ds, "ucnvsel_swap(): data format %02x.%02x.%02x.%02x is not recognized as UConverterSelector data\n",
     387           0 :                      pInfo->dataFormat[0], pInfo->dataFormat[1],
     388           0 :                      pInfo->dataFormat[2], pInfo->dataFormat[3]);
     389           0 :     *status = U_INVALID_FORMAT_ERROR;
     390           0 :     return 0;
     391             :   }
     392           0 :   if(pInfo->formatVersion[0] != 1) {
     393           0 :     udata_printError(ds, "ucnvsel_swap(): format version %02x is not supported\n",
     394           0 :                      pInfo->formatVersion[0]);
     395           0 :     *status = U_UNSUPPORTED_ERROR;
     396           0 :     return 0;
     397             :   }
     398             : 
     399           0 :   if(length >= 0) {
     400           0 :     length -= headerSize;
     401           0 :     if(length < 16*4) {
     402             :       udata_printError(ds, "ucnvsel_swap(): too few bytes (%d after header) for UConverterSelector data\n",
     403           0 :                        length);
     404           0 :       *status = U_INDEX_OUTOFBOUNDS_ERROR;
     405           0 :       return 0;
     406             :     }
     407             :   }
     408             : 
     409           0 :   const uint8_t *inBytes = (const uint8_t *)inData + headerSize;
     410           0 :   uint8_t *outBytes = (uint8_t *)outData + headerSize;
     411             : 
     412             :   /* read the indexes */
     413           0 :   const int32_t *inIndexes = (const int32_t *)inBytes;
     414             :   int32_t indexes[16];
     415             :   int32_t i;
     416           0 :   for(i = 0; i < 16; ++i) {
     417           0 :     indexes[i] = udata_readInt32(ds, inIndexes[i]);
     418             :   }
     419             : 
     420             :   /* get the total length of the data */
     421           0 :   int32_t size = indexes[UCNVSEL_INDEX_SIZE];
     422           0 :   if(length >= 0) {
     423           0 :     if(length < size) {
     424             :       udata_printError(ds, "ucnvsel_swap(): too few bytes (%d after header) for all of UConverterSelector data\n",
     425           0 :                        length);
     426           0 :       *status = U_INDEX_OUTOFBOUNDS_ERROR;
     427           0 :       return 0;
     428             :     }
     429             : 
     430             :     /* copy the data for inaccessible bytes */
     431           0 :     if(inBytes != outBytes) {
     432           0 :       uprv_memcpy(outBytes, inBytes, size);
     433             :     }
     434             : 
     435           0 :     int32_t offset = 0, count;
     436             : 
     437             :     /* swap the int32_t indexes[] */
     438           0 :     count = UCNVSEL_INDEX_COUNT*4;
     439           0 :     ds->swapArray32(ds, inBytes, count, outBytes, status);
     440           0 :     offset += count;
     441             : 
     442             :     /* swap the UTrie2 */
     443           0 :     count = indexes[UCNVSEL_INDEX_TRIE_SIZE];
     444           0 :     utrie2_swap(ds, inBytes + offset, count, outBytes + offset, status);
     445           0 :     offset += count;
     446             : 
     447             :     /* swap the uint32_t pv[] */
     448           0 :     count = indexes[UCNVSEL_INDEX_PV_COUNT]*4;
     449           0 :     ds->swapArray32(ds, inBytes + offset, count, outBytes + offset, status);
     450           0 :     offset += count;
     451             : 
     452             :     /* swap the encoding names */
     453           0 :     count = indexes[UCNVSEL_INDEX_NAMES_LENGTH];
     454           0 :     ds->swapInvChars(ds, inBytes + offset, count, outBytes + offset, status);
     455           0 :     offset += count;
     456             : 
     457           0 :     U_ASSERT(offset == size);
     458             :   }
     459             : 
     460           0 :   return headerSize + size;
     461             : }
     462             : 
     463             : /* unserialize a selector */
     464             : U_CAPI UConverterSelector* U_EXPORT2
     465           0 : ucnvsel_openFromSerialized(const void* buffer, int32_t length, UErrorCode* status) {
     466             :   // check if already failed
     467           0 :   if (U_FAILURE(*status)) {
     468           0 :     return NULL;
     469             :   }
     470             :   // ensure args make sense!
     471           0 :   const uint8_t *p = (const uint8_t *)buffer;
     472           0 :   if (length <= 0 ||
     473           0 :       (length > 0 && (p == NULL || (U_POINTER_MASK_LSB(p, 3) != 0)))
     474             :   ) {
     475           0 :     *status = U_ILLEGAL_ARGUMENT_ERROR;
     476           0 :     return NULL;
     477             :   }
     478             :   // header
     479           0 :   if (length < 32) {
     480             :     // not even enough space for a minimal header
     481           0 :     *status = U_INDEX_OUTOFBOUNDS_ERROR;
     482           0 :     return NULL;
     483             :   }
     484           0 :   const DataHeader *pHeader = (const DataHeader *)p;
     485           0 :   if (!(
     486           0 :     pHeader->dataHeader.magic1==0xda &&
     487           0 :     pHeader->dataHeader.magic2==0x27 &&
     488           0 :     pHeader->info.dataFormat[0] == 0x43 &&
     489           0 :     pHeader->info.dataFormat[1] == 0x53 &&
     490           0 :     pHeader->info.dataFormat[2] == 0x65 &&
     491           0 :     pHeader->info.dataFormat[3] == 0x6c
     492             :   )) {
     493             :     /* header not valid or dataFormat not recognized */
     494           0 :     *status = U_INVALID_FORMAT_ERROR;
     495           0 :     return NULL;
     496             :   }
     497           0 :   if (pHeader->info.formatVersion[0] != 1) {
     498           0 :     *status = U_UNSUPPORTED_ERROR;
     499           0 :     return NULL;
     500             :   }
     501           0 :   uint8_t* swapped = NULL;
     502           0 :   if (pHeader->info.isBigEndian != U_IS_BIG_ENDIAN ||
     503           0 :       pHeader->info.charsetFamily != U_CHARSET_FAMILY
     504             :   ) {
     505             :     // swap the data
     506             :     UDataSwapper *ds =
     507           0 :       udata_openSwapperForInputData(p, length, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, status);
     508           0 :     int32_t totalSize = ucnvsel_swap(ds, p, -1, NULL, status);
     509           0 :     if (U_FAILURE(*status)) {
     510           0 :       udata_closeSwapper(ds);
     511           0 :       return NULL;
     512             :     }
     513           0 :     if (length < totalSize) {
     514           0 :       udata_closeSwapper(ds);
     515           0 :       *status = U_INDEX_OUTOFBOUNDS_ERROR;
     516           0 :       return NULL;
     517             :     }
     518           0 :     swapped = (uint8_t*)uprv_malloc(totalSize);
     519           0 :     if (swapped == NULL) {
     520           0 :       udata_closeSwapper(ds);
     521           0 :       *status = U_MEMORY_ALLOCATION_ERROR;
     522           0 :       return NULL;
     523             :     }
     524           0 :     ucnvsel_swap(ds, p, length, swapped, status);
     525           0 :     udata_closeSwapper(ds);
     526           0 :     if (U_FAILURE(*status)) {
     527           0 :       uprv_free(swapped);
     528           0 :       return NULL;
     529             :     }
     530           0 :     p = swapped;
     531           0 :     pHeader = (const DataHeader *)p;
     532             :   }
     533           0 :   if (length < (pHeader->dataHeader.headerSize + 16 * 4)) {
     534             :     // not even enough space for the header and the indexes
     535           0 :     uprv_free(swapped);
     536           0 :     *status = U_INDEX_OUTOFBOUNDS_ERROR;
     537           0 :     return NULL;
     538             :   }
     539           0 :   p += pHeader->dataHeader.headerSize;
     540           0 :   length -= pHeader->dataHeader.headerSize;
     541             :   // indexes
     542           0 :   const int32_t *indexes = (const int32_t *)p;
     543           0 :   if (length < indexes[UCNVSEL_INDEX_SIZE]) {
     544           0 :     uprv_free(swapped);
     545           0 :     *status = U_INDEX_OUTOFBOUNDS_ERROR;
     546           0 :     return NULL;
     547             :   }
     548           0 :   p += UCNVSEL_INDEX_COUNT * 4;
     549             :   // create and populate the selector object
     550           0 :   UConverterSelector* sel = (UConverterSelector*)uprv_malloc(sizeof(UConverterSelector));
     551             :   char **encodings =
     552           0 :     (char **)uprv_malloc(
     553           0 :       indexes[UCNVSEL_INDEX_NAMES_COUNT] * sizeof(char *));
     554           0 :   if (sel == NULL || encodings == NULL) {
     555           0 :     uprv_free(swapped);
     556           0 :     uprv_free(sel);
     557           0 :     uprv_free(encodings);
     558           0 :     *status = U_MEMORY_ALLOCATION_ERROR;
     559           0 :     return NULL;
     560             :   }
     561           0 :   uprv_memset(sel, 0, sizeof(UConverterSelector));
     562           0 :   sel->pvCount = indexes[UCNVSEL_INDEX_PV_COUNT];
     563           0 :   sel->encodings = encodings;
     564           0 :   sel->encodingsCount = indexes[UCNVSEL_INDEX_NAMES_COUNT];
     565           0 :   sel->encodingStrLength = indexes[UCNVSEL_INDEX_NAMES_LENGTH];
     566           0 :   sel->swapped = swapped;
     567             :   // trie
     568           0 :   sel->trie = utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,
     569             :                                         p, indexes[UCNVSEL_INDEX_TRIE_SIZE], NULL,
     570             :                                         status);
     571           0 :   p += indexes[UCNVSEL_INDEX_TRIE_SIZE];
     572           0 :   if (U_FAILURE(*status)) {
     573           0 :     ucnvsel_close(sel);
     574           0 :     return NULL;
     575             :   }
     576             :   // bit vectors
     577           0 :   sel->pv = (uint32_t *)p;
     578           0 :   p += sel->pvCount * 4;
     579             :   // encoding names
     580           0 :   char* s = (char*)p;
     581           0 :   for (int32_t i = 0; i < sel->encodingsCount; ++i) {
     582           0 :     sel->encodings[i] = s;
     583           0 :     s += uprv_strlen(s) + 1;
     584             :   }
     585           0 :   p += sel->encodingStrLength;
     586             : 
     587           0 :   return sel;
     588             : }
     589             : 
     590             : // a bunch of functions for the enumeration thingie! Nothing fancy here. Just
     591             : // iterate over the selected encodings
     592             : struct Enumerator {
     593             :   int16_t* index;
     594             :   int16_t length;
     595             :   int16_t cur;
     596             :   const UConverterSelector* sel;
     597             : };
     598             : 
     599             : U_CDECL_BEGIN
     600             : 
     601             : static void U_CALLCONV
     602           0 : ucnvsel_close_selector_iterator(UEnumeration *enumerator) {
     603           0 :   uprv_free(((Enumerator*)(enumerator->context))->index);
     604           0 :   uprv_free(enumerator->context);
     605           0 :   uprv_free(enumerator);
     606           0 : }
     607             : 
     608             : 
     609             : static int32_t U_CALLCONV
     610           0 : ucnvsel_count_encodings(UEnumeration *enumerator, UErrorCode *status) {
     611             :   // check if already failed
     612           0 :   if (U_FAILURE(*status)) {
     613           0 :     return 0;
     614             :   }
     615           0 :   return ((Enumerator*)(enumerator->context))->length;
     616             : }
     617             : 
     618             : 
     619           0 : static const char* U_CALLCONV ucnvsel_next_encoding(UEnumeration* enumerator,
     620             :                                                  int32_t* resultLength,
     621             :                                                  UErrorCode* status) {
     622             :   // check if already failed
     623           0 :   if (U_FAILURE(*status)) {
     624           0 :     return NULL;
     625             :   }
     626             : 
     627           0 :   int16_t cur = ((Enumerator*)(enumerator->context))->cur;
     628             :   const UConverterSelector* sel;
     629             :   const char* result;
     630           0 :   if (cur >= ((Enumerator*)(enumerator->context))->length) {
     631           0 :     return NULL;
     632             :   }
     633           0 :   sel = ((Enumerator*)(enumerator->context))->sel;
     634           0 :   result = sel->encodings[((Enumerator*)(enumerator->context))->index[cur] ];
     635           0 :   ((Enumerator*)(enumerator->context))->cur++;
     636           0 :   if (resultLength) {
     637           0 :     *resultLength = (int32_t)uprv_strlen(result);
     638             :   }
     639           0 :   return result;
     640             : }
     641             : 
     642           0 : static void U_CALLCONV ucnvsel_reset_iterator(UEnumeration* enumerator,
     643             :                                            UErrorCode* status) {
     644             :   // check if already failed
     645           0 :   if (U_FAILURE(*status)) {
     646           0 :     return ;
     647             :   }
     648           0 :   ((Enumerator*)(enumerator->context))->cur = 0;
     649             : }
     650             : 
     651             : U_CDECL_END
     652             : 
     653             : 
     654             : static const UEnumeration defaultEncodings = {
     655             :   NULL,
     656             :     NULL,
     657             :     ucnvsel_close_selector_iterator,
     658             :     ucnvsel_count_encodings,
     659             :     uenum_unextDefault,
     660             :     ucnvsel_next_encoding, 
     661             :     ucnvsel_reset_iterator
     662             : };
     663             : 
     664             : 
     665             : // internal fn to intersect two sets of masks
     666             : // returns whether the mask has reduced to all zeros
     667           0 : static UBool intersectMasks(uint32_t* dest, const uint32_t* source1, int32_t len) {
     668             :   int32_t i;
     669           0 :   uint32_t oredDest = 0;
     670           0 :   for (i = 0 ; i < len ; ++i) {
     671           0 :     oredDest |= (dest[i] &= source1[i]);
     672             :   }
     673           0 :   return oredDest == 0;
     674             : }
     675             : 
     676             : // internal fn to count how many 1's are there in a mask
     677             : // algorithm taken from  http://graphics.stanford.edu/~seander/bithacks.html
     678           0 : static int16_t countOnes(uint32_t* mask, int32_t len) {
     679           0 :   int32_t i, totalOnes = 0;
     680           0 :   for (i = 0 ; i < len ; ++i) {
     681           0 :     uint32_t ent = mask[i];
     682           0 :     for (; ent; totalOnes++)
     683             :     {
     684           0 :       ent &= ent - 1; // clear the least significant bit set
     685             :     }
     686             :   }
     687           0 :   return totalOnes;
     688             : }
     689             : 
     690             : 
     691             : /* internal function! */
     692           0 : static UEnumeration *selectForMask(const UConverterSelector* sel,
     693             :                                    uint32_t *mask, UErrorCode *status) {
     694             :   // this is the context we will use. Store a table of indices to which
     695             :   // encodings are legit.
     696           0 :   struct Enumerator* result = (Enumerator*)uprv_malloc(sizeof(Enumerator));
     697           0 :   if (result == NULL) {
     698           0 :     uprv_free(mask);
     699           0 :     *status = U_MEMORY_ALLOCATION_ERROR;
     700           0 :     return NULL;
     701             :   }
     702           0 :   result->index = NULL;  // this will be allocated later!
     703           0 :   result->length = result->cur = 0;
     704           0 :   result->sel = sel;
     705             : 
     706           0 :   UEnumeration *en = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
     707           0 :   if (en == NULL) {
     708             :     // TODO(markus): Combine Enumerator and UEnumeration into one struct.
     709           0 :     uprv_free(mask);
     710           0 :     uprv_free(result);
     711           0 :     *status = U_MEMORY_ALLOCATION_ERROR;
     712           0 :     return NULL;
     713             :   }
     714           0 :   memcpy(en, &defaultEncodings, sizeof(UEnumeration));
     715           0 :   en->context = result;
     716             : 
     717           0 :   int32_t columns = (sel->encodingsCount+31)/32;
     718           0 :   int16_t numOnes = countOnes(mask, columns);
     719             :   // now, we know the exact space we need for index
     720           0 :   if (numOnes > 0) {
     721           0 :     result->index = (int16_t*) uprv_malloc(numOnes * sizeof(int16_t));
     722             : 
     723             :     int32_t i, j;
     724           0 :     int16_t k = 0;
     725           0 :     for (j = 0 ; j < columns; j++) {
     726           0 :       uint32_t v = mask[j];
     727           0 :       for (i = 0 ; i < 32 && k < sel->encodingsCount; i++, k++) {
     728           0 :         if ((v & 1) != 0) {
     729           0 :           result->index[result->length++] = k;
     730             :         }
     731           0 :         v >>= 1;
     732             :       }
     733             :     }
     734             :   } //otherwise, index will remain NULL (and will never be touched by
     735             :     //the enumerator code anyway)
     736           0 :   uprv_free(mask);
     737           0 :   return en;
     738             : }
     739             : 
     740             : /* check a string against the selector - UTF16 version */
     741             : U_CAPI UEnumeration * U_EXPORT2
     742           0 : ucnvsel_selectForString(const UConverterSelector* sel,
     743             :                         const UChar *s, int32_t length, UErrorCode *status) {
     744             :   // check if already failed
     745           0 :   if (U_FAILURE(*status)) {
     746           0 :     return NULL;
     747             :   }
     748             :   // ensure args make sense!
     749           0 :   if (sel == NULL || (s == NULL && length != 0)) {
     750           0 :     *status = U_ILLEGAL_ARGUMENT_ERROR;
     751           0 :     return NULL;
     752             :   }
     753             : 
     754           0 :   int32_t columns = (sel->encodingsCount+31)/32;
     755           0 :   uint32_t* mask = (uint32_t*) uprv_malloc(columns * 4);
     756           0 :   if (mask == NULL) {
     757           0 :     *status = U_MEMORY_ALLOCATION_ERROR;
     758           0 :     return NULL;
     759             :   }
     760           0 :   uprv_memset(mask, ~0, columns *4);
     761             : 
     762           0 :   if(s!=NULL) {
     763             :     const UChar *limit;
     764           0 :     if (length >= 0) {
     765           0 :       limit = s + length;
     766             :     } else {
     767           0 :       limit = NULL;
     768             :     }
     769             :     
     770           0 :     while (limit == NULL ? *s != 0 : s != limit) {
     771             :       UChar32 c;
     772             :       uint16_t pvIndex;
     773           0 :       UTRIE2_U16_NEXT16(sel->trie, s, limit, c, pvIndex);
     774           0 :       if (intersectMasks(mask, sel->pv+pvIndex, columns)) {
     775           0 :         break;
     776             :       }
     777             :     }
     778             :   }
     779           0 :   return selectForMask(sel, mask, status);
     780             : }
     781             : 
     782             : /* check a string against the selector - UTF8 version */
     783             : U_CAPI UEnumeration * U_EXPORT2
     784           0 : ucnvsel_selectForUTF8(const UConverterSelector* sel,
     785             :                       const char *s, int32_t length, UErrorCode *status) {
     786             :   // check if already failed
     787           0 :   if (U_FAILURE(*status)) {
     788           0 :     return NULL;
     789             :   }
     790             :   // ensure args make sense!
     791           0 :   if (sel == NULL || (s == NULL && length != 0)) {
     792           0 :     *status = U_ILLEGAL_ARGUMENT_ERROR;
     793           0 :     return NULL;
     794             :   }
     795             : 
     796           0 :   int32_t columns = (sel->encodingsCount+31)/32;
     797           0 :   uint32_t* mask = (uint32_t*) uprv_malloc(columns * 4);
     798           0 :   if (mask == NULL) {
     799           0 :     *status = U_MEMORY_ALLOCATION_ERROR;
     800           0 :     return NULL;
     801             :   }
     802           0 :   uprv_memset(mask, ~0, columns *4);
     803             : 
     804           0 :   if (length < 0) {
     805           0 :     length = (int32_t)uprv_strlen(s);
     806             :   }
     807             : 
     808           0 :   if(s!=NULL) {
     809           0 :     const char *limit = s + length;
     810             :     
     811           0 :     while (s != limit) {
     812             :       uint16_t pvIndex;
     813           0 :       UTRIE2_U8_NEXT16(sel->trie, s, limit, pvIndex);
     814           0 :       if (intersectMasks(mask, sel->pv+pvIndex, columns)) {
     815           0 :         break;
     816             :       }
     817             :     }
     818             :   }
     819           0 :   return selectForMask(sel, mask, status);
     820             : }
     821             : 
     822             : #endif  // !UCONFIG_NO_CONVERSION

Generated by: LCOV version 1.13