LCOV - output.info - intl/icu/source/i18n/collationdatawriter.cpp

LCOV - code coverage report

Current view:	top level - intl/icu/source/i18n - collationdatawriter.cpp (source / functions)		Hit	Total	Coverage
Test:	output.info	Lines:	0	174	0.0 %
Date:	2017-07-14 16:53:18	Functions:	0	6	0.0 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : *******************************************************************************
       5             : * Copyright (C) 2013-2015, International Business Machines
       6             : * Corporation and others.  All Rights Reserved.
       7             : *******************************************************************************
       8             : * collationdatawriter.cpp
       9             : *
      10             : * created on: 2013aug06
      11             : * created by: Markus W. Scherer
      12             : */
      13             : 
      14             : #include "unicode/utypes.h"
      15             : 
      16             : #if !UCONFIG_NO_COLLATION
      17             : 
      18             : #include "unicode/tblcoll.h"
      19             : #include "unicode/udata.h"
      20             : #include "unicode/uniset.h"
      21             : #include "cmemory.h"
      22             : #include "collationdata.h"
      23             : #include "collationdatabuilder.h"
      24             : #include "collationdatareader.h"
      25             : #include "collationdatawriter.h"
      26             : #include "collationfastlatin.h"
      27             : #include "collationsettings.h"
      28             : #include "collationtailoring.h"
      29             : #include "uassert.h"
      30             : #include "ucmndata.h"
      31             : 
      32             : U_NAMESPACE_BEGIN
      33             : 
      34             : uint8_t *  // the serialized data in a uprv_malloc()ed buffer released to the caller, or NULL on any failure
      35           0 : RuleBasedCollator::cloneRuleData(int32_t &length, UErrorCode &errorCode) const {  // length receives cloneBinary()'s return value
      36           0 :     if(U_FAILURE(errorCode)) { return NULL; }  // earlier failure: do nothing; length is left untouched
      37           0 :     LocalMemory<uint8_t> buffer((uint8_t *)uprv_malloc(20000));  // first attempt uses a fixed 20000-byte buffer
      38           0 :     if(buffer.isNull()) {
      39           0 :         errorCode = U_MEMORY_ALLOCATION_ERROR;
      40           0 :         return NULL;
      41             :     }
      42           0 :     length = cloneBinary(buffer.getAlias(), 20000, errorCode);
      43           0 :     if(errorCode == U_BUFFER_OVERFLOW_ERROR) {  // too small: write() has returned the required size in length
      44           0 :         if(buffer.allocateInsteadAndCopy(length, 0) == NULL) {  // swap in a buffer of that size; 0 = copy nothing from the old one
      45           0 :             errorCode = U_MEMORY_ALLOCATION_ERROR;
      46           0 :             return NULL;
      47             :         }
      48           0 :         errorCode = U_ZERO_ERROR;  // clear the overflow status; write() returns immediately when handed a failure code
      49           0 :         length = cloneBinary(buffer.getAlias(), length, errorCode);
      50             :     }
      51           0 :     if(U_FAILURE(errorCode)) { return NULL; }  // buffer is still owned by the LocalMemory here (presumably freed by its destructor)
      52           0 :     return buffer.orphan();  // hand the raw pointer to the caller instead of keeping it in the LocalMemory
      53             : }
      54             : 
      55             : int32_t  // forwards the byte count returned by CollationDataWriter::writeTailoring()
      56           0 : RuleBasedCollator::cloneBinary(uint8_t *dest, int32_t capacity, UErrorCode &errorCode) const {  // serializes this collator's tailoring and settings into dest
      57             :     int32_t indexes[CollationDataReader::IX_TOTAL_SIZE + 1];  // scratch array that write() fills in; discarded when this function returns
      58             :     return CollationDataWriter::writeTailoring(
      59           0 :             *tailoring, *settings, indexes, dest, capacity,
      60           0 :             errorCode);
      61             : }
      62             : 
      63             : static const UDataInfo dataInfo = {  // template that write() copies into the DataHeader it builds for tailorings
      64             :     sizeof(UDataInfo),  // size
      65             :     0,  // reservedWord
      66             : 
      67             :     U_IS_BIG_ENDIAN,  // isBigEndian: taken from the build platform
      68             :     U_CHARSET_FAMILY,  // charsetFamily: taken from the build platform
      69             :     U_SIZEOF_UCHAR,  // sizeofUChar
      70             :     0,  // reservedByte
      71             : 
      72             :     { 0x55, 0x43, 0x6f, 0x6c },         // dataFormat="UCol"
      73             :     { 5, 0, 0, 0 },                     // formatVersion
      74             :     { 6, 3, 0, 0 }                      // dataVersion; write() replaces this in its header copy with its dataVersion argument
      75             : };
      76             : 
      77             : int32_t  // forwards write()'s return value
      78           0 : CollationDataWriter::writeBase(const CollationData &data, const CollationSettings &settings,  // entry point for serializing base data with root elements
      79             :                                const void *rootElements, int32_t rootElementsLength,
      80             :                                int32_t indexes[], uint8_t *dest, int32_t capacity,
      81             :                                UErrorCode &errorCode) {
      82             :     return write(TRUE, NULL,  // isBase=TRUE; no dataVersion is needed because write() emits no header for the base
      83             :                  data, settings,
      84             :                  rootElements, rootElementsLength,
      85           0 :                  indexes, dest, capacity, errorCode);
      86             : }
      87             : 
      88             : int32_t  // forwards write()'s return value
      89           0 : CollationDataWriter::writeTailoring(const CollationTailoring &t, const CollationSettings &settings,  // entry point for serializing a tailoring, header included
      90             :                                     int32_t indexes[], uint8_t *dest, int32_t capacity,
      91             :                                     UErrorCode &errorCode) {
      92           0 :     return write(FALSE, t.version,  // isBase=FALSE; t.version is copied into the header's dataVersion
      93           0 :                  *t.data, settings,
      94             :                  NULL, 0,  // a tailoring carries no root elements
      95           0 :                  indexes, dest, capacity, errorCode);
      96             : }
      97             : 
      98             : int32_t  // headerSize + totalSize in bytes (also returned, as the required size, with U_BUFFER_OVERFLOW_ERROR); 0 on other failures
      99           0 : CollationDataWriter::write(UBool isBase, const UVersionInfo dataVersion,  // shared implementation behind writeBase() and writeTailoring()
     100             :                            const CollationData &data, const CollationSettings &settings,
     101             :                            const void *rootElements, int32_t rootElementsLength,
     102             :                            int32_t indexes[], uint8_t *dest, int32_t capacity,  // indexes[] is filled in here and then copied to the start of the output
     103             :                            UErrorCode &errorCode) {
     104           0 :     if(U_FAILURE(errorCode)) { return 0; }
     105           0 :     if(capacity < 0 || (capacity > 0 && dest == NULL)) {  // a NULL dest is accepted only together with capacity 0 (pure size query)
     106           0 :         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
     107           0 :         return 0;
     108             :     }
     109             : 
     110             :     // Figure out which data items to write before settling on
     111             :     // the indexes length and writing offsets.
     112             :     // For any data item, we need to write the start and limit offsets,
     113             :     // so the indexes length must be at least index-of-start-offset + 2.
     114             :     int32_t indexesLength;
     115             :     UBool hasMappings;
     116           0 :     UnicodeSet unsafeBackwardSet;  // the set that will actually be serialized; filled in per case below
     117           0 :     const CollationData *baseData = data.base;
     118             : 
     119             :     int32_t fastLatinVersion;
     120           0 :     if(data.fastLatinTable != NULL) {
     121           0 :         fastLatinVersion = (int32_t)CollationFastLatin::VERSION << 16;  // shifted above the low 16 bits that settings.options is asserted to fit in
     122             :     } else {
     123           0 :         fastLatinVersion = 0;
     124             :     }
     125           0 :     int32_t fastLatinTableLength = 0;  // stays 0 unless a fast Latin table is written below
     126             : 
     127           0 :     if(isBase) {
     128             :         // For the root collator, we write an even number of indexes
     129             :         // so that we start with an 8-aligned offset.
     130           0 :         indexesLength = CollationDataReader::IX_TOTAL_SIZE + 1;
     131           0 :         U_ASSERT(settings.reorderCodesLength == 0);
     132           0 :         hasMappings = TRUE;
     133           0 :         unsafeBackwardSet = *data.unsafeBackwardSet;  // the base writes its whole set
     134           0 :         fastLatinTableLength = data.fastLatinTableLength;
     135           0 :     } else if(baseData == NULL) {  // not the base and no base data attached: no mappings are written
     136           0 :         hasMappings = FALSE;
     137           0 :         if(settings.reorderCodesLength == 0) {
     138             :             // only options
     139           0 :             indexesLength = CollationDataReader::IX_OPTIONS + 1;  // no limit offset here
     140             :         } else {
     141             :             // only options, reorder codes, and the reorder table
     142           0 :             indexesLength = CollationDataReader::IX_REORDER_TABLE_OFFSET + 2;
     143             :         }
     144             :     } else {
     145           0 :         hasMappings = TRUE;
     146             :         // Tailored mappings, and what else?
     147             :         // Check in ascending order of optional tailoring data items.
     148           0 :         indexesLength = CollationDataReader::IX_CE32S_OFFSET + 2;
     149           0 :         if(data.contextsLength != 0) {
     150           0 :             indexesLength = CollationDataReader::IX_CONTEXTS_OFFSET + 2;
     151             :         }
     152           0 :         unsafeBackwardSet.addAll(*data.unsafeBackwardSet).removeAll(*baseData->unsafeBackwardSet);  // keep only what the base's set does not already contain
     153           0 :         if(!unsafeBackwardSet.isEmpty()) {
     154           0 :             indexesLength = CollationDataReader::IX_UNSAFE_BWD_OFFSET + 2;
     155             :         }
     156           0 :         if(data.fastLatinTable != baseData->fastLatinTable) {  // pointer comparison: write the table only when it is not the base's
     157           0 :             fastLatinTableLength = data.fastLatinTableLength;
     158           0 :             indexesLength = CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET + 2;
     159             :         }
     160             :     }
     161             : 
     162           0 :     UVector32 codesAndRanges(errorCode);
     163           0 :     const int32_t *reorderCodes = settings.reorderCodes;
     164           0 :     int32_t reorderCodesLength = settings.reorderCodesLength;
     165           0 :     if(settings.hasReordering() &&
     166           0 :             CollationSettings::reorderTableHasSplitBytes(settings.reorderTable)) {
     167             :         // Rebuild the full list of reorder ranges.
     168             :         // The list in the settings is truncated for efficiency.
     169           0 :         data.makeReorderRanges(reorderCodes, reorderCodesLength, codesAndRanges, errorCode);
     170             :         // Write the codes, then the ranges.
     171           0 :         for(int32_t i = 0; i < reorderCodesLength; ++i) {
     172           0 :             codesAndRanges.insertElementAt(reorderCodes[i], i, errorCode);  // inserting at index i puts the codes, in order, ahead of the ranges
     173             :         }
     174           0 :         if(U_FAILURE(errorCode)) { return 0; }  // one check for the constructor, makeReorderRanges() and the insertions, which all report via errorCode
     175           0 :         reorderCodes = codesAndRanges.getBuffer();
     176           0 :         reorderCodesLength = codesAndRanges.size();
     177             :     }
     178             : 
     179             :     int32_t headerSize;
     180           0 :     if(isBase) {
     181           0 :         headerSize = 0;  // udata_create() writes the header
     182             :     } else {
     183             :         DataHeader header;
     184           0 :         header.dataHeader.magic1 = 0xda;
     185           0 :         header.dataHeader.magic2 = 0x27;
     186           0 :         uprv_memcpy(&header.info, &dataInfo, sizeof(UDataInfo));
     187           0 :         uprv_memcpy(header.info.dataVersion, dataVersion, sizeof(UVersionInfo));  // replace the template's dataVersion with the caller's
     188           0 :         headerSize = (int32_t)sizeof(header);
     189           0 :         U_ASSERT((headerSize & 3) == 0);  // multiple of 4 bytes
     190           0 :         if(hasMappings && data.cesLength != 0) {
     191             :             // Sum of the sizes of the data items which are
     192             :             // not automatically multiples of 8 bytes and which are placed before the CEs.
     193           0 :             int32_t sum = headerSize + (indexesLength + reorderCodesLength) * 4;
     194           0 :             if((sum & 7) != 0) {
     195             :                 // We need to add padding somewhere so that the 64-bit CEs are 8-aligned.
     196             :                 // We add to the header size here.
     197             :                 // Alternatively, we could increment the indexesLength
     198             :                 // or add a few bytes to the reorderTable.
     199           0 :                 headerSize += 4;
     200             :             }
     201             :         }
     202           0 :         header.dataHeader.headerSize = (uint16_t)headerSize;  // the stored size includes the optional padding
     203           0 :         if(headerSize <= capacity) {
     204           0 :             uprv_memcpy(dest, &header, sizeof(header));
     205             :             // Write 00 bytes so that the padding is not mistaken for a copyright string.
     206           0 :             uprv_memset(dest + sizeof(header), 0, headerSize - (int32_t)sizeof(header));  // 0 or 4 bytes, depending on the padding decision above
     207           0 :             dest += headerSize;  // from here on dest and capacity describe the space after the header
     208           0 :             capacity -= headerSize;
     209             :         } else {
     210           0 :             dest = NULL;  // header does not fit: continue in measuring mode so the required size can still be returned
     211           0 :             capacity = 0;
     212             :         }
     213             :     }
     214             : 
     215           0 :     indexes[CollationDataReader::IX_INDEXES_LENGTH] = indexesLength;
     216           0 :     U_ASSERT((settings.options & ~0xffff) == 0);
     217           0 :     indexes[CollationDataReader::IX_OPTIONS] =
     218           0 :             data.numericPrimary | fastLatinVersion | settings.options;  // three fields combined by bitwise OR into one index word
     219           0 :     indexes[CollationDataReader::IX_RESERVED2] = 0;
     220           0 :     indexes[CollationDataReader::IX_RESERVED3] = 0;
     221             : 
     222             :     // Byte offsets of data items all start from the start of the indexes.
     223             :     // We add the headerSize at the very end.
     224           0 :     int32_t totalSize = indexesLength * 4;  // running byte offset; each item below records it and then advances it
     225             : 
     226           0 :     if(hasMappings && (isBase || data.jamoCE32s != baseData->jamoCE32s)) {
     227           0 :         indexes[CollationDataReader::IX_JAMO_CE32S_START] = data.jamoCE32s - data.ce32s;  // pointer difference: position of the jamo CE32s within data.ce32s
     228             :     } else {
     229           0 :         indexes[CollationDataReader::IX_JAMO_CE32S_START] = -1;  // no mappings, or the jamo CE32s are the base's
     230             :     }
     231             : 
     232           0 :     indexes[CollationDataReader::IX_REORDER_CODES_OFFSET] = totalSize;
     233           0 :     totalSize += reorderCodesLength * 4;
     234             : 
     235           0 :     indexes[CollationDataReader::IX_REORDER_TABLE_OFFSET] = totalSize;
     236           0 :     if(settings.reorderTable != NULL) {
     237           0 :         totalSize += 256;  // the reorder table, when present, takes a fixed 256 bytes
     238             :     }
     239             : 
     240           0 :     indexes[CollationDataReader::IX_TRIE_OFFSET] = totalSize;
     241           0 :     if(hasMappings) {
     242           0 :         UErrorCode errorCode2 = U_ZERO_ERROR;  // separate status so that an overflow here does not abort the size computation
     243             :         int32_t length;
     244           0 :         if(totalSize < capacity) {  // room left: serialize straight into dest
     245           0 :             length = utrie2_serialize(data.trie, dest + totalSize,
     246           0 :                                       capacity - totalSize, &errorCode2);
     247             :         } else {
     248           0 :             length = utrie2_serialize(data.trie, NULL, 0, &errorCode2);  // no room: only measure
     249             :         }
     250           0 :         if(U_FAILURE(errorCode2) && errorCode2 != U_BUFFER_OVERFLOW_ERROR) {  // overflow is tolerated here; it is reported at the end via the totalSize check
     251           0 :             errorCode = errorCode2;
     252           0 :             return 0;
     253             :         }
     254             :         // The trie size should be a multiple of 8 bytes due to the way
     255             :         // compactIndex2(UNewTrie2 *trie) currently works.
     256           0 :         U_ASSERT((length & 7) == 0);
     257           0 :         totalSize += length;
     258             :     }
     259             : 
     260           0 :     indexes[CollationDataReader::IX_RESERVED8_OFFSET] = totalSize;  // gets the same offset as the CEs entry on the next line
     261           0 :     indexes[CollationDataReader::IX_CES_OFFSET] = totalSize;
     262           0 :     if(hasMappings && data.cesLength != 0) {
     263           0 :         U_ASSERT(((headerSize + totalSize) & 7) == 0);  // the alignment that the header padding above was added to guarantee
     264           0 :         totalSize += data.cesLength * 8;  // 8 bytes per 64-bit CE
     265             :     }
     266             : 
     267           0 :     indexes[CollationDataReader::IX_RESERVED10_OFFSET] = totalSize;  // gets the same offset as the CE32s entry on the next line
     268           0 :     indexes[CollationDataReader::IX_CE32S_OFFSET] = totalSize;
     269           0 :     if(hasMappings) {
     270           0 :         totalSize += data.ce32sLength * 4;
     271             :     }
     272             : 
     273           0 :     indexes[CollationDataReader::IX_ROOT_ELEMENTS_OFFSET] = totalSize;
     274           0 :     totalSize += rootElementsLength * 4;  // adds 0 for tailorings, which pass rootElementsLength 0
     275             : 
     276           0 :     indexes[CollationDataReader::IX_CONTEXTS_OFFSET] = totalSize;
     277           0 :     if(hasMappings) {
     278           0 :         totalSize += data.contextsLength * 2;
     279             :     }
     280             : 
     281           0 :     indexes[CollationDataReader::IX_UNSAFE_BWD_OFFSET] = totalSize;
     282           0 :     if(hasMappings && !unsafeBackwardSet.isEmpty()) {
     283           0 :         UErrorCode errorCode2 = U_ZERO_ERROR;  // same measure-or-serialize pattern as for the trie above
     284             :         int32_t length;
     285           0 :         if(totalSize < capacity) {
     286           0 :             uint16_t *p = reinterpret_cast<uint16_t *>(dest + totalSize);
     287           0 :             length = unsafeBackwardSet.serialize(
     288           0 :                     p, (capacity - totalSize) / 2, errorCode2);  // capacity and length are counted in 16-bit units here
     289             :         } else {
     290           0 :             length = unsafeBackwardSet.serialize(NULL, 0, errorCode2);
     291             :         }
     292           0 :         if(U_FAILURE(errorCode2) && errorCode2 != U_BUFFER_OVERFLOW_ERROR) {
     293           0 :             errorCode = errorCode2;
     294           0 :             return 0;
     295             :         }
     296           0 :         totalSize += length * 2;  // back to bytes
     297             :     }
     298             : 
     299           0 :     indexes[CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET] = totalSize;
     300           0 :     totalSize += fastLatinTableLength * 2;
     301             : 
     302           0 :     UnicodeString scripts;  // staging buffer for the script data; stays empty unless isBase
     303           0 :     indexes[CollationDataReader::IX_SCRIPTS_OFFSET] = totalSize;
     304           0 :     if(isBase) {
     305           0 :         scripts.append((UChar)data.numScripts);  // layout: numScripts, then numScripts + 16 index units, then the script starts
     306           0 :         scripts.append(reinterpret_cast<const UChar *>(data.scriptsIndex), data.numScripts + 16);
     307           0 :         scripts.append(reinterpret_cast<const UChar *>(data.scriptStarts), data.scriptStartsLength);
     308           0 :         totalSize += scripts.length() * 2;
     309             :     }
     310             : 
     311           0 :     indexes[CollationDataReader::IX_COMPRESSIBLE_BYTES_OFFSET] = totalSize;
     312           0 :     if(isBase) {
     313           0 :         totalSize += 256;  // data.compressibleBytes, written for the base only
     314             :     }
     315             : 
     316           0 :     indexes[CollationDataReader::IX_RESERVED18_OFFSET] = totalSize;
     317           0 :     indexes[CollationDataReader::IX_TOTAL_SIZE] = totalSize;
     318             : 
     319           0 :     if(totalSize > capacity) {  // capacity here is what was left after the header
     320           0 :         errorCode = U_BUFFER_OVERFLOW_ERROR;
     321           0 :         return headerSize + totalSize;  // the full size the caller needs to provide, header included
     322             :     }
     323             : 
     324           0 :     uprv_memcpy(dest, indexes, indexesLength * 4);  // only the first indexesLength entries are written out
     325           0 :     copyData(indexes, CollationDataReader::IX_REORDER_CODES_OFFSET, reorderCodes, dest);  // each call copies one item to its recorded offset; empty items are skipped
     326           0 :     copyData(indexes, CollationDataReader::IX_REORDER_TABLE_OFFSET, settings.reorderTable, dest);
     327             :     // The trie has already been serialized into the dest buffer.
     328           0 :     copyData(indexes, CollationDataReader::IX_CES_OFFSET, data.ces, dest);
     329           0 :     copyData(indexes, CollationDataReader::IX_CE32S_OFFSET, data.ce32s, dest);
     330           0 :     copyData(indexes, CollationDataReader::IX_ROOT_ELEMENTS_OFFSET, rootElements, dest);
     331           0 :     copyData(indexes, CollationDataReader::IX_CONTEXTS_OFFSET, data.contexts, dest);
     332             :     // The unsafeBackwardSet has already been serialized into the dest buffer.
     333           0 :     copyData(indexes, CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET, data.fastLatinTable, dest);
     334           0 :     copyData(indexes, CollationDataReader::IX_SCRIPTS_OFFSET, scripts.getBuffer(), dest);
     335           0 :     copyData(indexes, CollationDataReader::IX_COMPRESSIBLE_BYTES_OFFSET, data.compressibleBytes, dest);
     336             : 
     337           0 :     return headerSize + totalSize;  // bytes written by this function; headerSize is 0 for the base
     338             : }
     339             : 
     340             : void  // copies one data item from src into dest at the byte offset recorded for it in indexes[]
     341           0 : CollationDataWriter::copyData(const int32_t indexes[], int32_t startIndex,
     342             :                               const void *src, uint8_t *dest) {
     343           0 :     int32_t start = indexes[startIndex];  // byte offset of this item, relative to dest
     344           0 :     int32_t limit = indexes[startIndex + 1];  // the following index entry doubles as this item's limit
     345           0 :     if(start < limit) {  // empty items are skipped, so src is never read for them
     346           0 :         uprv_memcpy(dest + start, src, limit - start);
     347             :     }
     348           0 : }
     349             : 
     350             : U_NAMESPACE_END
     351             : 
     352             : #endif  // !UCONFIG_NO_COLLATION

Generated by: LCOV version 1.13