LCOV - code coverage report
Current view: top level - intl/icu/source/i18n - collationkeys.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 0 354 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 21 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : *******************************************************************************
       5             : * Copyright (C) 2012-2015, International Business Machines
       6             : * Corporation and others.  All Rights Reserved.
       7             : *******************************************************************************
       8             : * collationkeys.cpp
       9             : *
      10             : * created on: 2012sep02
      11             : * created by: Markus W. Scherer
      12             : */
      13             : 
      14             : #include "unicode/utypes.h"
      15             : 
      16             : #if !UCONFIG_NO_COLLATION
      17             : 
      18             : #include "unicode/bytestream.h"
      19             : #include "collation.h"
      20             : #include "collationiterator.h"
      21             : #include "collationkeys.h"
      22             : #include "collationsettings.h"
      23             : #include "uassert.h"
      24             : 
      25             : U_NAMESPACE_BEGIN
      26             : 
      27           0 : SortKeyByteSink::~SortKeyByteSink() {}
      28             : 
      29             : void
      30           0 : SortKeyByteSink::Append(const char *bytes, int32_t n) {
      31           0 :     if (n <= 0 || bytes == NULL) {
      32           0 :         return;
      33             :     }
      34           0 :     if (ignore_ > 0) {
      35           0 :         int32_t ignoreRest = ignore_ - n;
      36           0 :         if (ignoreRest >= 0) {
      37           0 :             ignore_ = ignoreRest;
      38           0 :             return;
      39             :         } else {
      40           0 :             bytes += ignore_;
      41           0 :             n = -ignoreRest;
      42           0 :             ignore_ = 0;
      43             :         }
      44             :     }
      45           0 :     int32_t length = appended_;
      46           0 :     appended_ += n;
      47           0 :     if ((buffer_ + length) == bytes) {
      48           0 :         return;  // the caller used GetAppendBuffer() and wrote the bytes already
      49             :     }
      50           0 :     int32_t available = capacity_ - length;
      51           0 :     if (n <= available) {
      52           0 :         uprv_memcpy(buffer_ + length, bytes, n);
      53             :     } else {
      54           0 :         AppendBeyondCapacity(bytes, n, length);
      55             :     }
      56             : }
      57             : 
      58             : char *
      59           0 : SortKeyByteSink::GetAppendBuffer(int32_t min_capacity,
      60             :                                  int32_t desired_capacity_hint,
      61             :                                  char *scratch,
      62             :                                  int32_t scratch_capacity,
      63             :                                  int32_t *result_capacity) {
      64           0 :     if (min_capacity < 1 || scratch_capacity < min_capacity) {
      65           0 :         *result_capacity = 0;
      66           0 :         return NULL;
      67             :     }
      68           0 :     if (ignore_ > 0) {
      69             :         // Do not write ignored bytes right at the end of the buffer.
      70           0 :         *result_capacity = scratch_capacity;
      71           0 :         return scratch;
      72             :     }
      73           0 :     int32_t available = capacity_ - appended_;
      74           0 :     if (available >= min_capacity) {
      75           0 :         *result_capacity = available;
      76           0 :         return buffer_ + appended_;
      77           0 :     } else if (Resize(desired_capacity_hint, appended_)) {
      78           0 :         *result_capacity = capacity_ - appended_;
      79           0 :         return buffer_ + appended_;
      80             :     } else {
      81           0 :         *result_capacity = scratch_capacity;
      82           0 :         return scratch;
      83             :     }
      84             : }
      85             : 
      86             : namespace {
      87             : 
      88             : /**
      89             :  * uint8_t byte buffer, similar to CharString but simpler.
      90             :  */
      91             : class SortKeyLevel : public UMemory {
      92             : public:
      93           0 :     SortKeyLevel() : len(0), ok(TRUE) {}
      94           0 :     ~SortKeyLevel() {}
      95             : 
      96             :     /** @return FALSE if memory allocation failed */
      97           0 :     UBool isOk() const { return ok; }
      98           0 :     UBool isEmpty() const { return len == 0; }
      99           0 :     int32_t length() const { return len; }
     100             :     const uint8_t *data() const { return buffer.getAlias(); }
     101           0 :     uint8_t operator[](int32_t index) const { return buffer[index]; }
     102             : 
     103           0 :     uint8_t *data() { return buffer.getAlias(); }
     104             : 
     105             :     void appendByte(uint32_t b);
     106             :     void appendWeight16(uint32_t w);
     107             :     void appendWeight32(uint32_t w);
     108             :     void appendReverseWeight16(uint32_t w);
     109             : 
     110             :     /** Appends all but the last byte to the sink. The last byte should be the 01 terminator. */
     111           0 :     void appendTo(ByteSink &sink) const {
     112           0 :         U_ASSERT(len > 0 && buffer[len - 1] == 1);
     113           0 :         sink.Append(reinterpret_cast<const char *>(buffer.getAlias()), len - 1);
     114           0 :     }
     115             : 
     116             : private:
     117             :     MaybeStackArray<uint8_t, 40> buffer;
     118             :     int32_t len;
     119             :     UBool ok;
     120             : 
     121             :     UBool ensureCapacity(int32_t appendCapacity);
     122             : 
     123             :     SortKeyLevel(const SortKeyLevel &other); // forbid copying of this class
     124             :     SortKeyLevel &operator=(const SortKeyLevel &other); // forbid copying of this class
     125             : };
     126             : 
     127           0 : void SortKeyLevel::appendByte(uint32_t b) {
     128           0 :     if(len < buffer.getCapacity() || ensureCapacity(1)) {
     129           0 :         buffer[len++] = (uint8_t)b;
     130             :     }
     131           0 : }
     132             : 
     133             : void
     134           0 : SortKeyLevel::appendWeight16(uint32_t w) {
     135           0 :     U_ASSERT((w & 0xffff) != 0);
     136           0 :     uint8_t b0 = (uint8_t)(w >> 8);
     137           0 :     uint8_t b1 = (uint8_t)w;
     138           0 :     int32_t appendLength = (b1 == 0) ? 1 : 2;
     139           0 :     if((len + appendLength) <= buffer.getCapacity() || ensureCapacity(appendLength)) {
     140           0 :         buffer[len++] = b0;
     141           0 :         if(b1 != 0) {
     142           0 :             buffer[len++] = b1;
     143             :         }
     144             :     }
     145           0 : }
     146             : 
     147             : void
     148           0 : SortKeyLevel::appendWeight32(uint32_t w) {
     149           0 :     U_ASSERT(w != 0);
     150           0 :     uint8_t bytes[4] = { (uint8_t)(w >> 24), (uint8_t)(w >> 16), (uint8_t)(w >> 8), (uint8_t)w };
     151           0 :     int32_t appendLength = (bytes[1] == 0) ? 1 : (bytes[2] == 0) ? 2 : (bytes[3] == 0) ? 3 : 4;
     152           0 :     if((len + appendLength) <= buffer.getCapacity() || ensureCapacity(appendLength)) {
     153           0 :         buffer[len++] = bytes[0];
     154           0 :         if(bytes[1] != 0) {
     155           0 :             buffer[len++] = bytes[1];
     156           0 :             if(bytes[2] != 0) {
     157           0 :                 buffer[len++] = bytes[2];
     158           0 :                 if(bytes[3] != 0) {
     159           0 :                     buffer[len++] = bytes[3];
     160             :                 }
     161             :             }
     162             :         }
     163             :     }
     164           0 : }
     165             : 
     166             : void
     167           0 : SortKeyLevel::appendReverseWeight16(uint32_t w) {
     168           0 :     U_ASSERT((w & 0xffff) != 0);
     169           0 :     uint8_t b0 = (uint8_t)(w >> 8);
     170           0 :     uint8_t b1 = (uint8_t)w;
     171           0 :     int32_t appendLength = (b1 == 0) ? 1 : 2;
     172           0 :     if((len + appendLength) <= buffer.getCapacity() || ensureCapacity(appendLength)) {
     173           0 :         if(b1 == 0) {
     174           0 :             buffer[len++] = b0;
     175             :         } else {
     176           0 :             buffer[len] = b1;
     177           0 :             buffer[len + 1] = b0;
     178           0 :             len += 2;
     179             :         }
     180             :     }
     181           0 : }
     182             : 
     183           0 : UBool SortKeyLevel::ensureCapacity(int32_t appendCapacity) {
     184           0 :     if(!ok) {
     185           0 :         return FALSE;
     186             :     }
     187           0 :     int32_t newCapacity = 2 * buffer.getCapacity();
     188           0 :     int32_t altCapacity = len + 2 * appendCapacity;
     189           0 :     if (newCapacity < altCapacity) {
     190           0 :         newCapacity = altCapacity;
     191             :     }
     192           0 :     if (newCapacity < 200) {
     193           0 :         newCapacity = 200;
     194             :     }
     195           0 :     if(buffer.resize(newCapacity, len)==NULL) {
     196           0 :         return ok = FALSE;
     197             :     }
     198           0 :     return TRUE;
     199             : }
     200             : 
     201             : }  // namespace
     202             : 
     203           0 : CollationKeys::LevelCallback::~LevelCallback() {}
     204             : 
     205             : UBool
     206           0 : CollationKeys::LevelCallback::needToWrite(Collation::Level /*level*/) { return TRUE; }
     207             : 
     208             : /**
     209             :  * Map from collation strength (UColAttributeValue)
     210             :  * to a mask of Collation::Level bits up to that strength,
     211             :  * excluding the CASE_LEVEL which is independent of the strength,
     212             :  * and excluding IDENTICAL_LEVEL which this function does not write.
     213             :  */
     214             : static const uint32_t levelMasks[UCOL_STRENGTH_LIMIT] = {
     215             :     2,          // UCOL_PRIMARY -> PRIMARY_LEVEL
     216             :     6,          // UCOL_SECONDARY -> up to SECONDARY_LEVEL
     217             :     0x16,       // UCOL_TERTIARY -> up to TERTIARY_LEVEL
     218             :     0x36,       // UCOL_QUATERNARY -> up to QUATERNARY_LEVEL
     219             :     0, 0, 0, 0,
     220             :     0, 0, 0, 0,
     221             :     0, 0, 0,
     222             :     0x36        // UCOL_IDENTICAL -> up to QUATERNARY_LEVEL
     223             : };
     224             : 
     225             : void
     226           0 : CollationKeys::writeSortKeyUpToQuaternary(CollationIterator &iter,
     227             :                                           const UBool *compressibleBytes,
     228             :                                           const CollationSettings &settings,
     229             :                                           SortKeyByteSink &sink,
     230             :                                           Collation::Level minLevel, LevelCallback &callback,
     231             :                                           UBool preflight, UErrorCode &errorCode) {
     232           0 :     if(U_FAILURE(errorCode)) { return; }
     233             : 
     234           0 :     int32_t options = settings.options;
     235             :     // Set of levels to process and write.
     236           0 :     uint32_t levels = levelMasks[CollationSettings::getStrength(options)];
     237           0 :     if((options & CollationSettings::CASE_LEVEL) != 0) {
     238           0 :         levels |= Collation::CASE_LEVEL_FLAG;
     239             :     }
     240             :     // Minus the levels below minLevel.
     241           0 :     levels &= ~(((uint32_t)1 << minLevel) - 1);
     242           0 :     if(levels == 0) { return; }
     243             : 
     244             :     uint32_t variableTop;
     245           0 :     if((options & CollationSettings::ALTERNATE_MASK) == 0) {
     246           0 :         variableTop = 0;
     247             :     } else {
     248             :         // +1 so that we can use "<" and primary ignorables test out early.
     249           0 :         variableTop = settings.variableTop + 1;
     250             :     }
     251             : 
     252           0 :     uint32_t tertiaryMask = CollationSettings::getTertiaryMask(options);
     253             : 
     254           0 :     SortKeyLevel cases;
     255           0 :     SortKeyLevel secondaries;
     256           0 :     SortKeyLevel tertiaries;
     257           0 :     SortKeyLevel quaternaries;
     258             : 
     259           0 :     uint32_t prevReorderedPrimary = 0;  // 0==no compression
     260           0 :     int32_t commonCases = 0;
     261           0 :     int32_t commonSecondaries = 0;
     262           0 :     int32_t commonTertiaries = 0;
     263           0 :     int32_t commonQuaternaries = 0;
     264             : 
     265           0 :     uint32_t prevSecondary = 0;
     266           0 :     int32_t secSegmentStart = 0;
     267             : 
     268             :     for(;;) {
     269             :         // No need to keep all CEs in the buffer when we write a sort key.
     270           0 :         iter.clearCEsIfNoneRemaining();
     271           0 :         int64_t ce = iter.nextCE(errorCode);
     272           0 :         uint32_t p = (uint32_t)(ce >> 32);
     273           0 :         if(p < variableTop && p > Collation::MERGE_SEPARATOR_PRIMARY) {
     274             :             // Variable CE, shift it to quaternary level.
     275             :             // Ignore all following primary ignorables, and shift further variable CEs.
     276           0 :             if(commonQuaternaries != 0) {
     277           0 :                 --commonQuaternaries;
     278           0 :                 while(commonQuaternaries >= QUAT_COMMON_MAX_COUNT) {
     279           0 :                     quaternaries.appendByte(QUAT_COMMON_MIDDLE);
     280           0 :                     commonQuaternaries -= QUAT_COMMON_MAX_COUNT;
     281             :                 }
     282             :                 // Shifted primary weights are lower than the common weight.
     283           0 :                 quaternaries.appendByte(QUAT_COMMON_LOW + commonQuaternaries);
     284           0 :                 commonQuaternaries = 0;
     285             :             }
     286           0 :             do {
     287           0 :                 if((levels & Collation::QUATERNARY_LEVEL_FLAG) != 0) {
     288           0 :                     if(settings.hasReordering()) {
     289           0 :                         p = settings.reorder(p);
     290             :                     }
     291           0 :                     if((p >> 24) >= QUAT_SHIFTED_LIMIT_BYTE) {
     292             :                         // Prevent shifted primary lead bytes from
     293             :                         // overlapping with the common compression range.
     294           0 :                         quaternaries.appendByte(QUAT_SHIFTED_LIMIT_BYTE);
     295             :                     }
     296           0 :                     quaternaries.appendWeight32(p);
     297             :                 }
     298           0 :                 do {
     299           0 :                     ce = iter.nextCE(errorCode);
     300           0 :                     p = (uint32_t)(ce >> 32);
     301           0 :                 } while(p == 0);
     302           0 :             } while(p < variableTop && p > Collation::MERGE_SEPARATOR_PRIMARY);
     303             :         }
     304             :         // ce could be primary ignorable, or NO_CE, or the merge separator,
     305             :         // or a regular primary CE, but it is not variable.
     306             :         // If ce==NO_CE, then write nothing for the primary level but
     307             :         // terminate compression on all levels and then exit the loop.
     308           0 :         if(p > Collation::NO_CE_PRIMARY && (levels & Collation::PRIMARY_LEVEL_FLAG) != 0) {
     309             :             // Test the un-reordered primary for compressibility.
     310           0 :             UBool isCompressible = compressibleBytes[p >> 24];
     311           0 :             if(settings.hasReordering()) {
     312           0 :                 p = settings.reorder(p);
     313             :             }
     314           0 :             uint32_t p1 = p >> 24;
     315           0 :             if(!isCompressible || p1 != (prevReorderedPrimary >> 24)) {
     316           0 :                 if(prevReorderedPrimary != 0) {
     317           0 :                     if(p < prevReorderedPrimary) {
     318             :                         // No primary compression terminator
     319             :                         // at the end of the level or merged segment.
     320           0 :                         if(p1 > Collation::MERGE_SEPARATOR_BYTE) {
     321           0 :                             sink.Append(Collation::PRIMARY_COMPRESSION_LOW_BYTE);
     322             :                         }
     323             :                     } else {
     324           0 :                         sink.Append(Collation::PRIMARY_COMPRESSION_HIGH_BYTE);
     325             :                     }
     326             :                 }
     327           0 :                 sink.Append(p1);
     328           0 :                 if(isCompressible) {
     329           0 :                     prevReorderedPrimary = p;
     330             :                 } else {
     331           0 :                     prevReorderedPrimary = 0;
     332             :                 }
     333             :             }
     334           0 :             char p2 = (char)(p >> 16);
     335           0 :             if(p2 != 0) {
     336           0 :                 char buffer[3] = { p2, (char)(p >> 8), (char)p };
     337           0 :                 sink.Append(buffer, (buffer[1] == 0) ? 1 : (buffer[2] == 0) ? 2 : 3);
     338             :             }
     339             :             // Optimization for internalNextSortKeyPart():
     340             :             // When the primary level overflows we can stop because we need not
     341             :             // calculate (preflight) the whole sort key length.
     342           0 :             if(!preflight && sink.Overflowed()) {
     343           0 :                 if(U_SUCCESS(errorCode) && !sink.IsOk()) {
     344           0 :                     errorCode = U_MEMORY_ALLOCATION_ERROR;
     345             :                 }
     346           0 :                 return;
     347             :             }
     348             :         }
     349             : 
     350           0 :         uint32_t lower32 = (uint32_t)ce;
     351           0 :         if(lower32 == 0) { continue; }  // completely ignorable, no secondary/case/tertiary/quaternary
     352             : 
     353           0 :         if((levels & Collation::SECONDARY_LEVEL_FLAG) != 0) {
     354           0 :             uint32_t s = lower32 >> 16;
     355           0 :             if(s == 0) {
     356             :                 // secondary ignorable
     357           0 :             } else if(s == Collation::COMMON_WEIGHT16 &&
     358           0 :                     ((options & CollationSettings::BACKWARD_SECONDARY) == 0 ||
     359             :                         p != Collation::MERGE_SEPARATOR_PRIMARY)) {
     360             :                 // s is a common secondary weight, and
     361             :                 // backwards-secondary is off or the ce is not the merge separator.
     362           0 :                 ++commonSecondaries;
     363           0 :             } else if((options & CollationSettings::BACKWARD_SECONDARY) == 0) {
     364           0 :                 if(commonSecondaries != 0) {
     365           0 :                     --commonSecondaries;
     366           0 :                     while(commonSecondaries >= SEC_COMMON_MAX_COUNT) {
     367           0 :                         secondaries.appendByte(SEC_COMMON_MIDDLE);
     368           0 :                         commonSecondaries -= SEC_COMMON_MAX_COUNT;
     369             :                     }
     370             :                     uint32_t b;
     371           0 :                     if(s < Collation::COMMON_WEIGHT16) {
     372           0 :                         b = SEC_COMMON_LOW + commonSecondaries;
     373             :                     } else {
     374           0 :                         b = SEC_COMMON_HIGH - commonSecondaries;
     375             :                     }
     376           0 :                     secondaries.appendByte(b);
     377           0 :                     commonSecondaries = 0;
     378             :                 }
     379           0 :                 secondaries.appendWeight16(s);
     380             :             } else {
     381           0 :                 if(commonSecondaries != 0) {
     382           0 :                     --commonSecondaries;
     383             :                     // Append reverse weights. The level will be re-reversed later.
     384           0 :                     int32_t remainder = commonSecondaries % SEC_COMMON_MAX_COUNT;
     385             :                     uint32_t b;
     386           0 :                     if(prevSecondary < Collation::COMMON_WEIGHT16) {
     387           0 :                         b = SEC_COMMON_LOW + remainder;
     388             :                     } else {
     389           0 :                         b = SEC_COMMON_HIGH - remainder;
     390             :                     }
     391           0 :                     secondaries.appendByte(b);
     392           0 :                     commonSecondaries -= remainder;
     393             :                     // commonSecondaries is now a multiple of SEC_COMMON_MAX_COUNT.
     394           0 :                     while(commonSecondaries > 0) {  // same as >= SEC_COMMON_MAX_COUNT
     395           0 :                         secondaries.appendByte(SEC_COMMON_MIDDLE);
     396           0 :                         commonSecondaries -= SEC_COMMON_MAX_COUNT;
     397             :                     }
     398             :                     // commonSecondaries == 0
     399             :                 }
     400           0 :                 if(0 < p && p <= Collation::MERGE_SEPARATOR_PRIMARY) {
     401             :                     // The backwards secondary level compares secondary weights backwards
     402             :                     // within segments separated by the merge separator (U+FFFE).
     403           0 :                     uint8_t *secs = secondaries.data();
     404           0 :                     int32_t last = secondaries.length() - 1;
     405           0 :                     if(secSegmentStart < last) {
     406           0 :                         uint8_t *p = secs + secSegmentStart;
     407           0 :                         uint8_t *q = secs + last;
     408           0 :                         do {
     409           0 :                             uint8_t b = *p;
     410           0 :                             *p++ = *q;
     411           0 :                             *q-- = b;
     412           0 :                         } while(p < q);
     413             :                     }
     414           0 :                     secondaries.appendByte(p == Collation::NO_CE_PRIMARY ?
     415           0 :                         Collation::LEVEL_SEPARATOR_BYTE : Collation::MERGE_SEPARATOR_BYTE);
     416           0 :                     prevSecondary = 0;
     417           0 :                     secSegmentStart = secondaries.length();
     418             :                 } else {
     419           0 :                     secondaries.appendReverseWeight16(s);
     420           0 :                     prevSecondary = s;
     421             :                 }
     422             :             }
     423             :         }
     424             : 
     425           0 :         if((levels & Collation::CASE_LEVEL_FLAG) != 0) {
     426           0 :             if((CollationSettings::getStrength(options) == UCOL_PRIMARY) ?
     427             :                     p == 0 : lower32 <= 0xffff) {
     428             :                 // Primary+caseLevel: Ignore case level weights of primary ignorables.
     429             :                 // Otherwise: Ignore case level weights of secondary ignorables.
     430             :                 // For details see the comments in the CollationCompare class.
     431             :             } else {
     432           0 :                 uint32_t c = (lower32 >> 8) & 0xff;  // case bits & tertiary lead byte
     433           0 :                 U_ASSERT((c & 0xc0) != 0xc0);
     434           0 :                 if((c & 0xc0) == 0 && c > Collation::LEVEL_SEPARATOR_BYTE) {
     435           0 :                     ++commonCases;
     436             :                 } else {
     437           0 :                     if((options & CollationSettings::UPPER_FIRST) == 0) {
     438             :                         // lowerFirst: Compress common weights to nibbles 1..7..13, mixed=14, upper=15.
     439             :                         // If there are only common (=lowest) weights in the whole level,
     440             :                         // then we need not write anything.
     441             :                         // Level length differences are handled already on the next-higher level.
     442           0 :                         if(commonCases != 0 &&
     443           0 :                                 (c > Collation::LEVEL_SEPARATOR_BYTE || !cases.isEmpty())) {
     444           0 :                             --commonCases;
     445           0 :                             while(commonCases >= CASE_LOWER_FIRST_COMMON_MAX_COUNT) {
     446           0 :                                 cases.appendByte(CASE_LOWER_FIRST_COMMON_MIDDLE << 4);
     447           0 :                                 commonCases -= CASE_LOWER_FIRST_COMMON_MAX_COUNT;
     448             :                             }
     449             :                             uint32_t b;
     450           0 :                             if(c <= Collation::LEVEL_SEPARATOR_BYTE) {
     451           0 :                                 b = CASE_LOWER_FIRST_COMMON_LOW + commonCases;
     452             :                             } else {
     453           0 :                                 b = CASE_LOWER_FIRST_COMMON_HIGH - commonCases;
     454             :                             }
     455           0 :                             cases.appendByte(b << 4);
     456           0 :                             commonCases = 0;
     457             :                         }
     458           0 :                         if(c > Collation::LEVEL_SEPARATOR_BYTE) {
     459           0 :                             c = (CASE_LOWER_FIRST_COMMON_HIGH + (c >> 6)) << 4;  // 14 or 15
     460             :                         }
     461             :                     } else {
     462             :                         // upperFirst: Compress common weights to nibbles 3..15, mixed=2, upper=1.
     463             :                         // The compressed common case weights only go up from the "low" value
     464             :                         // because with upperFirst the common weight is the highest one.
     465           0 :                         if(commonCases != 0) {
     466           0 :                             --commonCases;
     467           0 :                             while(commonCases >= CASE_UPPER_FIRST_COMMON_MAX_COUNT) {
     468           0 :                                 cases.appendByte(CASE_UPPER_FIRST_COMMON_LOW << 4);
     469           0 :                                 commonCases -= CASE_UPPER_FIRST_COMMON_MAX_COUNT;
     470             :                             }
     471           0 :                             cases.appendByte((CASE_UPPER_FIRST_COMMON_LOW + commonCases) << 4);
     472           0 :                             commonCases = 0;
     473             :                         }
     474           0 :                         if(c > Collation::LEVEL_SEPARATOR_BYTE) {
     475           0 :                             c = (CASE_UPPER_FIRST_COMMON_LOW - (c >> 6)) << 4;  // 2 or 1
     476             :                         }
     477             :                     }
     478             :                     // c is a separator byte 01,
     479             :                     // or a left-shifted nibble 0x10, 0x20, ... 0xf0.
     480           0 :                     cases.appendByte(c);
     481             :                 }
     482             :             }
     483             :         }
     484             : 
     485           0 :         if((levels & Collation::TERTIARY_LEVEL_FLAG) != 0) {
     486           0 :             uint32_t t = lower32 & tertiaryMask;
     487           0 :             U_ASSERT((lower32 & 0xc000) != 0xc000);
     488           0 :             if(t == Collation::COMMON_WEIGHT16) {
     489           0 :                 ++commonTertiaries;
     490           0 :             } else if((tertiaryMask & 0x8000) == 0) {
     491             :                 // Tertiary weights without case bits.
     492             :                 // Move lead bytes 06..3F to C6..FF for a large common-weight range.
     493           0 :                 if(commonTertiaries != 0) {
     494           0 :                     --commonTertiaries;
     495           0 :                     while(commonTertiaries >= TER_ONLY_COMMON_MAX_COUNT) {
     496           0 :                         tertiaries.appendByte(TER_ONLY_COMMON_MIDDLE);
     497           0 :                         commonTertiaries -= TER_ONLY_COMMON_MAX_COUNT;
     498             :                     }
     499             :                     uint32_t b;
     500           0 :                     if(t < Collation::COMMON_WEIGHT16) {
     501           0 :                         b = TER_ONLY_COMMON_LOW + commonTertiaries;
     502             :                     } else {
     503           0 :                         b = TER_ONLY_COMMON_HIGH - commonTertiaries;
     504             :                     }
     505           0 :                     tertiaries.appendByte(b);
     506           0 :                     commonTertiaries = 0;
     507             :                 }
     508           0 :                 if(t > Collation::COMMON_WEIGHT16) { t += 0xc000; }
     509           0 :                 tertiaries.appendWeight16(t);
     510           0 :             } else if((options & CollationSettings::UPPER_FIRST) == 0) {
     511             :                 // Tertiary weights with caseFirst=lowerFirst.
     512             :                 // Move lead bytes 06..BF to 46..FF for the common-weight range.
     513           0 :                 if(commonTertiaries != 0) {
     514           0 :                     --commonTertiaries;
     515           0 :                     while(commonTertiaries >= TER_LOWER_FIRST_COMMON_MAX_COUNT) {
     516           0 :                         tertiaries.appendByte(TER_LOWER_FIRST_COMMON_MIDDLE);
     517           0 :                         commonTertiaries -= TER_LOWER_FIRST_COMMON_MAX_COUNT;
     518             :                     }
     519             :                     uint32_t b;
     520           0 :                     if(t < Collation::COMMON_WEIGHT16) {
     521           0 :                         b = TER_LOWER_FIRST_COMMON_LOW + commonTertiaries;
     522             :                     } else {
     523           0 :                         b = TER_LOWER_FIRST_COMMON_HIGH - commonTertiaries;
     524             :                     }
     525           0 :                     tertiaries.appendByte(b);
     526           0 :                     commonTertiaries = 0;
     527             :                 }
     528           0 :                 if(t > Collation::COMMON_WEIGHT16) { t += 0x4000; }
     529           0 :                 tertiaries.appendWeight16(t);
     530             :             } else {
     531             :                 // Tertiary weights with caseFirst=upperFirst.
     532             :                 // Do not change the artificial uppercase weight of a tertiary CE (0.0.ut),
     533             :                 // to keep tertiary CEs well-formed.
     534             :                 // Their case+tertiary weights must be greater than those of
     535             :                 // primary and secondary CEs.
     536             :                 //
     537             :                 // Separator         01 -> 01      (unchanged)
     538             :                 // Lowercase     02..04 -> 82..84  (includes uncased)
     539             :                 // Common weight     05 -> 85..C5  (common-weight compression range)
     540             :                 // Lowercase     06..3F -> C6..FF
     541             :                 // Mixed case    42..7F -> 42..7F
     542             :                 // Uppercase     82..BF -> 02..3F
     543             :                 // Tertiary CE   86..BF -> C6..FF
     544           0 :                 if(t <= Collation::NO_CE_WEIGHT16) {
     545             :                     // Keep separators unchanged.
     546           0 :                 } else if(lower32 > 0xffff) {
     547             :                     // Invert case bits of primary & secondary CEs.
     548           0 :                     t ^= 0xc000;
     549           0 :                     if(t < (TER_UPPER_FIRST_COMMON_HIGH << 8)) {
     550           0 :                         t -= 0x4000;
     551             :                     }
     552             :                 } else {
     553             :                     // Keep uppercase bits of tertiary CEs.
     554           0 :                     U_ASSERT(0x8600 <= t && t <= 0xbfff);
     555           0 :                     t += 0x4000;
     556             :                 }
     557           0 :                 if(commonTertiaries != 0) {
     558           0 :                     --commonTertiaries;
     559           0 :                     while(commonTertiaries >= TER_UPPER_FIRST_COMMON_MAX_COUNT) {
     560           0 :                         tertiaries.appendByte(TER_UPPER_FIRST_COMMON_MIDDLE);
     561           0 :                         commonTertiaries -= TER_UPPER_FIRST_COMMON_MAX_COUNT;
     562             :                     }
     563             :                     uint32_t b;
     564           0 :                     if(t < (TER_UPPER_FIRST_COMMON_LOW << 8)) {
     565           0 :                         b = TER_UPPER_FIRST_COMMON_LOW + commonTertiaries;
     566             :                     } else {
     567           0 :                         b = TER_UPPER_FIRST_COMMON_HIGH - commonTertiaries;
     568             :                     }
     569           0 :                     tertiaries.appendByte(b);
     570           0 :                     commonTertiaries = 0;
     571             :                 }
     572           0 :                 tertiaries.appendWeight16(t);
     573             :             }
     574             :         }
     575             : 
     576           0 :         if((levels & Collation::QUATERNARY_LEVEL_FLAG) != 0) {
     577           0 :             uint32_t q = lower32 & 0xffff;
     578           0 :             if((q & 0xc0) == 0 && q > Collation::NO_CE_WEIGHT16) {
     579           0 :                 ++commonQuaternaries;
     580           0 :             } else if(q == Collation::NO_CE_WEIGHT16 &&
     581           0 :                     (options & CollationSettings::ALTERNATE_MASK) == 0 &&
     582           0 :                     quaternaries.isEmpty()) {
     583             :                 // If alternate=non-ignorable and there are only common quaternary weights,
     584             :                 // then we need not write anything.
     585             :                 // The only weights greater than the merge separator and less than the common weight
     586             :                 // are shifted primary weights, which are not generated for alternate=non-ignorable.
     587             :                 // There are also exactly as many quaternary weights as tertiary weights,
     588             :                 // so level length differences are handled already on tertiary level.
     589             :                 // Any above-common quaternary weight will compare greater regardless.
     590           0 :                 quaternaries.appendByte(Collation::LEVEL_SEPARATOR_BYTE);
     591             :             } else {
     592           0 :                 if(q == Collation::NO_CE_WEIGHT16) {
     593           0 :                     q = Collation::LEVEL_SEPARATOR_BYTE;
     594             :                 } else {
     595           0 :                     q = 0xfc + ((q >> 6) & 3);
     596             :                 }
     597           0 :                 if(commonQuaternaries != 0) {
     598           0 :                     --commonQuaternaries;
     599           0 :                     while(commonQuaternaries >= QUAT_COMMON_MAX_COUNT) {
     600           0 :                         quaternaries.appendByte(QUAT_COMMON_MIDDLE);
     601           0 :                         commonQuaternaries -= QUAT_COMMON_MAX_COUNT;
     602             :                     }
     603             :                     uint32_t b;
     604           0 :                     if(q < QUAT_COMMON_LOW) {
     605           0 :                         b = QUAT_COMMON_LOW + commonQuaternaries;
     606             :                     } else {
     607           0 :                         b = QUAT_COMMON_HIGH - commonQuaternaries;
     608             :                     }
     609           0 :                     quaternaries.appendByte(b);
     610           0 :                     commonQuaternaries = 0;
     611             :                 }
     612           0 :                 quaternaries.appendByte(q);
     613             :             }
     614             :         }
     615             : 
     616           0 :         if((lower32 >> 24) == Collation::LEVEL_SEPARATOR_BYTE) { break; }  // ce == NO_CE
     617           0 :     }
     618             : 
     619           0 :     if(U_FAILURE(errorCode)) { return; }
     620             : 
     621             :     // Append the beyond-primary levels.
     622           0 :     UBool ok = TRUE;
     623           0 :     if((levels & Collation::SECONDARY_LEVEL_FLAG) != 0) {
     624           0 :         if(!callback.needToWrite(Collation::SECONDARY_LEVEL)) { return; }
     625           0 :         ok &= secondaries.isOk();
     626           0 :         sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
     627           0 :         secondaries.appendTo(sink);
     628             :     }
     629             : 
     630           0 :     if((levels & Collation::CASE_LEVEL_FLAG) != 0) {
     631           0 :         if(!callback.needToWrite(Collation::CASE_LEVEL)) { return; }
     632           0 :         ok &= cases.isOk();
     633           0 :         sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
     634             :         // Write pairs of nibbles as bytes, except separator bytes as themselves.
     635           0 :         int32_t length = cases.length() - 1;  // Ignore the trailing NO_CE.
     636           0 :         uint8_t b = 0;
     637           0 :         for(int32_t i = 0; i < length; ++i) {
     638           0 :             uint8_t c = (uint8_t)cases[i];
     639           0 :             U_ASSERT((c & 0xf) == 0 && c != 0);
     640           0 :             if(b == 0) {
     641           0 :                 b = c;
     642             :             } else {
     643           0 :                 sink.Append(b | (c >> 4));
     644           0 :                 b = 0;
     645             :             }
     646             :         }
     647           0 :         if(b != 0) {
     648           0 :             sink.Append(b);
     649             :         }
     650             :     }
     651             : 
     652           0 :     if((levels & Collation::TERTIARY_LEVEL_FLAG) != 0) {
     653           0 :         if(!callback.needToWrite(Collation::TERTIARY_LEVEL)) { return; }
     654           0 :         ok &= tertiaries.isOk();
     655           0 :         sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
     656           0 :         tertiaries.appendTo(sink);
     657             :     }
     658             : 
     659           0 :     if((levels & Collation::QUATERNARY_LEVEL_FLAG) != 0) {
     660           0 :         if(!callback.needToWrite(Collation::QUATERNARY_LEVEL)) { return; }
     661           0 :         ok &= quaternaries.isOk();
     662           0 :         sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
     663           0 :         quaternaries.appendTo(sink);
     664             :     }
     665             : 
     666           0 :     if(!ok || !sink.IsOk()) {
     667           0 :         errorCode = U_MEMORY_ALLOCATION_ERROR;
     668             :     }
     669             : }
     670             : 
     671             : U_NAMESPACE_END
     672             : 
     673             : #endif  // !UCONFIG_NO_COLLATION

Generated by: LCOV version 1.13