LCOV - code coverage report
Current view: top level - intl/icu/source/i18n - collationfastlatin.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 0 690 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 11 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : *******************************************************************************
       5             : * Copyright (C) 2013-2015, International Business Machines
       6             : * Corporation and others.  All Rights Reserved.
       7             : *******************************************************************************
       8             : * collationfastlatin.cpp
       9             : *
      10             : * created on: 2013aug18
      11             : * created by: Markus W. Scherer
      12             : */
      13             : 
      14             : #include "unicode/utypes.h"
      15             : 
      16             : #if !UCONFIG_NO_COLLATION
      17             : 
      18             : #include "unicode/ucol.h"
      19             : #include "collationdata.h"
      20             : #include "collationfastlatin.h"
      21             : #include "collationsettings.h"
      22             : #include "uassert.h"
      23             : 
      24             : U_NAMESPACE_BEGIN
      25             : 
      26             : int32_t  // -1 on any failed check below; otherwise (miniVarTop << 16) | settings.options
      27           0 : CollationFastLatin::getOptions(const CollationData *data, const CollationSettings &settings,
      28             :                                uint16_t *primaries, int32_t capacity) {  // fills primaries[0..LATIN_LIMIT-1]
      29           0 :     const uint16_t *table = data->fastLatinTable;
      30           0 :     if(table == NULL) { return -1; }  // this data has no fast Latin table
      31           0 :     U_ASSERT(capacity == LATIN_LIMIT);
      32           0 :     if(capacity != LATIN_LIMIT) { return -1; }  // runtime guard in addition to the assertion above
      33             : 
      34             :     uint32_t miniVarTop;
      35           0 :     if((settings.options & CollationSettings::ALTERNATE_MASK) == 0) {
      36             :         // No mini primaries are variable, set a variableTop just below the
      37             :         // lowest long mini primary.
      38           0 :         miniVarTop = MIN_LONG - 1;
      39             :     } else {
      40           0 :         int32_t headerLength = *table & 0xff;  // low byte of table[0] is the header length
      41           0 :         int32_t i = 1 + settings.getMaxVariable();  // header slot holding the mini variableTop for this maxVariable
      42           0 :         if(i >= headerLength) {
      43           0 :             return -1;  // variableTop >= digits, should not occur
      44             :         }
      45           0 :         miniVarTop = table[i];
      46             :     }
      47             : 
      48           0 :     UBool digitsAreReordered = FALSE;
      49           0 :     if(settings.hasReordering()) {  // verify the special groups and Latin keep ascending reordered starts
      50           0 :         uint32_t prevStart = 0;  // reordered start of the most recent non-digit group seen
      51           0 :         uint32_t beforeDigitStart = 0;
      52           0 :         uint32_t digitStart = 0;
      53           0 :         uint32_t afterDigitStart = 0;
      54           0 :         for(int32_t group = UCOL_REORDER_CODE_FIRST;
      55           0 :                 group < UCOL_REORDER_CODE_FIRST + CollationData::MAX_NUM_SPECIAL_REORDER_CODES;
      56             :                 ++group) {
      57           0 :             uint32_t start = data->getFirstPrimaryForGroup(group);
      58           0 :             start = settings.reorder(start);
      59           0 :             if(group == UCOL_REORDER_CODE_DIGIT) {  // digits are recorded separately and do not update prevStart
      60           0 :                 beforeDigitStart = prevStart;
      61           0 :                 digitStart = start;
      62           0 :             } else if(start != 0) {  // groups whose reordered start is 0 are skipped
      63           0 :                 if(start < prevStart) {
      64             :                     // The permutation affects the groups up to Latin.
      65           0 :                     return -1;
      66             :                 }
      67             :                 // In the future, there might be a special group between digits & Latin.
      68           0 :                 if(digitStart != 0 && afterDigitStart == 0 && prevStart == beforeDigitStart) {
      69           0 :                     afterDigitStart = start;  // first non-digit group encountered after the digit group
      70             :                 }
      71           0 :                 prevStart = start;
      72             :             }
      73             :         }
      74           0 :         uint32_t latinStart = data->getFirstPrimaryForGroup(USCRIPT_LATIN);
      75           0 :         latinStart = settings.reorder(latinStart);
      76           0 :         if(latinStart < prevStart) {  // Latin was moved below one of the special groups
      77           0 :             return -1;
      78             :         }
      79           0 :         if(afterDigitStart == 0) {  // no special group followed the digits: Latin is the next group
      80           0 :             afterDigitStart = latinStart;
      81             :         }
      82           0 :         if(!(beforeDigitStart < digitStart && digitStart < afterDigitStart)) {  // digits not strictly between their neighbors
      83           0 :             digitsAreReordered = TRUE;
      84             :         }
      85             :     }
      86             : 
      87           0 :     table += (table[0] & 0xff);  // skip the header
      88           0 :     for(UChar32 c = 0; c < LATIN_LIMIT; ++c) {  // precompute one primary per code point below LATIN_LIMIT
      89           0 :         uint32_t p = table[c];
      90           0 :         if(p >= MIN_SHORT) {
      91           0 :             p &= SHORT_PRIMARY_MASK;
      92           0 :         } else if(p > miniVarTop) {
      93           0 :             p &= LONG_PRIMARY_MASK;
      94             :         } else {
      95           0 :             p = 0;  // at or below miniVarTop; compareUTF16() re-reads table[c] when it finds 0 here
      96             :         }
      97           0 :         primaries[c] = (uint16_t)p;
      98             :     }
      99           0 :     if(digitsAreReordered || (settings.options & CollationSettings::NUMERIC) != 0) {
     100             :         // Bail out for digits.
     101           0 :         for(UChar32 c = 0x30; c <= 0x39; ++c) { primaries[c] = 0; }  // '0'..'9'
     102             :     }
     103             : 
     104             :     // Shift the miniVarTop above other options.
     105           0 :     return ((int32_t)miniVarTop << 16) | settings.options;
     106             : }
     107             : 
     108             : int32_t  // UCOL_LESS / UCOL_GREATER / UCOL_EQUAL, or BAIL_OUT_RESULT at the bail-out points below
     109           0 : CollationFastLatin::compareUTF16(const uint16_t *table, const uint16_t *primaries, int32_t options,
     110             :                                  const UChar *left, int32_t leftLength,
     111             :                                  const UChar *right, int32_t rightLength) {
     112             :     // This is a modified copy of CollationCompare::compareUpToQuaternary(),
     113             :     // optimized for common Latin text.
     114             :     // Keep them in sync!
     115             :     // Keep compareUTF16() and compareUTF8() in sync very closely!
     116             : 
     117           0 :     U_ASSERT((table[0] >> 8) == VERSION);  // high byte of table[0] must match VERSION
     118           0 :     table += (table[0] & 0xff);  // skip the header
     119           0 :     uint32_t variableTop = (uint32_t)options >> 16;  // upper 16 bits carry the miniVarTop; see getOptions()
     120           0 :     options &= 0xffff;  // needed for CollationSettings::getStrength() to work
     121             : 
     122             :     // Check for supported characters, fetch mini CEs, and compare primaries.
     123           0 :     int32_t leftIndex = 0, rightIndex = 0;
     124             :     /**
     125             :      * Single mini CE or a pair.
     126             :      * The current mini CE is in the lower 16 bits, the next one is in the upper 16 bits.
     127             :      * If there is only one, then it is in the lower bits, and the upper bits are 0.
     128             :      */
     129           0 :     uint32_t leftPair = 0, rightPair = 0;
     130             :     for(;;) {
     131             :         // We fetch CEs until we get a non-ignorable primary or reach the end.
     132           0 :         while(leftPair == 0) {
     133           0 :             if(leftIndex == leftLength) {
     134           0 :                 leftPair = EOS;  // end of the left string
     135           0 :                 break;
     136             :             }
     137           0 :             UChar32 c = left[leftIndex++];
     138           0 :             if(c <= LATIN_MAX) {
     139           0 :                 leftPair = primaries[c];  // precomputed primary from getOptions()
     140           0 :                 if(leftPair != 0) { break; }
     141           0 :                 if(c <= 0x39 && c >= 0x30 && (options & CollationSettings::NUMERIC) != 0) {  // digit with NUMERIC set
     142           0 :                     return BAIL_OUT_RESULT;
     143             :                 }
     144           0 :                 leftPair = table[c];  // precomputed primary was 0: read the raw table entry
     145           0 :             } else if(PUNCT_START <= c && c < PUNCT_LIMIT) {
     146           0 :                 leftPair = table[c - PUNCT_START + LATIN_LIMIT];  // this range is stored right after the LATIN_LIMIT entries
     147             :             } else {
     148           0 :                 leftPair = lookup(table, c);
     149             :             }
     150           0 :             if(leftPair >= MIN_SHORT) {
     151           0 :                 leftPair &= SHORT_PRIMARY_MASK;
     152           0 :                 break;
     153           0 :             } else if(leftPair > variableTop) {
     154           0 :                 leftPair &= LONG_PRIMARY_MASK;
     155           0 :                 break;
     156             :             } else {
     157           0 :                 leftPair = nextPair(table, c, leftPair, left, NULL, leftIndex, leftLength);  // UTF-16 text passed, UTF-8 pointer NULL
     158           0 :                 if(leftPair == BAIL_OUT) { return BAIL_OUT_RESULT; }
     159           0 :                 leftPair = getPrimaries(variableTop, leftPair);
     160             :             }
     161             :         }
     162             : 
     163           0 :         while(rightPair == 0) {  // mirrors the left-side loop above, for the right string
     164           0 :             if(rightIndex == rightLength) {
     165           0 :                 rightPair = EOS;
     166           0 :                 break;
     167             :             }
     168           0 :             UChar32 c = right[rightIndex++];
     169           0 :             if(c <= LATIN_MAX) {
     170           0 :                 rightPair = primaries[c];
     171           0 :                 if(rightPair != 0) { break; }
     172           0 :                 if(c <= 0x39 && c >= 0x30 && (options & CollationSettings::NUMERIC) != 0) {
     173           0 :                     return BAIL_OUT_RESULT;
     174             :                 }
     175           0 :                 rightPair = table[c];
     176           0 :             } else if(PUNCT_START <= c && c < PUNCT_LIMIT) {
     177           0 :                 rightPair = table[c - PUNCT_START + LATIN_LIMIT];
     178             :             } else {
     179           0 :                 rightPair = lookup(table, c);
     180             :             }
     181           0 :             if(rightPair >= MIN_SHORT) {
     182           0 :                 rightPair &= SHORT_PRIMARY_MASK;
     183           0 :                 break;
     184           0 :             } else if(rightPair > variableTop) {
     185           0 :                 rightPair &= LONG_PRIMARY_MASK;
     186           0 :                 break;
     187             :             } else {
     188           0 :                 rightPair = nextPair(table, c, rightPair, right, NULL, rightIndex, rightLength);
     189           0 :                 if(rightPair == BAIL_OUT) { return BAIL_OUT_RESULT; }
     190           0 :                 rightPair = getPrimaries(variableTop, rightPair);
     191             :             }
     192             :         }
     193             : 
     194           0 :         if(leftPair == rightPair) {
     195           0 :             if(leftPair == EOS) { break; }  // both strings ended with no primary difference
     196           0 :             leftPair = rightPair = 0;  // identical pairs: fetch fresh ones on both sides
     197           0 :             continue;
     198             :         }
     199           0 :         uint32_t leftPrimary = leftPair & 0xffff;  // current mini CE of each pair
     200           0 :         uint32_t rightPrimary = rightPair & 0xffff;
     201           0 :         if(leftPrimary != rightPrimary) {
     202             :             // Return the primary difference.
     203           0 :             return (leftPrimary < rightPrimary) ? UCOL_LESS : UCOL_GREATER;
     204             :         }
     205           0 :         if(leftPair == EOS) { break; }
     206           0 :         leftPair >>= 16;  // advance to the second mini CE of each pair (0 if there was none)
     207           0 :         rightPair >>= 16;
     208           0 :     }
     209             :     // In the following, we need to re-fetch each character because we did not buffer the CEs,
     210             :     // but we know that the string is well-formed and
     211             :     // only contains supported characters and mappings.
     212             : 
     213             :     // We might skip the secondary level but continue with the case level
     214             :     // which is turned on separately.
     215           0 :     if(CollationSettings::getStrength(options) >= UCOL_SECONDARY) {
     216           0 :         leftIndex = rightIndex = 0;  // restart both strings for the secondary pass
     217           0 :         leftPair = rightPair = 0;
     218             :         for(;;) {
     219           0 :             while(leftPair == 0) {
     220           0 :                 if(leftIndex == leftLength) {
     221           0 :                     leftPair = EOS;
     222           0 :                     break;
     223             :                 }
     224           0 :                 UChar32 c = left[leftIndex++];
     225           0 :                 if(c <= LATIN_MAX) {
     226           0 :                     leftPair = table[c];
     227           0 :                 } else if(PUNCT_START <= c && c < PUNCT_LIMIT) {
     228           0 :                     leftPair = table[c - PUNCT_START + LATIN_LIMIT];
     229             :                 } else {
     230           0 :                     leftPair = lookup(table, c);
     231             :                 }
     232           0 :                 if(leftPair >= MIN_SHORT) {
     233           0 :                     leftPair = getSecondariesFromOneShortCE(leftPair);
     234           0 :                     break;
     235           0 :                 } else if(leftPair > variableTop) {
     236           0 :                     leftPair = COMMON_SEC_PLUS_OFFSET;  // entries in the long range all get this one secondary value
     237           0 :                     break;
     238             :                 } else {
     239           0 :                     leftPair = nextPair(table, c, leftPair, left, NULL, leftIndex, leftLength);  // no BAIL_OUT check in this pass
     240           0 :                     leftPair = getSecondaries(variableTop, leftPair);
     241             :                 }
     242             :             }
     243             : 
     244           0 :             while(rightPair == 0) {  // mirrors the left-side loop above, for the right string
     245           0 :                 if(rightIndex == rightLength) {
     246           0 :                     rightPair = EOS;
     247           0 :                     break;
     248             :                 }
     249           0 :                 UChar32 c = right[rightIndex++];
     250           0 :                 if(c <= LATIN_MAX) {
     251           0 :                     rightPair = table[c];
     252           0 :                 } else if(PUNCT_START <= c && c < PUNCT_LIMIT) {
     253           0 :                     rightPair = table[c - PUNCT_START + LATIN_LIMIT];
     254             :                 } else {
     255           0 :                     rightPair = lookup(table, c);
     256             :                 }
     257           0 :                 if(rightPair >= MIN_SHORT) {
     258           0 :                     rightPair = getSecondariesFromOneShortCE(rightPair);
     259           0 :                     break;
     260           0 :                 } else if(rightPair > variableTop) {
     261           0 :                     rightPair = COMMON_SEC_PLUS_OFFSET;
     262           0 :                     break;
     263             :                 } else {
     264           0 :                     rightPair = nextPair(table, c, rightPair, right, NULL, rightIndex, rightLength);
     265           0 :                     rightPair = getSecondaries(variableTop, rightPair);
     266             :                 }
     267             :             }
     268             : 
     269           0 :             if(leftPair == rightPair) {
     270           0 :                 if(leftPair == EOS) { break; }
     271           0 :                 leftPair = rightPair = 0;
     272           0 :                 continue;
     273             :             }
     274           0 :             uint32_t leftSecondary = leftPair & 0xffff;
     275           0 :             uint32_t rightSecondary = rightPair & 0xffff;
     276           0 :             if(leftSecondary != rightSecondary) {
     277           0 :                 if((options & CollationSettings::BACKWARD_SECONDARY) != 0) {
     278             :                     // Full support for backwards secondary requires backwards contraction matching
     279             :                     // and moving backwards between merge separators.
     280           0 :                     return BAIL_OUT_RESULT;
     281             :                 }
     282           0 :                 return (leftSecondary < rightSecondary) ? UCOL_LESS : UCOL_GREATER;
     283             :             }
     284           0 :             if(leftPair == EOS) { break; }
     285           0 :             leftPair >>= 16;
     286           0 :             rightPair >>= 16;
     287           0 :         }
     288             :     }
     289             : 
     290           0 :     if((options & CollationSettings::CASE_LEVEL) != 0) {  // case pass, enabled independently of strength
     291           0 :         UBool strengthIsPrimary = CollationSettings::getStrength(options) == UCOL_PRIMARY;
     292           0 :         leftIndex = rightIndex = 0;
     293           0 :         leftPair = rightPair = 0;
     294             :         for(;;) {
     295           0 :             while(leftPair == 0) {
     296           0 :                 if(leftIndex == leftLength) {
     297           0 :                     leftPair = EOS;
     298           0 :                     break;
     299             :                 }
     300           0 :                 UChar32 c = left[leftIndex++];
     301           0 :                 leftPair = (c <= LATIN_MAX) ? table[c] : lookup(table, c);  // here lookup() is used for everything above LATIN_MAX
     302           0 :                 if(leftPair < MIN_LONG) {  // below the long and short ranges: resolve via nextPair()
     303           0 :                     leftPair = nextPair(table, c, leftPair, left, NULL, leftIndex, leftLength);
     304             :                 }
     305           0 :                 leftPair = getCases(variableTop, strengthIsPrimary, leftPair);
     306             :             }
     307             : 
     308           0 :             while(rightPair == 0) {  // mirrors the left-side loop above, for the right string
     309           0 :                 if(rightIndex == rightLength) {
     310           0 :                     rightPair = EOS;
     311           0 :                     break;
     312             :                 }
     313           0 :                 UChar32 c = right[rightIndex++];
     314           0 :                 rightPair = (c <= LATIN_MAX) ? table[c] : lookup(table, c);
     315           0 :                 if(rightPair < MIN_LONG) {
     316           0 :                     rightPair = nextPair(table, c, rightPair, right, NULL, rightIndex, rightLength);
     317             :                 }
     318           0 :                 rightPair = getCases(variableTop, strengthIsPrimary, rightPair);
     319             :             }
     320             : 
     321           0 :             if(leftPair == rightPair) {
     322           0 :                 if(leftPair == EOS) { break; }
     323           0 :                 leftPair = rightPair = 0;
     324           0 :                 continue;
     325             :             }
     326           0 :             uint32_t leftCase = leftPair & 0xffff;
     327           0 :             uint32_t rightCase = rightPair & 0xffff;
     328           0 :             if(leftCase != rightCase) {
     329           0 :                 if((options & CollationSettings::UPPER_FIRST) == 0) {
     330           0 :                     return (leftCase < rightCase) ? UCOL_LESS : UCOL_GREATER;
     331             :                 } else {
     332           0 :                     return (leftCase < rightCase) ? UCOL_GREATER : UCOL_LESS;  // UPPER_FIRST inverts the case ordering
     333             :                 }
     334             :             }
     335           0 :             if(leftPair == EOS) { break; }
     336           0 :             leftPair >>= 16;
     337           0 :             rightPair >>= 16;
     338           0 :         }
     339             :     }
     340           0 :     if(CollationSettings::getStrength(options) <= UCOL_SECONDARY) { return UCOL_EQUAL; }  // no tertiary pass requested
     341             : 
     342             :     // Remove the case bits from the tertiary weight when caseLevel is on or caseFirst is off.
     343           0 :     UBool withCaseBits = CollationSettings::isTertiaryWithCaseBits(options);
     344             : 
     345           0 :     leftIndex = rightIndex = 0;  // restart both strings for the tertiary pass
     346           0 :     leftPair = rightPair = 0;
     347             :     for(;;) {
     348           0 :         while(leftPair == 0) {
     349           0 :             if(leftIndex == leftLength) {
     350           0 :                 leftPair = EOS;
     351           0 :                 break;
     352             :             }
     353           0 :             UChar32 c = left[leftIndex++];
     354           0 :             leftPair = (c <= LATIN_MAX) ? table[c] : lookup(table, c);
     355           0 :             if(leftPair < MIN_LONG) {
     356           0 :                 leftPair = nextPair(table, c, leftPair, left, NULL, leftIndex, leftLength);
     357             :             }
     358           0 :             leftPair = getTertiaries(variableTop, withCaseBits, leftPair);
     359             :         }
     360             : 
     361           0 :         while(rightPair == 0) {  // mirrors the left-side loop above, for the right string
     362           0 :             if(rightIndex == rightLength) {
     363           0 :                 rightPair = EOS;
     364           0 :                 break;
     365             :             }
     366           0 :             UChar32 c = right[rightIndex++];
     367           0 :             rightPair = (c <= LATIN_MAX) ? table[c] : lookup(table, c);
     368           0 :             if(rightPair < MIN_LONG) {
     369           0 :                 rightPair = nextPair(table, c, rightPair, right, NULL, rightIndex, rightLength);
     370             :             }
     371           0 :             rightPair = getTertiaries(variableTop, withCaseBits, rightPair);
     372             :         }
     373             : 
     374           0 :         if(leftPair == rightPair) {
     375           0 :             if(leftPair == EOS) { break; }
     376           0 :             leftPair = rightPair = 0;
     377           0 :             continue;
     378             :         }
     379           0 :         uint32_t leftTertiary = leftPair & 0xffff;
     380           0 :         uint32_t rightTertiary = rightPair & 0xffff;
     381           0 :         if(leftTertiary != rightTertiary) {
     382           0 :             if(CollationSettings::sortsTertiaryUpperCaseFirst(options)) {
     383             :                 // Pass through EOS and MERGE_WEIGHT
     384             :                 // and keep real tertiary weights larger than the MERGE_WEIGHT.
     385             :                 // Tertiary CEs (secondary ignorables) are not supported in fast Latin.
     386           0 :                 if(leftTertiary > MERGE_WEIGHT) {
     387           0 :                     leftTertiary ^= CASE_MASK;  // flip the case bits before comparing
     388             :                 }
     389           0 :                 if(rightTertiary > MERGE_WEIGHT) {
     390           0 :                     rightTertiary ^= CASE_MASK;
     391             :                 }
     392             :             }
     393           0 :             return (leftTertiary < rightTertiary) ? UCOL_LESS : UCOL_GREATER;
     394             :         }
     395           0 :         if(leftPair == EOS) { break; }
     396           0 :         leftPair >>= 16;
     397           0 :         rightPair >>= 16;
     398           0 :     }
     399           0 :     if(CollationSettings::getStrength(options) <= UCOL_TERTIARY) { return UCOL_EQUAL; }  // no quaternary pass requested
     400             : 
     401           0 :     leftIndex = rightIndex = 0;  // restart both strings for the quaternary pass
     402           0 :     leftPair = rightPair = 0;
     403             :     for(;;) {
     404           0 :         while(leftPair == 0) {
     405           0 :             if(leftIndex == leftLength) {
     406           0 :                 leftPair = EOS;
     407           0 :                 break;
     408             :             }
     409           0 :             UChar32 c = left[leftIndex++];
     410           0 :             leftPair = (c <= LATIN_MAX) ? table[c] : lookup(table, c);
     411           0 :             if(leftPair < MIN_LONG) {
     412           0 :                 leftPair = nextPair(table, c, leftPair, left, NULL, leftIndex, leftLength);
     413             :             }
     414           0 :             leftPair = getQuaternaries(variableTop, leftPair);
     415             :         }
     416             : 
     417           0 :         while(rightPair == 0) {  // mirrors the left-side loop above, for the right string
     418           0 :             if(rightIndex == rightLength) {
     419           0 :                 rightPair = EOS;
     420           0 :                 break;
     421             :             }
     422           0 :             UChar32 c = right[rightIndex++];
     423           0 :             rightPair = (c <= LATIN_MAX) ? table[c] : lookup(table, c);
     424           0 :             if(rightPair < MIN_LONG) {
     425           0 :                 rightPair = nextPair(table, c, rightPair, right, NULL, rightIndex, rightLength);
     426             :             }
     427           0 :             rightPair = getQuaternaries(variableTop, rightPair);
     428             :         }
     429             : 
     430           0 :         if(leftPair == rightPair) {
     431           0 :             if(leftPair == EOS) { break; }
     432           0 :             leftPair = rightPair = 0;
     433           0 :             continue;
     434             :         }
     435           0 :         uint32_t leftQuaternary = leftPair & 0xffff;
     436           0 :         uint32_t rightQuaternary = rightPair & 0xffff;
     437           0 :         if(leftQuaternary != rightQuaternary) {
     438           0 :             return (leftQuaternary < rightQuaternary) ? UCOL_LESS : UCOL_GREATER;
     439             :         }
     440           0 :         if(leftPair == EOS) { break; }
     441           0 :         leftPair >>= 16;
     442           0 :         rightPair >>= 16;
     443           0 :     }
     444           0 :     return UCOL_EQUAL;  // no difference found at any requested level
     445             : }
     446             : 
     447             : int32_t
     448           0 : CollationFastLatin::compareUTF8(const uint16_t *table, const uint16_t *primaries, int32_t options,
     449             :                                  const uint8_t *left, int32_t leftLength,
     450             :                                  const uint8_t *right, int32_t rightLength) {
     451             :     // Keep compareUTF16() and compareUTF8() in sync very closely!
     452             : 
     453           0 :     U_ASSERT((table[0] >> 8) == VERSION);
     454           0 :     table += (table[0] & 0xff);  // skip the header
     455           0 :     uint32_t variableTop = (uint32_t)options >> 16;  // see RuleBasedCollator::getFastLatinOptions()
     456           0 :     options &= 0xffff;  // needed for CollationSettings::getStrength() to work
     457             : 
     458             :     // Check for supported characters, fetch mini CEs, and compare primaries.
     459           0 :     int32_t leftIndex = 0, rightIndex = 0;
     460             :     /**
     461             :      * Single mini CE or a pair.
     462             :      * The current mini CE is in the lower 16 bits, the next one is in the upper 16 bits.
     463             :      * If there is only one, then it is in the lower bits, and the upper bits are 0.
     464             :      */
     465           0 :     uint32_t leftPair = 0, rightPair = 0;
     466             :     // Note: There is no need to assemble the code point.
     467             :     // We only need to look up the table entry for the character,
     468             :     // and nextPair() looks for whether c==0.
     469             :     for(;;) {
     470             :         // We fetch CEs until we get a non-ignorable primary or reach the end.
     471           0 :         while(leftPair == 0) {
     472           0 :             if(leftIndex == leftLength) {
     473           0 :                 leftPair = EOS;
     474           0 :                 break;
     475             :             }
     476           0 :             UChar32 c = left[leftIndex++];
     477             :             uint8_t t;
     478           0 :             if(c <= 0x7f) {
     479           0 :                 leftPair = primaries[c];
     480           0 :                 if(leftPair != 0) { break; }
     481           0 :                 if(c <= 0x39 && c >= 0x30 && (options & CollationSettings::NUMERIC) != 0) {
     482           0 :                     return BAIL_OUT_RESULT;
     483             :                 }
     484           0 :                 leftPair = table[c];
     485           0 :             } else if(c <= LATIN_MAX_UTF8_LEAD && 0xc2 <= c && leftIndex != leftLength &&
     486           0 :                     0x80 <= (t = left[leftIndex]) && t <= 0xbf) {
     487           0 :                 ++leftIndex;
     488           0 :                 c = ((c - 0xc2) << 6) + t;
     489           0 :                 leftPair = primaries[c];
     490           0 :                 if(leftPair != 0) { break; }
     491           0 :                 leftPair = table[c];
     492             :             } else {
     493           0 :                 leftPair = lookupUTF8(table, c, left, leftIndex, leftLength);
     494             :             }
     495           0 :             if(leftPair >= MIN_SHORT) {
     496           0 :                 leftPair &= SHORT_PRIMARY_MASK;
     497           0 :                 break;
     498           0 :             } else if(leftPair > variableTop) {
     499           0 :                 leftPair &= LONG_PRIMARY_MASK;
     500           0 :                 break;
     501             :             } else {
     502           0 :                 leftPair = nextPair(table, c, leftPair, NULL, left, leftIndex, leftLength);
     503           0 :                 if(leftPair == BAIL_OUT) { return BAIL_OUT_RESULT; }
     504           0 :                 leftPair = getPrimaries(variableTop, leftPair);
     505             :             }
     506             :         }
     507             : 
     508           0 :         while(rightPair == 0) {
     509           0 :             if(rightIndex == rightLength) {
     510           0 :                 rightPair = EOS;
     511           0 :                 break;
     512             :             }
     513           0 :             UChar32 c = right[rightIndex++];
     514             :             uint8_t t;
     515           0 :             if(c <= 0x7f) {
     516           0 :                 rightPair = primaries[c];
     517           0 :                 if(rightPair != 0) { break; }
     518           0 :                 if(c <= 0x39 && c >= 0x30 && (options & CollationSettings::NUMERIC) != 0) {
     519           0 :                     return BAIL_OUT_RESULT;
     520             :                 }
     521           0 :                 rightPair = table[c];
     522           0 :             } else if(c <= LATIN_MAX_UTF8_LEAD && 0xc2 <= c && rightIndex != rightLength &&
     523           0 :                     0x80 <= (t = right[rightIndex]) && t <= 0xbf) {
     524           0 :                 ++rightIndex;
     525           0 :                 c = ((c - 0xc2) << 6) + t;
     526           0 :                 rightPair = primaries[c];
     527           0 :                 if(rightPair != 0) { break; }
     528           0 :                 rightPair = table[c];
     529             :             } else {
     530           0 :                 rightPair = lookupUTF8(table, c, right, rightIndex, rightLength);
     531             :             }
     532           0 :             if(rightPair >= MIN_SHORT) {
     533           0 :                 rightPair &= SHORT_PRIMARY_MASK;
     534           0 :                 break;
     535           0 :             } else if(rightPair > variableTop) {
     536           0 :                 rightPair &= LONG_PRIMARY_MASK;
     537           0 :                 break;
     538             :             } else {
     539           0 :                 rightPair = nextPair(table, c, rightPair, NULL, right, rightIndex, rightLength);
     540           0 :                 if(rightPair == BAIL_OUT) { return BAIL_OUT_RESULT; }
     541           0 :                 rightPair = getPrimaries(variableTop, rightPair);
     542             :             }
     543             :         }
     544             : 
     545           0 :         if(leftPair == rightPair) {
     546           0 :             if(leftPair == EOS) { break; }
     547           0 :             leftPair = rightPair = 0;
     548           0 :             continue;
     549             :         }
     550           0 :         uint32_t leftPrimary = leftPair & 0xffff;
     551           0 :         uint32_t rightPrimary = rightPair & 0xffff;
     552           0 :         if(leftPrimary != rightPrimary) {
     553             :             // Return the primary difference.
     554           0 :             return (leftPrimary < rightPrimary) ? UCOL_LESS : UCOL_GREATER;
     555             :         }
     556           0 :         if(leftPair == EOS) { break; }
     557           0 :         leftPair >>= 16;
     558           0 :         rightPair >>= 16;
     559           0 :     }
     560             :     // In the following, we need to re-fetch each character because we did not buffer the CEs,
     561             :     // but we know that the string is well-formed and
     562             :     // only contains supported characters and mappings.
     563             : 
     564             :     // We might skip the secondary level but continue with the case level
     565             :     // which is turned on separately.
     566           0 :     if(CollationSettings::getStrength(options) >= UCOL_SECONDARY) {
     567           0 :         leftIndex = rightIndex = 0;
     568           0 :         leftPair = rightPair = 0;
     569             :         for(;;) {
     570           0 :             while(leftPair == 0) {
     571           0 :                 if(leftIndex == leftLength) {
     572           0 :                     leftPair = EOS;
     573           0 :                     break;
     574             :                 }
     575           0 :                 UChar32 c = left[leftIndex++];
     576           0 :                 if(c <= 0x7f) {
     577           0 :                     leftPair = table[c];
     578           0 :                 } else if(c <= LATIN_MAX_UTF8_LEAD) {
     579           0 :                     leftPair = table[((c - 0xc2) << 6) + left[leftIndex++]];
     580             :                 } else {
     581           0 :                     leftPair = lookupUTF8Unsafe(table, c, left, leftIndex);
     582             :                 }
     583           0 :                 if(leftPair >= MIN_SHORT) {
     584           0 :                     leftPair = getSecondariesFromOneShortCE(leftPair);
     585           0 :                     break;
     586           0 :                 } else if(leftPair > variableTop) {
     587           0 :                     leftPair = COMMON_SEC_PLUS_OFFSET;
     588           0 :                     break;
     589             :                 } else {
     590           0 :                     leftPair = nextPair(table, c, leftPair, NULL, left, leftIndex, leftLength);
     591           0 :                     leftPair = getSecondaries(variableTop, leftPair);
     592             :                 }
     593             :             }
     594             : 
     595           0 :             while(rightPair == 0) {
     596           0 :                 if(rightIndex == rightLength) {
     597           0 :                     rightPair = EOS;
     598           0 :                     break;
     599             :                 }
     600           0 :                 UChar32 c = right[rightIndex++];
     601           0 :                 if(c <= 0x7f) {
     602           0 :                     rightPair = table[c];
     603           0 :                 } else if(c <= LATIN_MAX_UTF8_LEAD) {
     604           0 :                     rightPair = table[((c - 0xc2) << 6) + right[rightIndex++]];
     605             :                 } else {
     606           0 :                     rightPair = lookupUTF8Unsafe(table, c, right, rightIndex);
     607             :                 }
     608           0 :                 if(rightPair >= MIN_SHORT) {
     609           0 :                     rightPair = getSecondariesFromOneShortCE(rightPair);
     610           0 :                     break;
     611           0 :                 } else if(rightPair > variableTop) {
     612           0 :                     rightPair = COMMON_SEC_PLUS_OFFSET;
     613           0 :                     break;
     614             :                 } else {
     615           0 :                     rightPair = nextPair(table, c, rightPair, NULL, right, rightIndex, rightLength);
     616           0 :                     rightPair = getSecondaries(variableTop, rightPair);
     617             :                 }
     618             :             }
     619             : 
     620           0 :             if(leftPair == rightPair) {
     621           0 :                 if(leftPair == EOS) { break; }
     622           0 :                 leftPair = rightPair = 0;
     623           0 :                 continue;
     624             :             }
     625           0 :             uint32_t leftSecondary = leftPair & 0xffff;
     626           0 :             uint32_t rightSecondary = rightPair & 0xffff;
     627           0 :             if(leftSecondary != rightSecondary) {
     628           0 :                 if((options & CollationSettings::BACKWARD_SECONDARY) != 0) {
     629             :                     // Full support for backwards secondary requires backwards contraction matching
     630             :                     // and moving backwards between merge separators.
     631           0 :                     return BAIL_OUT_RESULT;
     632             :                 }
     633           0 :                 return (leftSecondary < rightSecondary) ? UCOL_LESS : UCOL_GREATER;
     634             :             }
     635           0 :             if(leftPair == EOS) { break; }
     636           0 :             leftPair >>= 16;
     637           0 :             rightPair >>= 16;
     638           0 :         }
     639             :     }
     640             : 
     641           0 :     if((options & CollationSettings::CASE_LEVEL) != 0) {
     642           0 :         UBool strengthIsPrimary = CollationSettings::getStrength(options) == UCOL_PRIMARY;
     643           0 :         leftIndex = rightIndex = 0;
     644           0 :         leftPair = rightPair = 0;
     645             :         for(;;) {
     646           0 :             while(leftPair == 0) {
     647           0 :                 if(leftIndex == leftLength) {
     648           0 :                     leftPair = EOS;
     649           0 :                     break;
     650             :                 }
     651           0 :                 UChar32 c = left[leftIndex++];
     652           0 :                 leftPair = (c <= 0x7f) ? table[c] : lookupUTF8Unsafe(table, c, left, leftIndex);
     653           0 :                 if(leftPair < MIN_LONG) {
     654           0 :                     leftPair = nextPair(table, c, leftPair, NULL, left, leftIndex, leftLength);
     655             :                 }
     656           0 :                 leftPair = getCases(variableTop, strengthIsPrimary, leftPair);
     657             :             }
     658             : 
     659           0 :             while(rightPair == 0) {
     660           0 :                 if(rightIndex == rightLength) {
     661           0 :                     rightPair = EOS;
     662           0 :                     break;
     663             :                 }
     664           0 :                 UChar32 c = right[rightIndex++];
     665           0 :                 rightPair = (c <= 0x7f) ? table[c] : lookupUTF8Unsafe(table, c, right, rightIndex);
     666           0 :                 if(rightPair < MIN_LONG) {
     667           0 :                     rightPair = nextPair(table, c, rightPair, NULL, right, rightIndex, rightLength);
     668             :                 }
     669           0 :                 rightPair = getCases(variableTop, strengthIsPrimary, rightPair);
     670             :             }
     671             : 
     672           0 :             if(leftPair == rightPair) {
     673           0 :                 if(leftPair == EOS) { break; }
     674           0 :                 leftPair = rightPair = 0;
     675           0 :                 continue;
     676             :             }
     677           0 :             uint32_t leftCase = leftPair & 0xffff;
     678           0 :             uint32_t rightCase = rightPair & 0xffff;
     679           0 :             if(leftCase != rightCase) {
     680           0 :                 if((options & CollationSettings::UPPER_FIRST) == 0) {
     681           0 :                     return (leftCase < rightCase) ? UCOL_LESS : UCOL_GREATER;
     682             :                 } else {
     683           0 :                     return (leftCase < rightCase) ? UCOL_GREATER : UCOL_LESS;
     684             :                 }
     685             :             }
     686           0 :             if(leftPair == EOS) { break; }
     687           0 :             leftPair >>= 16;
     688           0 :             rightPair >>= 16;
     689           0 :         }
     690             :     }
     691           0 :     if(CollationSettings::getStrength(options) <= UCOL_SECONDARY) { return UCOL_EQUAL; }
     692             : 
     693             :     // Remove the case bits from the tertiary weight when caseLevel is on or caseFirst is off.
     694           0 :     UBool withCaseBits = CollationSettings::isTertiaryWithCaseBits(options);
     695             : 
     696           0 :     leftIndex = rightIndex = 0;
     697           0 :     leftPair = rightPair = 0;
     698             :     for(;;) {
     699           0 :         while(leftPair == 0) {
     700           0 :             if(leftIndex == leftLength) {
     701           0 :                 leftPair = EOS;
     702           0 :                 break;
     703             :             }
     704           0 :             UChar32 c = left[leftIndex++];
     705           0 :             leftPair = (c <= 0x7f) ? table[c] : lookupUTF8Unsafe(table, c, left, leftIndex);
     706           0 :             if(leftPair < MIN_LONG) {
     707           0 :                 leftPair = nextPair(table, c, leftPair, NULL, left, leftIndex, leftLength);
     708             :             }
     709           0 :             leftPair = getTertiaries(variableTop, withCaseBits, leftPair);
     710             :         }
     711             : 
     712           0 :         while(rightPair == 0) {
     713           0 :             if(rightIndex == rightLength) {
     714           0 :                 rightPair = EOS;
     715           0 :                 break;
     716             :             }
     717           0 :             UChar32 c = right[rightIndex++];
     718           0 :             rightPair = (c <= 0x7f) ? table[c] : lookupUTF8Unsafe(table, c, right, rightIndex);
     719           0 :             if(rightPair < MIN_LONG) {
     720           0 :                 rightPair = nextPair(table, c, rightPair, NULL, right, rightIndex, rightLength);
     721             :             }
     722           0 :             rightPair = getTertiaries(variableTop, withCaseBits, rightPair);
     723             :         }
     724             : 
     725           0 :         if(leftPair == rightPair) {
     726           0 :             if(leftPair == EOS) { break; }
     727           0 :             leftPair = rightPair = 0;
     728           0 :             continue;
     729             :         }
     730           0 :         uint32_t leftTertiary = leftPair & 0xffff;
     731           0 :         uint32_t rightTertiary = rightPair & 0xffff;
     732           0 :         if(leftTertiary != rightTertiary) {
     733           0 :             if(CollationSettings::sortsTertiaryUpperCaseFirst(options)) {
     734             :                 // Pass through EOS and MERGE_WEIGHT
     735             :                 // and keep real tertiary weights larger than the MERGE_WEIGHT.
     736             :                 // Tertiary CEs (secondary ignorables) are not supported in fast Latin.
     737           0 :                 if(leftTertiary > MERGE_WEIGHT) {
     738           0 :                     leftTertiary ^= CASE_MASK;
     739             :                 }
     740           0 :                 if(rightTertiary > MERGE_WEIGHT) {
     741           0 :                     rightTertiary ^= CASE_MASK;
     742             :                 }
     743             :             }
     744           0 :             return (leftTertiary < rightTertiary) ? UCOL_LESS : UCOL_GREATER;
     745             :         }
     746           0 :         if(leftPair == EOS) { break; }
     747           0 :         leftPair >>= 16;
     748           0 :         rightPair >>= 16;
     749           0 :     }
     750           0 :     if(CollationSettings::getStrength(options) <= UCOL_TERTIARY) { return UCOL_EQUAL; }
     751             : 
     752           0 :     leftIndex = rightIndex = 0;
     753           0 :     leftPair = rightPair = 0;
     754             :     for(;;) {
     755           0 :         while(leftPair == 0) {
     756           0 :             if(leftIndex == leftLength) {
     757           0 :                 leftPair = EOS;
     758           0 :                 break;
     759             :             }
     760           0 :             UChar32 c = left[leftIndex++];
     761           0 :             leftPair = (c <= 0x7f) ? table[c] : lookupUTF8Unsafe(table, c, left, leftIndex);
     762           0 :             if(leftPair < MIN_LONG) {
     763           0 :                 leftPair = nextPair(table, c, leftPair, NULL, left, leftIndex, leftLength);
     764             :             }
     765           0 :             leftPair = getQuaternaries(variableTop, leftPair);
     766             :         }
     767             : 
     768           0 :         while(rightPair == 0) {
     769           0 :             if(rightIndex == rightLength) {
     770           0 :                 rightPair = EOS;
     771           0 :                 break;
     772             :             }
     773           0 :             UChar32 c = right[rightIndex++];
     774           0 :             rightPair = (c <= 0x7f) ? table[c] : lookupUTF8Unsafe(table, c, right, rightIndex);
     775           0 :             if(rightPair < MIN_LONG) {
     776           0 :                 rightPair = nextPair(table, c, rightPair, NULL, right, rightIndex, rightLength);
     777             :             }
     778           0 :             rightPair = getQuaternaries(variableTop, rightPair);
     779             :         }
     780             : 
     781           0 :         if(leftPair == rightPair) {
     782           0 :             if(leftPair == EOS) { break; }
     783           0 :             leftPair = rightPair = 0;
     784           0 :             continue;
     785             :         }
     786           0 :         uint32_t leftQuaternary = leftPair & 0xffff;
     787           0 :         uint32_t rightQuaternary = rightPair & 0xffff;
     788           0 :         if(leftQuaternary != rightQuaternary) {
     789           0 :             return (leftQuaternary < rightQuaternary) ? UCOL_LESS : UCOL_GREATER;
     790             :         }
     791           0 :         if(leftPair == EOS) { break; }
     792           0 :         leftPair >>= 16;
     793           0 :         rightPair >>= 16;
     794           0 :     }
     795           0 :     return UCOL_EQUAL;
     796             : }
     797             : 
     798             : uint32_t  // Returns the fast-Latin table entry, or a fixed weight, for a character c above LATIN_MAX; BAIL_OUT if c is not handled here.
     799           0 : CollationFastLatin::lookup(const uint16_t *table, UChar32 c) {  // table: array that is indexed below; c: the character to look up
     800           0 :     U_ASSERT(c > LATIN_MAX);  // precondition: c is above LATIN_MAX
     801           0 :     if(PUNCT_START <= c && c < PUNCT_LIMIT) {
     802           0 :         return table[c - PUNCT_START + LATIN_LIMIT];  // this range is read from the table entries that start at index LATIN_LIMIT
     803           0 :     } else if(c == 0xfffe) {
     804           0 :         return MERGE_WEIGHT;  // U+FFFE
     805           0 :     } else if(c == 0xffff) {
     806           0 :         return MAX_SHORT | COMMON_SEC | LOWER_CASE | COMMON_TER;  // U+FFFF
     807             :     } else {
     808           0 :         return BAIL_OUT;  // any other character above LATIN_MAX
     809             :     }
     810             : }
     811             : 
     812             : uint32_t  // Maps a three-byte UTF-8 sequence starting with lead byte c to a table entry or fixed weight; returns BAIL_OUT for everything else.
     813           0 : CollationFastLatin::lookupUTF8(const uint16_t *table, UChar32 c,
     814             :                                const uint8_t *s8, int32_t &sIndex, int32_t sLength) {  // sIndex (in/out): index of the byte after c; sLength: byte length, may be negative
     815             :     // The caller handled ASCII and valid/supported Latin.
     816           0 :     U_ASSERT(c > 0x7f);
     817           0 :     int32_t i2 = sIndex + 1;  // index of the second byte after the lead byte
     818           0 :     if(i2 < sLength || sLength < 0) {  // two more bytes lie inside the string, or the length is negative (presumably NUL-terminated input -- confirm)
     819           0 :         uint8_t t1 = s8[sIndex];
     820           0 :         uint8_t t2 = s8[i2];  // NOTE(review): read before t1 is checked; with sLength < 0 and t1 == 0 this looks like a read past the terminator -- confirm
     821           0 :         sIndex += 2;  // both bytes are consumed even when the result below is BAIL_OUT
     822           0 :         if(c == 0xe2 && t1 == 0x80 && 0x80 <= t2 && t2 <= 0xbf) {
     823           0 :             return table[(LATIN_LIMIT - 0x80) + t2];  // 2000..203F -> 0180..01BF
     824           0 :         } else if(c == 0xef && t1 == 0xbf) {
     825           0 :             if(t2 == 0xbe) {
     826           0 :                 return MERGE_WEIGHT;  // U+FFFE
     827           0 :             } else if(t2 == 0xbf) {
     828           0 :                 return MAX_SHORT | COMMON_SEC | LOWER_CASE | COMMON_TER;  // U+FFFF
     829             :             }  // any other third byte after EF BF falls through to BAIL_OUT
     830             :         }
     831             :     }
     832           0 :     return BAIL_OUT;  // too few bytes left, or a sequence that is not handled above
     833             : }
     834             : 
     835             : uint32_t  // Unchecked variant of the UTF-8 lookup for a non-ASCII lead byte c: no bounds checks and no trail-byte validation are performed.
     836           0 : CollationFastLatin::lookupUTF8Unsafe(const uint16_t *table, UChar32 c,
     837             :                                      const uint8_t *s8, int32_t &sIndex) {  // sIndex (in/out): index of the byte after c; advanced past the trail bytes
     838             :     // The caller handled ASCII.
     839             :     // The string is well-formed and contains only supported characters.
     840           0 :     U_ASSERT(c > 0x7f);
     841           0 :     if(c <= LATIN_MAX_UTF8_LEAD) {  // lead byte followed by exactly one trail byte, which is consumed here
     842           0 :         return table[((c - 0xc2) << 6) + s8[sIndex++]];  // 0080..017F
     843             :     }
     844           0 :     uint8_t t2 = s8[sIndex + 1];  // the first trail byte s8[sIndex] is skipped without being inspected
     845           0 :     sIndex += 2;  // consume both trail bytes
     846           0 :     if(c == 0xe2) {
     847           0 :         return table[(LATIN_LIMIT - 0x80) + t2];  // 2000..203F -> 0180..01BF
     848           0 :     } else if(t2 == 0xbe) {  // the lead byte is not compared again; this relies on the preconditions stated above
     849           0 :         return MERGE_WEIGHT;  // U+FFFE
     850             :     } else {
     851           0 :         return MAX_SHORT | COMMON_SEC | LOWER_CASE | COMMON_TER;  // U+FFFF
     852             :     }
     853             : }
     854             : 
     855             : uint32_t  // Resolves a table value that may denote an expansion or contraction into one mini CE, or two mini CEs packed as (second << 16) | first.
     856           0 : CollationFastLatin::nextPair(const uint16_t *table, UChar32 c, uint32_t ce,
     857             :                              const UChar *s16, const uint8_t *s8, int32_t &sIndex, int32_t &sLength) {  // text is read from s16 if non-NULL, else from s8; sIndex and sLength are in/out
     858           0 :     if(ce >= MIN_LONG || ce < CONTRACTION) {
     859           0 :         return ce;  // simple or special mini CE
     860           0 :     } else if(ce >= EXPANSION) {
     861           0 :         int32_t index = NUM_FAST_CHARS + (ce & INDEX_MASK);  // the two table entries of the expansion start at this index
     862           0 :         return ((uint32_t)table[index + 1] << 16) | table[index];  // first entry in the low 16 bits, second entry in the high 16 bits
     863             :     } else /* ce >= CONTRACTION */ {
     864           0 :         if(c == 0 && sLength < 0) {  // zero character while the length is still negative: treated as the end of the string
     865           0 :             sLength = sIndex - 1;  // record the length; NOTE(review): assumes sIndex is already one past c, as in the compare loops of this file -- confirm
     866           0 :             return EOS;
     867             :         }
     868             :         // Contraction list: Default mapping followed by
     869             :         // 0 or more single-character contraction suffix mappings.
     870           0 :         int32_t index = NUM_FAST_CHARS + (ce & INDEX_MASK);
     871           0 :         if(sIndex != sLength) {
     872             :             // Read the next character.
     873             :             int32_t c2;
     874           0 :             int32_t nextIndex = sIndex;  // look ahead on a copy; sIndex is only advanced if a suffix matches
     875           0 :             if(s16 != NULL) {
     876           0 :                 c2 = s16[nextIndex++];
     877           0 :                 if(c2 > LATIN_MAX) {
     878           0 :                     if(PUNCT_START <= c2 && c2 < PUNCT_LIMIT) {
     879           0 :                         c2 = c2 - PUNCT_START + LATIN_LIMIT;  // 2000..203F -> 0180..01BF
     880           0 :                     } else if(c2 == 0xfffe || c2 == 0xffff) {
     881           0 :                         c2 = -1;  // U+FFFE & U+FFFF cannot occur in contractions.
     882             :                     } else {
     883           0 :                         return BAIL_OUT;
     884             :                     }
     885             :                 }
     886             :             } else {
     887           0 :                 c2 = s8[nextIndex++];
     888           0 :                 if(c2 > 0x7f) {
     889             :                     uint8_t t;
     890           0 :                     if(c2 <= 0xc5 && 0xc2 <= c2 && nextIndex != sLength &&  // NOTE(review): 0xc5 is presumably the value of LATIN_MAX_UTF8_LEAD -- confirm
     891           0 :                             0x80 <= (t = s8[nextIndex]) && t <= 0xbf) {
     892           0 :                         c2 = ((c2 - 0xc2) << 6) + t;  // 0080..017F
     893           0 :                         ++nextIndex;
     894             :                     } else {
     895           0 :                         int32_t i2 = nextIndex + 1;
     896           0 :                         if(i2 < sLength || sLength < 0) {  // below, s8[i2] is only read after s8[nextIndex] matched a non-zero byte
     897           0 :                             if(c2 == 0xe2 && s8[nextIndex] == 0x80 &&
     898           0 :                                     0x80 <= (t = s8[i2]) && t <= 0xbf) {
     899           0 :                                 c2 = (LATIN_LIMIT - 0x80) + t;  // 2000..203F -> 0180..01BF
     900           0 :                             } else if(c2 == 0xef && s8[nextIndex] == 0xbf &&
     901           0 :                                     ((t = s8[i2]) == 0xbe || t == 0xbf)) {
     902           0 :                                 c2 = -1;  // U+FFFE & U+FFFF cannot occur in contractions.
     903             :                             } else {
     904           0 :                                 return BAIL_OUT;
     905             :                             }
     906             :                         } else {
     907           0 :                             return BAIL_OUT;
     908             :                         }
     909           0 :                         nextIndex += 2;  // step over the two trail bytes of the three-byte sequence
     910             :                     }
     911             :                 }
     912             :             }
     913           0 :             if(c2 == 0 && sLength < 0) {  // the look-ahead read a zero character while the length is still negative
     914           0 :                 sLength = sIndex;  // the string ends at the look-ahead position
     915           0 :                 c2 = -1;  // -1 matches no suffix: x below is a masked uint16_t value and therefore never negative
     916             :             }
     917             :             // Look for the next character in the contraction suffix list,
     918             :             // which is in ascending order of single suffix characters.
     919           0 :             int32_t i = index;
     920           0 :             int32_t head = table[i];  // first skip the default mapping
     921             :             int32_t x;
     922           0 :             do {
     923           0 :                 i += head >> CONTR_LENGTH_SHIFT;  // the bits above CONTR_LENGTH_SHIFT give the length of the current entry in table units
     924           0 :                 head = table[i];
     925           0 :                 x = head & CONTR_CHAR_MASK;  // suffix character of the entry at i
     926           0 :             } while(x < c2);  // NOTE(review): no bound check; presumably the list ends with an entry >= any possible c2 -- confirm
     927           0 :             if(x == c2) {
     928           0 :                 index = i;  // use the matching suffix mapping instead of the default mapping
     929           0 :                 sIndex = nextIndex;  // and consume the suffix character
     930             :             }
     931             :         }
     932             :         // Return the CE or CEs for the default or contraction mapping.
     933           0 :         int32_t length = table[index] >> CONTR_LENGTH_SHIFT;
     934           0 :         if(length == 1) {
     935           0 :             return BAIL_OUT;  // the entry consists of its head unit only, so there is no CE to return
     936             :         }
     937           0 :         ce = table[index + 1];
     938           0 :         if(length == 2) {
     939           0 :             return ce;  // one mini CE
     940             :         } else {
     941           0 :             return ((uint32_t)table[index + 2] << 16) | ce;  // two mini CEs, the second one in the upper 16 bits
     942             :         }
     943             :     }
     944             : }
     945             : 
     946             : uint32_t  // Converts one mini CE (pair <= 0xffff) or two packed mini CEs into the value that the secondary-level loop of the compare functions uses.
     947           0 : CollationFastLatin::getSecondaries(uint32_t variableTop, uint32_t pair) {  // variableTop: threshold compared against the mini CE; pair: value to convert
     948           0 :     if(pair <= 0xffff) {
     949             :         // one mini CE
     950           0 :         if(pair >= MIN_SHORT) {
     951           0 :             pair = getSecondariesFromOneShortCE(pair);
     952           0 :         } else if(pair > variableTop) {
     953           0 :             pair = COMMON_SEC_PLUS_OFFSET;  // above variableTop but below MIN_SHORT: replaced by this fixed constant
     954           0 :         } else if(pair >= MIN_LONG) {
     955           0 :             pair = 0;  // variable
     956             :         }
     957             :         // else special mini CE: returned unchanged
     958             :     } else {
     959           0 :         uint32_t ce = pair & 0xffff;  // the classification below looks only at the first mini CE (low 16 bits)
     960           0 :         if(ce >= MIN_SHORT) {
     961           0 :             pair = (pair & TWO_SECONDARIES_MASK) + TWO_SEC_OFFSETS;  // mask both CEs at once, then add the offsets
     962           0 :         } else if(ce > variableTop) {
     963           0 :             pair = TWO_COMMON_SEC_PLUS_OFFSET;
     964             :         } else {
     965           0 :             U_ASSERT(ce >= MIN_LONG);
     966           0 :             pair = 0;  // variable
     967             :         }
     968             :     }
     969           0 :     return pair;  // 0 makes the callers' while(pair == 0) loops fetch the next character
     970             : }
     971             : 
     972             : uint32_t  // Converts one mini CE (pair <= 0xffff) or two packed mini CEs into the value that the case-level loop of the compare functions uses.
     973           0 : CollationFastLatin::getCases(uint32_t variableTop, UBool strengthIsPrimary, uint32_t pair) {  // strengthIsPrimary: TRUE when the caller's strength is UCOL_PRIMARY
     974             :     // Primary+caseLevel: Ignore case level weights of primary ignorables.
     975             :     // Otherwise: Ignore case level weights of secondary ignorables.
     976             :     // For details see the comments in the CollationCompare class.
     977             :     // Tertiary CEs (secondary ignorables) are not supported in fast Latin.
     978           0 :     if(pair <= 0xffff) {
     979             :         // one mini CE
     980           0 :         if(pair >= MIN_SHORT) {
     981             :             // A high secondary weight means we really have two CEs,
     982             :             // a primary CE and a secondary CE.
     983           0 :             uint32_t ce = pair;  // keep the unmasked CE for the secondary-weight test below
     984           0 :             pair &= CASE_MASK;  // explicit weight of primary CE
     985           0 :             if(!strengthIsPrimary && (ce & SECONDARY_MASK) >= MIN_SEC_HIGH) {
     986           0 :                 pair |= LOWER_CASE << 16;  // implied weight of secondary CE
     987             :             }
     988           0 :         } else if(pair > variableTop) {
     989           0 :             pair = LOWER_CASE;  // above variableTop but below MIN_SHORT: replaced by this fixed constant
     990           0 :         } else if(pair >= MIN_LONG) {
     991           0 :             pair = 0;  // variable
     992             :         }
     993             :         // else special mini CE: returned unchanged
     994             :     } else {
     995             :         // two mini CEs, same primary groups, neither expands like above
     996           0 :         uint32_t ce = pair & 0xffff;  // the classification below looks only at the first mini CE (low 16 bits)
     997           0 :         if(ce >= MIN_SHORT) {
     998           0 :             if(strengthIsPrimary && (pair & (SHORT_PRIMARY_MASK << 16)) == 0) {  // the second CE has no primary bits set
     999           0 :                 pair &= CASE_MASK;  // keeps only the first CE's case bits (assuming CASE_MASK fits in the low 16 bits -- confirm)
    1000             :             } else {
    1001           0 :                 pair &= TWO_CASES_MASK;
    1002             :             }
    1003           0 :         } else if(ce > variableTop) {
    1004           0 :             pair = TWO_LOWER_CASES;
    1005             :         } else {
    1006           0 :             U_ASSERT(ce >= MIN_LONG);
    1007           0 :             pair = 0;  // variable
    1008             :         }
    1009             :     }
    1010           0 :     return pair;  // 0 makes the callers' while(pair == 0) loops fetch the next character
    1011             : }
    1012             : 
    1013             : uint32_t  // Reduces a mini-CE pair to its tertiary-level value (optionally keeping case bits); variable CEs become 0.
    1014           0 : CollationFastLatin::getTertiaries(uint32_t variableTop, UBool withCaseBits, uint32_t pair) {
    1015           0 :     if(pair <= 0xffff) {
    1016             :         // one mini CE: the whole value fits in the lower 16 bits
    1017           0 :         if(pair >= MIN_SHORT) {
    1018             :             // A high secondary weight means we really have two CEs,
    1019             :             // a primary CE and a secondary CE.
    1020           0 :             uint32_t ce = pair;  // keep the unmasked value; its secondary bits are tested after pair is overwritten
    1021           0 :             if(withCaseBits) {
    1022           0 :                 pair = (pair & CASE_AND_TERTIARY_MASK) + TER_OFFSET;
    1023           0 :                 if((ce & SECONDARY_MASK) >= MIN_SEC_HIGH) {
    1024           0 :                     pair |= (LOWER_CASE | COMMON_TER_PLUS_OFFSET) << 16;  // upper 16 bits stand in for the second CE
    1025             :                 }
    1026             :             } else {
    1027           0 :                 pair = (pair & TERTIARY_MASK) + TER_OFFSET;
    1028           0 :                 if((ce & SECONDARY_MASK) >= MIN_SEC_HIGH) {
    1029           0 :                     pair |= COMMON_TER_PLUS_OFFSET << 16;  // same as above, but without a case value
    1030             :                 }
    1031             :             }
    1032           0 :         } else if(pair > variableTop) {  // below MIN_SHORT but above variableTop: not variable
    1033           0 :             pair = (pair & TERTIARY_MASK) + TER_OFFSET;
    1034           0 :             if(withCaseBits) {
    1035           0 :                 pair |= LOWER_CASE;
    1036             :             }
    1037           0 :         } else if(pair >= MIN_LONG) {
    1038           0 :             pair = 0;  // variable
    1039             :         }
    1040             :         // else special mini CE (below MIN_LONG): returned unchanged
    1041             :     } else {
    1042             :         // two mini CEs, same primary groups, neither expands like above
    1043           0 :         uint32_t ce = pair & 0xffff;  // the lower mini CE alone selects the branch for both halves
    1044           0 :         if(ce >= MIN_SHORT) {
    1045           0 :             if(withCaseBits) {
    1046           0 :                 pair &= TWO_CASES_MASK | TWO_TERTIARIES_MASK;
    1047             :             } else {
    1048           0 :                 pair &= TWO_TERTIARIES_MASK;
    1049             :             }
    1050           0 :             pair += TWO_TER_OFFSETS;  // offset is added after masking, in both the with-case and without-case paths
    1051           0 :         } else if(ce > variableTop) {
    1052           0 :             pair = (pair & TWO_TERTIARIES_MASK) + TWO_TER_OFFSETS;
    1053           0 :             if(withCaseBits) {
    1054           0 :                 pair |= TWO_LOWER_CASES;
    1055             :             }
    1056             :         } else {
    1057           0 :             U_ASSERT(ce >= MIN_LONG);  // a paired CE at or below variableTop is expected to be at least MIN_LONG
    1058           0 :             pair = 0;  // variable
    1059             :         }
    1060             :     }
    1061           0 :     return pair;
    1062             : }
    1063             : 
    1064             : uint32_t
    1065           0 : CollationFastLatin::getQuaternaries(uint32_t variableTop, uint32_t pair) {
    1066             :     // Return the primary weight of a variable CE,
    1067             :     // or the maximum primary weight for a non-variable, not-completely-ignorable CE.
    1068           0 :     if(pair <= 0xffff) {
    1069             :         // one mini CE: the whole value fits in the lower 16 bits
    1070           0 :         if(pair >= MIN_SHORT) {
    1071             :             // A high secondary weight means we really have two CEs,
    1072             :             // a primary CE and a secondary CE.
    1073           0 :             if((pair & SECONDARY_MASK) >= MIN_SEC_HIGH) {
    1074           0 :                 pair = TWO_SHORT_PRIMARIES_MASK;  // result covers both 16-bit halves, one per implied CE
    1075             :             } else {
    1076           0 :                 pair = SHORT_PRIMARY_MASK;
    1077             :             }
    1078           0 :         } else if(pair > variableTop) {  // below MIN_SHORT but above variableTop: not variable
    1079           0 :             pair = SHORT_PRIMARY_MASK;
    1080           0 :         } else if(pair >= MIN_LONG) {
    1081           0 :             pair &= LONG_PRIMARY_MASK;  // variable
    1082             :         }
    1083             :         // else special mini CE (below MIN_LONG): returned unchanged
    1084             :     } else {
    1085             :         // two mini CEs, same primary groups, neither expands like above
    1086           0 :         uint32_t ce = pair & 0xffff;  // the lower mini CE alone selects the branch for both halves
    1087           0 :         if(ce > variableTop) {
    1088           0 :             pair = TWO_SHORT_PRIMARIES_MASK;
    1089             :         } else {
    1090           0 :             U_ASSERT(ce >= MIN_LONG);  // a paired CE at or below variableTop is expected to be at least MIN_LONG
    1091           0 :             pair &= TWO_LONG_PRIMARIES_MASK;  // variable
    1092             :         }
    1093             :     }
    1094           0 :     return pair;
    1095             : }
    1096             : 
    1097             : U_NAMESPACE_END
    1098             : 
    1099             : #endif  // !UCONFIG_NO_COLLATION

Generated by: LCOV version 1.13