LCOV - code coverage report
Current view: top level - intl/icu/source/i18n - rulebasedcollator.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 0 900 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 102 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : *******************************************************************************
       5             : * Copyright (C) 1996-2015, International Business Machines
       6             : * Corporation and others.  All Rights Reserved.
       7             : *******************************************************************************
       8             : * rulebasedcollator.cpp
       9             : *
      10             : * (replaced the former tblcoll.cpp)
      11             : *
      12             : * created on: 2012feb14 with new and old collation code
      13             : * created by: Markus W. Scherer
      14             : */
      15             : 
      16             : #include "unicode/utypes.h"
      17             : 
      18             : #if !UCONFIG_NO_COLLATION
      19             : 
      20             : #include "unicode/coll.h"
      21             : #include "unicode/coleitr.h"
      22             : #include "unicode/localpointer.h"
      23             : #include "unicode/locid.h"
      24             : #include "unicode/sortkey.h"
      25             : #include "unicode/tblcoll.h"
      26             : #include "unicode/ucol.h"
      27             : #include "unicode/uiter.h"
      28             : #include "unicode/uloc.h"
      29             : #include "unicode/uniset.h"
      30             : #include "unicode/unistr.h"
      31             : #include "unicode/usetiter.h"
      32             : #include "unicode/utf8.h"
      33             : #include "unicode/uversion.h"
      34             : #include "bocsu.h"
      35             : #include "charstr.h"
      36             : #include "cmemory.h"
      37             : #include "collation.h"
      38             : #include "collationcompare.h"
      39             : #include "collationdata.h"
      40             : #include "collationdatareader.h"
      41             : #include "collationfastlatin.h"
      42             : #include "collationiterator.h"
      43             : #include "collationkeys.h"
      44             : #include "collationroot.h"
      45             : #include "collationsets.h"
      46             : #include "collationsettings.h"
      47             : #include "collationtailoring.h"
      48             : #include "cstring.h"
      49             : #include "uassert.h"
      50             : #include "ucol_imp.h"
      51             : #include "uhash.h"
      52             : #include "uitercollationiterator.h"
      53             : #include "ustr_imp.h"
      54             : #include "utf16collationiterator.h"
      55             : #include "utf8collationiterator.h"
      56             : #include "uvectr64.h"
      57             : 
      58             : U_NAMESPACE_BEGIN
      59             : 
      60             : namespace {
      61             : 
      62             : class FixedSortKeyByteSink : public SortKeyByteSink {
      63             : public:
      64           0 :     FixedSortKeyByteSink(char *dest, int32_t destCapacity)
      65           0 :             : SortKeyByteSink(dest, destCapacity) {}
      66             :     virtual ~FixedSortKeyByteSink();
      67             : 
      68             : private:
      69             :     virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length);
      70             :     virtual UBool Resize(int32_t appendCapacity, int32_t length);
      71             : };
      72             : 
      73           0 : FixedSortKeyByteSink::~FixedSortKeyByteSink() {}
      74             : 
      75             : void
      76           0 : FixedSortKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t /*n*/, int32_t length) {
      77             :     // buffer_ != NULL && bytes != NULL && n > 0 && appended_ > capacity_
      78             :     // Fill the buffer completely.
      79           0 :     int32_t available = capacity_ - length;
      80           0 :     if (available > 0) {
      81           0 :         uprv_memcpy(buffer_ + length, bytes, available);
      82             :     }
      83           0 : }
      84             : 
      85             : UBool
      86           0 : FixedSortKeyByteSink::Resize(int32_t /*appendCapacity*/, int32_t /*length*/) {
      87           0 :     return FALSE;
      88             : }
      89             : 
      90             : }  // namespace
      91             : 
      92             : // Not in an anonymous namespace, so that it can be a friend of CollationKey.
      93             : class CollationKeyByteSink : public SortKeyByteSink {
      94             : public:
      95           0 :     CollationKeyByteSink(CollationKey &key)
      96           0 :             : SortKeyByteSink(reinterpret_cast<char *>(key.getBytes()), key.getCapacity()),
      97           0 :               key_(key) {}
      98             :     virtual ~CollationKeyByteSink();
      99             : 
     100             : private:
     101             :     virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length);
     102             :     virtual UBool Resize(int32_t appendCapacity, int32_t length);
     103             : 
     104             :     CollationKey &key_;
     105             : };
     106             : 
     107           0 : CollationKeyByteSink::~CollationKeyByteSink() {}
     108             : 
     109             : void
     110           0 : CollationKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) {
     111             :     // buffer_ != NULL && bytes != NULL && n > 0 && appended_ > capacity_
     112           0 :     if (Resize(n, length)) {
     113           0 :         uprv_memcpy(buffer_ + length, bytes, n);
     114             :     }
     115           0 : }
     116             : 
     117             : UBool
     118           0 : CollationKeyByteSink::Resize(int32_t appendCapacity, int32_t length) {
     119           0 :     if (buffer_ == NULL) {
     120           0 :         return FALSE;  // allocation failed before already
     121             :     }
     122           0 :     int32_t newCapacity = 2 * capacity_;
     123           0 :     int32_t altCapacity = length + 2 * appendCapacity;
     124           0 :     if (newCapacity < altCapacity) {
     125           0 :         newCapacity = altCapacity;
     126             :     }
     127           0 :     if (newCapacity < 200) {
     128           0 :         newCapacity = 200;
     129             :     }
     130           0 :     uint8_t *newBuffer = key_.reallocate(newCapacity, length);
     131           0 :     if (newBuffer == NULL) {
     132           0 :         SetNotOk();
     133           0 :         return FALSE;
     134             :     }
     135           0 :     buffer_ = reinterpret_cast<char *>(newBuffer);
     136           0 :     capacity_ = newCapacity;
     137           0 :     return TRUE;
     138             : }
     139             : 
     140           0 : RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator &other)
     141             :         : Collator(other),
     142           0 :           data(other.data),
     143           0 :           settings(other.settings),
     144           0 :           tailoring(other.tailoring),
     145           0 :           cacheEntry(other.cacheEntry),
     146             :           validLocale(other.validLocale),
     147           0 :           explicitlySetAttributes(other.explicitlySetAttributes),
     148           0 :           actualLocaleIsSameAsValid(other.actualLocaleIsSameAsValid) {
     149           0 :     settings->addRef();
     150           0 :     cacheEntry->addRef();
     151           0 : }
     152             : 
     153           0 : RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length,
     154           0 :                                      const RuleBasedCollator *base, UErrorCode &errorCode)
     155             :         : data(NULL),
     156             :           settings(NULL),
     157             :           tailoring(NULL),
     158             :           cacheEntry(NULL),
     159             :           validLocale(""),
     160             :           explicitlySetAttributes(0),
     161           0 :           actualLocaleIsSameAsValid(FALSE) {
     162           0 :     if(U_FAILURE(errorCode)) { return; }
     163           0 :     if(bin == NULL || length == 0 || base == NULL) {
     164           0 :         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
     165           0 :         return;
     166             :     }
     167           0 :     const CollationTailoring *root = CollationRoot::getRoot(errorCode);
     168           0 :     if(U_FAILURE(errorCode)) { return; }
     169           0 :     if(base->tailoring != root) {
     170           0 :         errorCode = U_UNSUPPORTED_ERROR;
     171           0 :         return;
     172             :     }
     173           0 :     LocalPointer<CollationTailoring> t(new CollationTailoring(base->tailoring->settings));
     174           0 :     if(t.isNull() || t->isBogus()) {
     175           0 :         errorCode = U_MEMORY_ALLOCATION_ERROR;
     176           0 :         return;
     177             :     }
     178           0 :     CollationDataReader::read(base->tailoring, bin, length, *t, errorCode);
     179           0 :     if(U_FAILURE(errorCode)) { return; }
     180           0 :     t->actualLocale.setToBogus();
     181           0 :     adoptTailoring(t.orphan(), errorCode);
     182             : }
     183             : 
     184           0 : RuleBasedCollator::RuleBasedCollator(const CollationCacheEntry *entry)
     185           0 :         : data(entry->tailoring->data),
     186           0 :           settings(entry->tailoring->settings),
     187           0 :           tailoring(entry->tailoring),
     188             :           cacheEntry(entry),
     189             :           validLocale(entry->validLocale),
     190             :           explicitlySetAttributes(0),
     191           0 :           actualLocaleIsSameAsValid(FALSE) {
     192           0 :     settings->addRef();
     193           0 :     cacheEntry->addRef();
     194           0 : }
     195             : 
     196           0 : RuleBasedCollator::~RuleBasedCollator() {
     197           0 :     SharedObject::clearPtr(settings);
     198           0 :     SharedObject::clearPtr(cacheEntry);
     199           0 : }
     200             : 
     201             : void
     202           0 : RuleBasedCollator::adoptTailoring(CollationTailoring *t, UErrorCode &errorCode) {
     203           0 :     if(U_FAILURE(errorCode)) {
     204           0 :         t->deleteIfZeroRefCount();
     205           0 :         return;
     206             :     }
     207           0 :     U_ASSERT(settings == NULL && data == NULL && tailoring == NULL && cacheEntry == NULL);
     208           0 :     cacheEntry = new CollationCacheEntry(t->actualLocale, t);
     209           0 :     if(cacheEntry == NULL) {
     210           0 :         errorCode = U_MEMORY_ALLOCATION_ERROR;
     211           0 :         t->deleteIfZeroRefCount();
     212           0 :         return;
     213             :     }
     214           0 :     data = t->data;
     215           0 :     settings = t->settings;
     216           0 :     settings->addRef();
     217           0 :     tailoring = t;
     218           0 :     cacheEntry->addRef();
     219           0 :     validLocale = t->actualLocale;
     220           0 :     actualLocaleIsSameAsValid = FALSE;
     221             : }
     222             : 
     223             : Collator *
     224           0 : RuleBasedCollator::clone() const {
     225           0 :     return new RuleBasedCollator(*this);
     226             : }
     227             : 
     228           0 : RuleBasedCollator &RuleBasedCollator::operator=(const RuleBasedCollator &other) {
     229           0 :     if(this == &other) { return *this; }
     230           0 :     SharedObject::copyPtr(other.settings, settings);
     231           0 :     tailoring = other.tailoring;
     232           0 :     SharedObject::copyPtr(other.cacheEntry, cacheEntry);
     233           0 :     data = tailoring->data;
     234           0 :     validLocale = other.validLocale;
     235           0 :     explicitlySetAttributes = other.explicitlySetAttributes;
     236           0 :     actualLocaleIsSameAsValid = other.actualLocaleIsSameAsValid;
     237           0 :     return *this;
     238             : }
     239             : 
     // RTTI boilerplate for RuleBasedCollator, generated by an ICU macro.
     // NOTE(review): presumably defines getStaticClassID()/getDynamicClassID() -- confirm in unicode/uobject.h.
     240           0 : UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator)
     241             : 
     242             : UBool
     243           0 : RuleBasedCollator::operator==(const Collator& other) const {
     244           0 :     if(this == &other) { return TRUE; }
     245           0 :     if(!Collator::operator==(other)) { return FALSE; }
     246           0 :     const RuleBasedCollator &o = static_cast<const RuleBasedCollator &>(other);
     247           0 :     if(*settings != *o.settings) { return FALSE; }
     248           0 :     if(data == o.data) { return TRUE; }
     249           0 :     UBool thisIsRoot = data->base == NULL;
     250           0 :     UBool otherIsRoot = o.data->base == NULL;
     251           0 :     U_ASSERT(!thisIsRoot || !otherIsRoot);  // otherwise their data pointers should be ==
     252           0 :     if(thisIsRoot != otherIsRoot) { return FALSE; }
     253           0 :     if((thisIsRoot || !tailoring->rules.isEmpty()) &&
     254           0 :             (otherIsRoot || !o.tailoring->rules.isEmpty())) {
     255             :         // Shortcut: If both collators have valid rule strings, then compare those.
     256           0 :         if(tailoring->rules == o.tailoring->rules) { return TRUE; }
     257             :     }
     258             :     // Different rule strings can result in the same or equivalent tailoring.
     259             :     // The rule strings are optional in ICU resource bundles, although included by default.
     260             :     // cloneBinary() drops the rule string.
     261           0 :     UErrorCode errorCode = U_ZERO_ERROR;
     262           0 :     LocalPointer<UnicodeSet> thisTailored(getTailoredSet(errorCode));
     263           0 :     LocalPointer<UnicodeSet> otherTailored(o.getTailoredSet(errorCode));
     264           0 :     if(U_FAILURE(errorCode)) { return FALSE; }
     265           0 :     if(*thisTailored != *otherTailored) { return FALSE; }
     266             :     // For completeness, we should compare all of the mappings;
     267             :     // or we should create a list of strings, sort it with one collator,
     268             :     // and check if both collators compare adjacent strings the same
     269             :     // (order & strength, down to quaternary); or similar.
     270             :     // Testing equality of collators seems unusual.
     271           0 :     return TRUE;
     272             : }
     273             : 
     274             : int32_t
     275           0 : RuleBasedCollator::hashCode() const {
     276           0 :     int32_t h = settings->hashCode();
     277           0 :     if(data->base == NULL) { return h; }  // root collator
     278             :     // Do not rely on the rule string, see comments in operator==().
     279           0 :     UErrorCode errorCode = U_ZERO_ERROR;
     280           0 :     LocalPointer<UnicodeSet> set(getTailoredSet(errorCode));
     281           0 :     if(U_FAILURE(errorCode)) { return 0; }
     282           0 :     UnicodeSetIterator iter(*set);
     283           0 :     while(iter.next() && !iter.isString()) {
     284           0 :         h ^= data->getCE32(iter.getCodepoint());
     285             :     }
     286           0 :     return h;
     287             : }
     288             : 
     289             : void
     290           0 : RuleBasedCollator::setLocales(const Locale &requested, const Locale &valid,
     291             :                               const Locale &actual) {
     292           0 :     if(actual == tailoring->actualLocale) {
     293           0 :         actualLocaleIsSameAsValid = FALSE;
     294             :     } else {
     295           0 :         U_ASSERT(actual == valid);
     296           0 :         actualLocaleIsSameAsValid = TRUE;
     297             :     }
     298             :     // Do not modify tailoring.actualLocale:
     299             :     // We cannot be sure that that would be thread-safe.
     300           0 :     validLocale = valid;
     301             :     (void)requested;  // Ignore, see also ticket #10477.
     302           0 : }
     303             : 
     304             : Locale
     305           0 : RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode& errorCode) const {
     306           0 :     if(U_FAILURE(errorCode)) {
     307           0 :         return Locale::getRoot();
     308             :     }
     309           0 :     switch(type) {
     310             :     case ULOC_ACTUAL_LOCALE:
     311           0 :         return actualLocaleIsSameAsValid ? validLocale : tailoring->actualLocale;
     312             :     case ULOC_VALID_LOCALE:
     313           0 :         return validLocale;
     314             :     case ULOC_REQUESTED_LOCALE:
     315             :     default:
     316           0 :         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
     317           0 :         return Locale::getRoot();
     318             :     }
     319             : }
     320             : 
     321             : const char *
     322           0 : RuleBasedCollator::internalGetLocaleID(ULocDataLocaleType type, UErrorCode &errorCode) const {
     323           0 :     if(U_FAILURE(errorCode)) {
     324           0 :         return NULL;
     325             :     }
     326             :     const Locale *result;
     327           0 :     switch(type) {
     328             :     case ULOC_ACTUAL_LOCALE:
     329           0 :         result = actualLocaleIsSameAsValid ? &validLocale : &tailoring->actualLocale;
     330           0 :         break;
     331             :     case ULOC_VALID_LOCALE:
     332           0 :         result = &validLocale;
     333           0 :         break;
     334             :     case ULOC_REQUESTED_LOCALE:
     335             :     default:
     336           0 :         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
     337           0 :         return NULL;
     338             :     }
     339           0 :     if(result->isBogus()) { return NULL; }
     340           0 :     const char *id = result->getName();
     341           0 :     return id[0] == 0 ? "root" : id;
     342             : }
     343             : 
     344             : const UnicodeString&
     345           0 : RuleBasedCollator::getRules() const {
     346           0 :     return tailoring->rules;
     347             : }
     348             : 
     349             : void
     350           0 : RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer) const {
     351           0 :     if(delta == UCOL_TAILORING_ONLY) {
     352           0 :         buffer = tailoring->rules;
     353           0 :         return;
     354             :     }
     355             :     // UCOL_FULL_RULES
     356           0 :     buffer.remove();
     357           0 :     CollationLoader::appendRootRules(buffer);
     358           0 :     buffer.append(tailoring->rules).getTerminatedBuffer();
     359             : }
     360             : 
     361             : void
     362           0 : RuleBasedCollator::getVersion(UVersionInfo version) const {
     363           0 :     uprv_memcpy(version, tailoring->version, U_MAX_VERSION_LENGTH);
     364           0 :     version[0] += (UCOL_RUNTIME_VERSION << 4) + (UCOL_RUNTIME_VERSION >> 4);
     365           0 : }
     366             : 
     367             : UnicodeSet *
     368           0 : RuleBasedCollator::getTailoredSet(UErrorCode &errorCode) const {
     369           0 :     if(U_FAILURE(errorCode)) { return NULL; }
     370           0 :     UnicodeSet *tailored = new UnicodeSet();
     371           0 :     if(tailored == NULL) {
     372           0 :         errorCode = U_MEMORY_ALLOCATION_ERROR;
     373           0 :         return NULL;
     374             :     }
     375           0 :     if(data->base != NULL) {
     376           0 :         TailoredSet(tailored).forData(data, errorCode);
     377           0 :         if(U_FAILURE(errorCode)) {
     378           0 :             delete tailored;
     379           0 :             return NULL;
     380             :         }
     381             :     }
     382           0 :     return tailored;
     383             : }
     384             : 
     385             : void
     386           0 : RuleBasedCollator::internalGetContractionsAndExpansions(
     387             :         UnicodeSet *contractions, UnicodeSet *expansions,
     388             :         UBool addPrefixes, UErrorCode &errorCode) const {
     389           0 :     if(U_FAILURE(errorCode)) { return; }
     390           0 :     if(contractions != NULL) {
     391           0 :         contractions->clear();
     392             :     }
     393           0 :     if(expansions != NULL) {
     394           0 :         expansions->clear();
     395             :     }
     396           0 :     ContractionsAndExpansions(contractions, expansions, NULL, addPrefixes).forData(data, errorCode);
     397             : }
     398             : 
     399             : void
     400           0 : RuleBasedCollator::internalAddContractions(UChar32 c, UnicodeSet &set, UErrorCode &errorCode) const {
     401           0 :     if(U_FAILURE(errorCode)) { return; }
     402           0 :     ContractionsAndExpansions(&set, NULL, NULL, FALSE).forCodePoint(data, c, errorCode);
     403             : }
     404             : 
     405             : const CollationSettings &
     406           0 : RuleBasedCollator::getDefaultSettings() const {
     407           0 :     return *tailoring->settings;
     408             : }
     409             : 
     410             : UColAttributeValue
     411           0 : RuleBasedCollator::getAttribute(UColAttribute attr, UErrorCode &errorCode) const {
     412           0 :     if(U_FAILURE(errorCode)) { return UCOL_DEFAULT; }
     413             :     int32_t option;
     414           0 :     switch(attr) {
     415             :     case UCOL_FRENCH_COLLATION:
     416           0 :         option = CollationSettings::BACKWARD_SECONDARY;
     417           0 :         break;
     418             :     case UCOL_ALTERNATE_HANDLING:
     419           0 :         return settings->getAlternateHandling();
     420             :     case UCOL_CASE_FIRST:
     421           0 :         return settings->getCaseFirst();
     422             :     case UCOL_CASE_LEVEL:
     423           0 :         option = CollationSettings::CASE_LEVEL;
     424           0 :         break;
     425             :     case UCOL_NORMALIZATION_MODE:
     426           0 :         option = CollationSettings::CHECK_FCD;
     427           0 :         break;
     428             :     case UCOL_STRENGTH:
     429           0 :         return (UColAttributeValue)settings->getStrength();
     430             :     case UCOL_HIRAGANA_QUATERNARY_MODE:
     431             :         // Deprecated attribute, unsettable.
     432           0 :         return UCOL_OFF;
     433             :     case UCOL_NUMERIC_COLLATION:
     434           0 :         option = CollationSettings::NUMERIC;
     435           0 :         break;
     436             :     default:
     437           0 :         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
     438           0 :         return UCOL_DEFAULT;
     439             :     }
     440           0 :     return ((settings->options & option) == 0) ? UCOL_OFF : UCOL_ON;
     441             : }
     442             : 
     443             : void
     444           0 : RuleBasedCollator::setAttribute(UColAttribute attr, UColAttributeValue value,
     445             :                                 UErrorCode &errorCode) {
     446           0 :     UColAttributeValue oldValue = getAttribute(attr, errorCode);
     447           0 :     if(U_FAILURE(errorCode)) { return; }
     448           0 :     if(value == oldValue) {
     449           0 :         setAttributeExplicitly(attr);
     450           0 :         return;
     451             :     }
     452           0 :     const CollationSettings &defaultSettings = getDefaultSettings();
     453           0 :     if(settings == &defaultSettings) {
     454           0 :         if(value == UCOL_DEFAULT) {
     455           0 :             setAttributeDefault(attr);
     456           0 :             return;
     457             :         }
     458             :     }
     459           0 :     CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
     460           0 :     if(ownedSettings == NULL) {
     461           0 :         errorCode = U_MEMORY_ALLOCATION_ERROR;
     462           0 :         return;
     463             :     }
     464             : 
     465           0 :     switch(attr) {
     466             :     case UCOL_FRENCH_COLLATION:
     467             :         ownedSettings->setFlag(CollationSettings::BACKWARD_SECONDARY, value,
     468           0 :                                defaultSettings.options, errorCode);
     469           0 :         break;
     470             :     case UCOL_ALTERNATE_HANDLING:
     471           0 :         ownedSettings->setAlternateHandling(value, defaultSettings.options, errorCode);
     472           0 :         break;
     473             :     case UCOL_CASE_FIRST:
     474           0 :         ownedSettings->setCaseFirst(value, defaultSettings.options, errorCode);
     475           0 :         break;
     476             :     case UCOL_CASE_LEVEL:
     477             :         ownedSettings->setFlag(CollationSettings::CASE_LEVEL, value,
     478           0 :                                defaultSettings.options, errorCode);
     479           0 :         break;
     480             :     case UCOL_NORMALIZATION_MODE:
     481             :         ownedSettings->setFlag(CollationSettings::CHECK_FCD, value,
     482           0 :                                defaultSettings.options, errorCode);
     483           0 :         break;
     484             :     case UCOL_STRENGTH:
     485           0 :         ownedSettings->setStrength(value, defaultSettings.options, errorCode);
     486           0 :         break;
     487             :     case UCOL_HIRAGANA_QUATERNARY_MODE:
     488             :         // Deprecated attribute. Check for valid values but do not change anything.
     489           0 :         if(value != UCOL_OFF && value != UCOL_ON && value != UCOL_DEFAULT) {
     490           0 :             errorCode = U_ILLEGAL_ARGUMENT_ERROR;
     491             :         }
     492           0 :         break;
     493             :     case UCOL_NUMERIC_COLLATION:
     494           0 :         ownedSettings->setFlag(CollationSettings::NUMERIC, value, defaultSettings.options, errorCode);
     495           0 :         break;
     496             :     default:
     497           0 :         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
     498           0 :         break;
     499             :     }
     500           0 :     if(U_FAILURE(errorCode)) { return; }
     501           0 :     setFastLatinOptions(*ownedSettings);
     502           0 :     if(value == UCOL_DEFAULT) {
     503           0 :         setAttributeDefault(attr);
     504             :     } else {
     505           0 :         setAttributeExplicitly(attr);
     506             :     }
     507             : }
     508             : 
     509             : Collator &
     510           0 : RuleBasedCollator::setMaxVariable(UColReorderCode group, UErrorCode &errorCode) {
     511           0 :     if(U_FAILURE(errorCode)) { return *this; }
     512             :     // Convert the reorder code into a MaxVariable number, or UCOL_DEFAULT=-1.
     513             :     int32_t value;
     514           0 :     if(group == UCOL_REORDER_CODE_DEFAULT) {
     515           0 :         value = UCOL_DEFAULT;
     516           0 :     } else if(UCOL_REORDER_CODE_FIRST <= group && group <= UCOL_REORDER_CODE_CURRENCY) {
     517           0 :         value = group - UCOL_REORDER_CODE_FIRST;
     518             :     } else {
     519           0 :         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
     520           0 :         return *this;
     521             :     }
     522           0 :     CollationSettings::MaxVariable oldValue = settings->getMaxVariable();
     523           0 :     if(value == oldValue) {
     524           0 :         setAttributeExplicitly(ATTR_VARIABLE_TOP);
     525           0 :         return *this;
     526             :     }
     527           0 :     const CollationSettings &defaultSettings = getDefaultSettings();
     528           0 :     if(settings == &defaultSettings) {
     529           0 :         if(value == UCOL_DEFAULT) {
     530           0 :             setAttributeDefault(ATTR_VARIABLE_TOP);
     531           0 :             return *this;
     532             :         }
     533             :     }
     534           0 :     CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
     535           0 :     if(ownedSettings == NULL) {
     536           0 :         errorCode = U_MEMORY_ALLOCATION_ERROR;
     537           0 :         return *this;
     538             :     }
     539             : 
     540           0 :     if(group == UCOL_REORDER_CODE_DEFAULT) {
     541           0 :         group = (UColReorderCode)(UCOL_REORDER_CODE_FIRST + defaultSettings.getMaxVariable());
     542             :     }
     543           0 :     uint32_t varTop = data->getLastPrimaryForGroup(group);
     544           0 :     U_ASSERT(varTop != 0);
     545           0 :     ownedSettings->setMaxVariable(value, defaultSettings.options, errorCode);
     546           0 :     if(U_FAILURE(errorCode)) { return *this; }
     547           0 :     ownedSettings->variableTop = varTop;
     548           0 :     setFastLatinOptions(*ownedSettings);
     549           0 :     if(value == UCOL_DEFAULT) {
     550           0 :         setAttributeDefault(ATTR_VARIABLE_TOP);
     551             :     } else {
     552           0 :         setAttributeExplicitly(ATTR_VARIABLE_TOP);
     553             :     }
     554           0 :     return *this;
     555             : }
     556             : 
     557             : UColReorderCode
     558           0 : RuleBasedCollator::getMaxVariable() const {
     559           0 :     return (UColReorderCode)(UCOL_REORDER_CODE_FIRST + settings->getMaxVariable());
     560             : }
     561             : 
     562             : uint32_t
     563           0 : RuleBasedCollator::getVariableTop(UErrorCode & /*errorCode*/) const {
     564           0 :     return settings->variableTop;
     565             : }
     566             : 
     567             : uint32_t  // Sets the variable top from a string that must map to exactly one collation element (CE).
     568           0 : RuleBasedCollator::setVariableTop(const UChar *varTop, int32_t len, UErrorCode &errorCode) {  // returns 0 on the error paths below, otherwise settings->variableTop
     569           0 :     if(U_FAILURE(errorCode)) { return 0; }
     570           0 :     if(varTop == NULL && len !=0) {  // a NULL pointer is only tolerated together with len == 0 (rejected as empty just below)
     571           0 :         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
     572           0 :         return 0;
     573             :     }
     574           0 :     if(len < 0) { len = u_strlen(varTop); }  // negative len: measure the string with u_strlen()
     575           0 :     if(len == 0) {  // an empty string cannot designate a variable top
     576           0 :         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
     577           0 :         return 0;
     578             :     }
     579           0 :     UBool numeric = settings->isNumeric();
     580             :     int64_t ce1, ce2;  // first and second CE produced for the string
     581           0 :     if(settings->dontCheckFCD()) {  // choose the plain or the FCD-checking iterator according to the settings
     582           0 :         UTF16CollationIterator ci(data, numeric, varTop, varTop, varTop + len);
     583           0 :         ce1 = ci.nextCE(errorCode);
     584           0 :         ce2 = ci.nextCE(errorCode);
     585             :     } else {
     586           0 :         FCDUTF16CollationIterator ci(data, numeric, varTop, varTop, varTop + len);
     587           0 :         ce1 = ci.nextCE(errorCode);
     588           0 :         ce2 = ci.nextCE(errorCode);
     589             :     }
     590           0 :     if(ce1 == Collation::NO_CE || ce2 != Collation::NO_CE) {  // reject zero CEs as well as more than one CE
     591           0 :         errorCode = U_CE_NOT_FOUND_ERROR;
     592           0 :         return 0;
     593             :     }
     594           0 :     setVariableTop((uint32_t)(ce1 >> 32), errorCode);  // pass the upper 32 bits of the CE; the uint32_t overload looks them up as a primary weight
     595           0 :     return settings->variableTop;  // NOTE(review): returned even if the call above set errorCode; in that case the value is the unchanged variable top
     596             : }
     597             : 
     598             : uint32_t  // UnicodeString convenience overload.
     599           0 : RuleBasedCollator::setVariableTop(const UnicodeString &varTop, UErrorCode &errorCode) {
     600           0 :     return setVariableTop(varTop.getBuffer(), varTop.length(), errorCode);  // delegates to the (const UChar *, int32_t) overload, which does all validation
     601             : }
     602             : 
     603             : void  // Sets the variable top from a primary weight, rounded up to the end of its reordering group.
     604           0 : RuleBasedCollator::setVariableTop(uint32_t varTop, UErrorCode &errorCode) {
     605           0 :     if(U_FAILURE(errorCode)) { return; }
     606           0 :     if(varTop != settings->variableTop) {  // nothing to recompute when the requested value is already in effect
     607             :         // Pin the variable top to the end of the reordering group which contains it.
     608             :         // Only a few special groups are supported.
     609           0 :         int32_t group = data->getGroupForPrimary(varTop);
     610           0 :         if(group < UCOL_REORDER_CODE_FIRST || UCOL_REORDER_CODE_CURRENCY < group) {  // accepted range: UCOL_REORDER_CODE_FIRST through UCOL_REORDER_CODE_CURRENCY
     611           0 :             errorCode = U_ILLEGAL_ARGUMENT_ERROR;
     612           0 :             return;
     613             :         }
     614           0 :         uint32_t v = data->getLastPrimaryForGroup(group);
     615           0 :         U_ASSERT(v != 0 && v >= varTop);  // the group's last primary must exist and must not be below the requested value
     616           0 :         varTop = v;  // from here on varTop is the pinned value
     617           0 :         if(varTop != settings->variableTop) {  // re-check: the pinned value may equal the current one
     618           0 :             CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);  // obtain a settings object that may be modified
     619           0 :             if(ownedSettings == NULL) {
     620           0 :                 errorCode = U_MEMORY_ALLOCATION_ERROR;
     621           0 :                 return;
     622             :             }
     623           0 :             ownedSettings->setMaxVariable(group - UCOL_REORDER_CODE_FIRST,  // max-variable is stored as an offset from UCOL_REORDER_CODE_FIRST
     624           0 :                                           getDefaultSettings().options, errorCode);
     625           0 :             if(U_FAILURE(errorCode)) { return; }
     626           0 :             ownedSettings->variableTop = varTop;
     627           0 :             setFastLatinOptions(*ownedSettings);  // refresh the fast-Latin options after changing the settings
     628             :         }
     629             :     }
     630           0 :     if(varTop == getDefaultSettings().variableTop) {  // record whether the attribute now matches the default or was set explicitly
     631           0 :         setAttributeDefault(ATTR_VARIABLE_TOP);
     632             :     } else {
     633           0 :         setAttributeExplicitly(ATTR_VARIABLE_TOP);
     634             :     }
     635             : }
     636             : 
     637             : int32_t  // Copies the current reorder codes into dest and returns how many codes there are.
     638           0 : RuleBasedCollator::getReorderCodes(int32_t *dest, int32_t capacity,
     639             :                                    UErrorCode &errorCode) const {
     640           0 :     if(U_FAILURE(errorCode)) { return 0; }
     641           0 :     if(capacity < 0 || (dest == NULL && capacity > 0)) {  // dest may be NULL only when capacity is 0
     642           0 :         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
     643           0 :         return 0;
     644             :     }
     645           0 :     int32_t length = settings->reorderCodesLength;
     646           0 :     if(length == 0) { return 0; }
     647           0 :     if(length > capacity) {  // too small: report the required length together with U_BUFFER_OVERFLOW_ERROR, copy nothing
     648           0 :         errorCode = U_BUFFER_OVERFLOW_ERROR;
     649           0 :         return length;
     650             :     }
     651           0 :     uprv_memcpy(dest, settings->reorderCodes, length * 4);  // byte count: 4 bytes per int32_t code
     652           0 :     return length;
     653             : }
     654             : 
     655             : void  // Replaces the collator's reorder codes, handling the NONE and DEFAULT special values.
     656           0 : RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes, int32_t length,
     657             :                                    UErrorCode &errorCode) {
     658           0 :     if(U_FAILURE(errorCode)) { return; }
     659           0 :     if(length < 0 || (reorderCodes == NULL && length > 0)) {  // reorderCodes may be NULL only when length is 0
     660           0 :         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
     661           0 :         return;
     662             :     }
     663           0 :     if(length == 1 && reorderCodes[0] == UCOL_REORDER_CODE_NONE) {  // a lone NONE code is treated as an empty list
     664           0 :         length = 0;
     665             :     }
     666           0 :     if(length == settings->reorderCodesLength &&  // unchanged list: return without touching the settings
     667           0 :             uprv_memcmp(reorderCodes, settings->reorderCodes, length * 4) == 0) {  // byte count: 4 bytes per int32_t code
     668           0 :         return;
     669             :     }
     670           0 :     const CollationSettings &defaultSettings = getDefaultSettings();
     671           0 :     if(length == 1 && reorderCodes[0] == UCOL_REORDER_CODE_DEFAULT) {  // a lone DEFAULT code restores the default settings' reordering
     672           0 :         if(settings != &defaultSettings) {  // already using the default settings object: nothing to copy
     673           0 :             CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
     674           0 :             if(ownedSettings == NULL) {
     675           0 :                 errorCode = U_MEMORY_ALLOCATION_ERROR;
     676           0 :                 return;
     677             :             }
     678           0 :             ownedSettings->copyReorderingFrom(defaultSettings, errorCode);
     679           0 :             setFastLatinOptions(*ownedSettings);  // NOTE(review): runs even if copyReorderingFrom() set errorCode
     680             :         }
     681           0 :         return;
     682             :     }
     683           0 :     CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);  // general case: obtain a modifiable settings object
     684           0 :     if(ownedSettings == NULL) {
     685           0 :         errorCode = U_MEMORY_ALLOCATION_ERROR;
     686           0 :         return;
     687             :     }
     688           0 :     ownedSettings->setReordering(*data, reorderCodes, length, errorCode);
     689           0 :     setFastLatinOptions(*ownedSettings);  // NOTE(review): runs even if setReordering() set errorCode
     690             : }
     691             : 
     692             : void  // Recomputes the fast-Latin options of a settings object from this collator's data.
     693           0 : RuleBasedCollator::setFastLatinOptions(CollationSettings &ownedSettings) const {  // const: only the passed-in settings object is modified
     694           0 :     ownedSettings.fastLatinOptions = CollationFastLatin::getOptions(
     695           0 :             data, ownedSettings,
     696             :             ownedSettings.fastLatinPrimaries, UPRV_LENGTHOF(ownedSettings.fastLatinPrimaries));  // array plus its capacity; presumably filled by getOptions() - confirm there
     697           0 : }
     698             : 
     699             : UCollationResult  // Compares two UnicodeStrings in full.
     700           0 : RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right,
     701             :                            UErrorCode &errorCode) const {
     702           0 :     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }  // UCOL_EQUAL is the return value when errorCode already indicates failure
     703           0 :     return doCompare(left.getBuffer(), left.length(),  // both lengths are known here
     704           0 :                      right.getBuffer(), right.length(), errorCode);
     705             : }
     706             : 
     707             : UCollationResult  // Compares two UnicodeStrings, using at most `length` units from the start of each.
     708           0 : RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right,
     709             :                            int32_t length, UErrorCode &errorCode) const {
     710           0 :     if(U_FAILURE(errorCode) || length == 0) { return UCOL_EQUAL; }  // comparing zero units yields UCOL_EQUAL
     711           0 :     if(length < 0) {  // a negative limit is rejected
     712           0 :         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
     713           0 :         return UCOL_EQUAL;
     714             :     }
     715           0 :     int32_t leftLength = left.length();
     716           0 :     int32_t rightLength = right.length();
     717           0 :     if(leftLength > length) { leftLength = length; }  // clamp each string's length to the limit
     718           0 :     if(rightLength > length) { rightLength = length; }
     719           0 :     return doCompare(left.getBuffer(), leftLength,
     720           0 :                      right.getBuffer(), rightLength, errorCode);
     721             : }
     722             : 
     723             : UCollationResult  // Compares two UTF-16 buffers; a negative length means the length is not given and is found with u_strlen() when needed.
     724           0 : RuleBasedCollator::compare(const UChar *left, int32_t leftLength,
     725             :                            const UChar *right, int32_t rightLength,
     726             :                            UErrorCode &errorCode) const {
     727           0 :     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
     728           0 :     if((left == NULL && leftLength != 0) || (right == NULL && rightLength != 0)) {  // a NULL pointer is only accepted together with length 0
     729           0 :         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
     730           0 :         return UCOL_EQUAL;
     731             :     }
     732             :     // Make sure both or neither strings have a known length.
     733             :     // We do not optimize for mixed length/termination.
     734           0 :     if(leftLength >= 0) {
     735           0 :         if(rightLength < 0) { rightLength = u_strlen(right); }  // only the right length is unknown: measure it
     736             :     } else {
     737           0 :         if(rightLength >= 0) { leftLength = u_strlen(left); }  // only the left length is unknown: measure it
     738             :     }
     739           0 :     return doCompare(left, leftLength, right, rightLength, errorCode);  // lengths are now both negative or both non-negative
     740             : }
     741             : 
     742             : UCollationResult  // Compares two strings given as StringPiece byte ranges, using the uint8_t overload of doCompare().
     743           0 : RuleBasedCollator::compareUTF8(const StringPiece &left, const StringPiece &right,
     744             :                                UErrorCode &errorCode) const {
     745           0 :     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
     746           0 :     const uint8_t *leftBytes = reinterpret_cast<const uint8_t *>(left.data());  // view the char data as unsigned bytes
     747           0 :     const uint8_t *rightBytes = reinterpret_cast<const uint8_t *>(right.data());
     748           0 :     if((leftBytes == NULL && !left.empty()) || (rightBytes == NULL && !right.empty())) {  // NULL data is only accepted for an empty piece
     749           0 :         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
     750           0 :         return UCOL_EQUAL;
     751             :     }
     752           0 :     return doCompare(leftBytes, left.length(), rightBytes, right.length(), errorCode);
     753             : }
     754             : 
     755             : UCollationResult  // char-pointer variant of the UTF-8 comparison; a negative length is measured with uprv_strlen() when needed.
     756           0 : RuleBasedCollator::internalCompareUTF8(const char *left, int32_t leftLength,
     757             :                                        const char *right, int32_t rightLength,
     758             :                                        UErrorCode &errorCode) const {
     759           0 :     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
     760           0 :     if((left == NULL && leftLength != 0) || (right == NULL && rightLength != 0)) {  // a NULL pointer is only accepted together with length 0
     761           0 :         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
     762           0 :         return UCOL_EQUAL;
     763             :     }
     764             :     // Make sure both or neither strings have a known length.
     765             :     // We do not optimize for mixed length/termination.
     766           0 :     if(leftLength >= 0) {
     767           0 :         if(rightLength < 0) { rightLength = uprv_strlen(right); }  // only the right length is unknown: measure it
     768             :     } else {
     769           0 :         if(rightLength >= 0) { leftLength = uprv_strlen(left); }  // only the left length is unknown: measure it
     770             :     }
     771             :     return doCompare(reinterpret_cast<const uint8_t *>(left), leftLength,  // view the char data as unsigned bytes for the uint8_t overload
     772           0 :                      reinterpret_cast<const uint8_t *>(right), rightLength, errorCode);
     773             : }
     774             : 
     775             : namespace {
     776             : 
     777             : /**
     778             :  * Abstract iterator for identical-level string comparisons.
     779             :  * Returns FCD code points and handles temporary switching to NFD.
     780             :  * Subclasses supply the text code points through nextRawCodePoint().
     781             : class NFDIterator : public UObject {
     782             : public:
     783           0 :     NFDIterator() : index(-1), length(0) {}  // index -1: no decomposition pending; decomp is left unset and is only read while index >= 0
     784           0 :     virtual ~NFDIterator() {}
     785             :     /**
     786             :      * Returns the next code point from the internal normalization buffer,
     787             :      * or else the next text code point.
     788             :      * Returns -1 at the end of the text.
     789             :      */
     790           0 :     UChar32 nextCodePoint() {
     791           0 :         if(index >= 0) {  // a decomposition is being read
     792           0 :             if(index == length) {  // decomposition used up: switch back to the text
     793           0 :                 index = -1;
     794             :             } else {
     795             :                 UChar32 c;
     796           0 :                 U16_NEXT_UNSAFE(decomp, index, c);  // read the next code point from decomp and advance index
     797           0 :                 return c;
     798             :             }
     799             :         }
     800           0 :         return nextRawCodePoint();
     801             :     }
     802             :     /**
     803             :      * @param nfcImpl normalization data used to look up c's decomposition
     804             :      * @param c the last code point returned by nextCodePoint() or nextDecomposedCodePoint()
     805             :      * @return the first code point in c's decomposition,
     806             :      *         or c itself if it was decomposed already or if it does not decompose
     807             :      */
     808           0 :     UChar32 nextDecomposedCodePoint(const Normalizer2Impl &nfcImpl, UChar32 c) {
     809           0 :         if(index >= 0) { return c; }  // c already came out of a decomposition
     810           0 :         decomp = nfcImpl.getDecomposition(c, buffer, length);  // also sets length; buffer is scratch space offered to the lookup
     811           0 :         if(decomp == NULL) { return c; }  // NULL: c does not decompose
     812           0 :         index = 0;  // start reading the decomposition; later nextCodePoint() calls continue from here
     813           0 :         U16_NEXT_UNSAFE(decomp, index, c);
     814           0 :         return c;
     815             :     }
     816             : protected:
     817             :     /**
     818             :      * Returns the next text code point in FCD order.
     819             :      * Returns -1 at the end of the text.
     820             :      */
     821             :     virtual UChar32 nextRawCodePoint() = 0;
     822             : private:
     823             :     const UChar *decomp;  // current decomposition; meaningful only while index >= 0
     824             :     UChar buffer[4];  // storage handed to getDecomposition()
     825             :     int32_t index;  // read position in decomp, or -1 when reading from the text
     826             :     int32_t length;  // length of decomp in UChars, as set by getDecomposition()
     827             : };
     828             : 
     829           0 : class UTF16NFDIterator : public NFDIterator {  // Reads code points straight from a UTF-16 buffer, with no FCD processing of its own.
     830             : public:
     831           0 :     UTF16NFDIterator(const UChar *text, const UChar *textLimit) : s(text), limit(textLimit) {}  // a NULL textLimit means the text ends at the first NUL
     832             : protected:
     833           0 :     virtual UChar32 nextRawCodePoint() {
     834           0 :         if(s == limit) { return U_SENTINEL; }  // end reached (also true once s has been reset to NULL below)
     835           0 :         UChar32 c = *s++;
     836           0 :         if(limit == NULL && c == 0) {  // NUL terminator of a text without explicit limit
     837           0 :             s = NULL;  // makes s == limit so that later calls keep returning U_SENTINEL
     838           0 :             return U_SENTINEL;
     839             :         }
     840             :         UChar trail;
     841           0 :         if(U16_IS_LEAD(c) && s != limit && U16_IS_TRAIL(trail = *s)) {  // combine a lead+trail surrogate pair; an unpaired lead is returned as is
     842           0 :             ++s;
     843           0 :             c = U16_GET_SUPPLEMENTARY(c, trail);
     844             :         }
     845           0 :         return c;
     846             :     }
     847             : 
     848             :     const UChar *s;  // current read position
     849             :     const UChar *limit;  // end of text, or NULL for NUL-terminated text
     850             : };
     851             : 
     852           0 : class FCDUTF16NFDIterator : public UTF16NFDIterator {  // UTF-16 iterator that runs the text through makeFCD() up front when necessary.
     853             : public:
     854           0 :     FCDUTF16NFDIterator(const Normalizer2Impl &nfcImpl, const UChar *text, const UChar *textLimit)
     855           0 :             : UTF16NFDIterator(NULL, NULL) {  // start empty (s == limit); stays that way if an error occurs below
     856           0 :         UErrorCode errorCode = U_ZERO_ERROR;  // local only: failures are not reported to the caller
     857           0 :         const UChar *spanLimit = nfcImpl.makeFCD(text, textLimit, NULL, errorCode);  // no output buffer; presumably returns the end of the leading span that needs no change
     858           0 :         if(U_FAILURE(errorCode)) { return; }
     859           0 :         if(spanLimit == textLimit || (textLimit == NULL && *spanLimit == 0)) {  // the span covers the whole text: iterate over the original buffer
     860           0 :             s = text;
     861           0 :             limit = spanLimit;
     862             :         } else {
     863           0 :             str.setTo(text, (int32_t)(spanLimit - text));  // copy the leading span, then process the remainder into str
     864             :             {
     865           0 :                 ReorderingBuffer buffer(nfcImpl, str);  // scoped so that it is destroyed before str is read below
     866           0 :                 if(buffer.init(str.length(), errorCode)) {
     867           0 :                     nfcImpl.makeFCD(spanLimit, textLimit, &buffer, errorCode);
     868             :                 }
     869             :             }
     870           0 :             if(U_SUCCESS(errorCode)) {  // on failure s and limit stay NULL, so the iterator yields no code points
     871           0 :                 s = str.getBuffer();
     872           0 :                 limit = s + str.length();
     873             :             }
     874             :         }
     875             :     }
     876             : private:
     877             :     UnicodeString str;  // owns the processed copy of the text when one had to be made
     878             : };
     879             : 
     880           0 : class UTF8NFDIterator : public NFDIterator {  // Reads code points straight from a UTF-8 byte buffer.
     881             : public:
     882           0 :     UTF8NFDIterator(const uint8_t *text, int32_t textLength)
     883           0 :         : s(text), pos(0), length(textLength) {}  // a negative textLength means the text ends at the first NUL byte
     884             : protected:
     885           0 :     virtual UChar32 nextRawCodePoint() {
     886           0 :         if(pos == length || (s[pos] == 0 && length < 0)) { return U_SENTINEL; }  // end: explicit length reached, or NUL byte when the length is negative
     887             :         UChar32 c;
     888           0 :         U8_NEXT_OR_FFFD(s, pos, length, c);  // decode one code point and advance pos
     889           0 :         return c;
     890             :     }
     891             : 
     892             :     const uint8_t *s;  // start of the text
     893             :     int32_t pos;  // current byte offset into s
     894             :     int32_t length;  // byte length, or negative for NUL-terminated text
     895             : };
     896             : 
     897           0 : class FCDUTF8NFDIterator : public NFDIterator {  // UTF-8 iterator that takes its code points from an FCDUTF8CollationIterator.
     898             : public:
     899           0 :     FCDUTF8NFDIterator(const CollationData *data, const uint8_t *text, int32_t textLength)
     900           0 :             : u8ci(data, FALSE, text, 0, textLength) {}  // FALSE: presumably the numeric flag, as in the other iterator constructors in this file - confirm
     901             : protected:
     902           0 :     virtual UChar32 nextRawCodePoint() {
     903           0 :         UErrorCode errorCode = U_ZERO_ERROR;  // local only: any error from the wrapped iterator is discarded
     904           0 :         return u8ci.nextCodePoint(errorCode);
     905             :     }
     906             : private:
     907             :     FCDUTF8CollationIterator u8ci;  // only its nextCodePoint() is used here
     908             : };
     909             : 
     910           0 : class UIterNFDIterator : public NFDIterator {  // Reads code points from a UCharIterator with no FCD processing.
     911             : public:
     912           0 :     UIterNFDIterator(UCharIterator &it) : iter(it) {}  // keeps a reference: the caller's iterator is advanced directly
     913             : protected:
     914           0 :     virtual UChar32 nextRawCodePoint() {
     915           0 :         return uiter_next32(&iter);
     916             :     }
     917             : private:
     918             :     UCharIterator &iter;  // not owned
     919             : };
     920             : 
     921           0 : class FCDUIterNFDIterator : public NFDIterator {  // UCharIterator-based iterator that takes its code points from an FCDUIterCollationIterator.
     922             : public:
     923           0 :     FCDUIterNFDIterator(const CollationData *data, UCharIterator &it, int32_t startIndex)
     924           0 :             : uici(data, FALSE, it, startIndex) {}  // FALSE: presumably the numeric flag, as in the other iterator constructors in this file - confirm
     925             : protected:
     926           0 :     virtual UChar32 nextRawCodePoint() {
     927           0 :         UErrorCode errorCode = U_ZERO_ERROR;  // local only: any error from the wrapped iterator is discarded
     928           0 :         return uici.nextCodePoint(errorCode);
     929             :     }
     930             : private:
     931             :     FCDUIterCollationIterator uici;  // only its nextCodePoint() is used here
     932             : };
     933             : 
     934           0 : UCollationResult compareNFDIter(const Normalizer2Impl &nfcImpl,  // Compares two texts code point by code point, decomposing only where they differ.
     935             :                                 NFDIterator &left, NFDIterator &right) {
     936             :     for(;;) {
     937             :         // Fetch the next FCD code point from each string.
     938           0 :         UChar32 leftCp = left.nextCodePoint();
     939           0 :         UChar32 rightCp = right.nextCodePoint();
     940           0 :         if(leftCp == rightCp) {
     941           0 :             if(leftCp < 0) { break; }  // both texts ended together: equal
     942           0 :             continue;
     943             :         }
     944             :         // If they are different, then decompose each and compare again.
     945           0 :         if(leftCp < 0) {  // the remapping to -2 and -1 makes end-of-string sort lowest and U+FFFE next, below every real code point
     946           0 :             leftCp = -2;  // end of string
     947           0 :         } else if(leftCp == 0xfffe) {
     948           0 :             leftCp = -1;  // U+FFFE: merge separator
     949             :         } else {
     950           0 :             leftCp = left.nextDecomposedCodePoint(nfcImpl, leftCp);
     951             :         }
     952           0 :         if(rightCp < 0) {
     953           0 :             rightCp = -2;  // end of string
     954           0 :         } else if(rightCp == 0xfffe) {
     955           0 :             rightCp = -1;  // U+FFFE: merge separator
     956             :         } else {
     957           0 :             rightCp = right.nextDecomposedCodePoint(nfcImpl, rightCp);
     958             :         }
     959           0 :         if(leftCp < rightCp) { return UCOL_LESS; }
     960           0 :         if(leftCp > rightCp) { return UCOL_GREATER; }  // if equal after decomposition, the loop continues with the following code points
     961           0 :     }
     962           0 :     return UCOL_EQUAL;
     963             : }
     964             : 
     965             : }  // namespace
     966             : 
     967             : UCollationResult  // UTF-16 comparison: skip the identical prefix, try the fast-Latin path, fall back to collation iterators, then the identical level.
     968           0 : RuleBasedCollator::doCompare(const UChar *left, int32_t leftLength,
     969             :                              const UChar *right, int32_t rightLength,
     970             :                              UErrorCode &errorCode) const {
     971             :     // U_FAILURE(errorCode) checked by caller.
     972           0 :     if(left == right && leftLength == rightLength) {  // same buffer and same length: trivially equal
     973           0 :         return UCOL_EQUAL;
     974             :     }
     975             : 
     976             :     // Identical-prefix test.
     977             :     const UChar *leftLimit;
     978             :     const UChar *rightLimit;
     979           0 :     int32_t equalPrefixLength = 0;
     980           0 :     if(leftLength < 0) {  // NUL-terminated input; only leftLength is tested, the visible callers pass both lengths negative or neither
     981           0 :         leftLimit = NULL;  // NULL limit: no explicit end (UTF16NFDIterator treats this as NUL-terminated)
     982           0 :         rightLimit = NULL;
     983             :         UChar c;
     984           0 :         while((c = left[equalPrefixLength]) == right[equalPrefixLength]) {
     985           0 :             if(c == 0) { return UCOL_EQUAL; }  // both terminators reached with no difference
     986           0 :             ++equalPrefixLength;
     987             :         }
     988             :     } else {
     989           0 :         leftLimit = left + leftLength;
     990           0 :         rightLimit = right + rightLength;
     991             :         for(;;) {
     992           0 :             if(equalPrefixLength == leftLength) {
     993           0 :                 if(equalPrefixLength == rightLength) { return UCOL_EQUAL; }  // both strings exhausted with no difference
     994           0 :                 break;
     995           0 :             } else if(equalPrefixLength == rightLength ||
     996           0 :                       left[equalPrefixLength] != right[equalPrefixLength]) {
     997             :                 break;
     998             :             }
     999           0 :             ++equalPrefixLength;
    1000             :         }
    1001             :     }
    1002             : 
    1003           0 :     UBool numeric = settings->isNumeric();
    1004           0 :     if(equalPrefixLength > 0) {
    1005           0 :         if((equalPrefixLength != leftLength &&  // is the first differing unit of either string unsafe to start from?
    1006           0 :                     data->isUnsafeBackward(left[equalPrefixLength], numeric)) ||
    1007           0 :                 (equalPrefixLength != rightLength &&
    1008           0 :                     data->isUnsafeBackward(right[equalPrefixLength], numeric))) {
    1009             :             // Identical prefix: Back up to the start of a contraction or reordering sequence.
    1010           0 :             while(--equalPrefixLength > 0 &&  // the strings are identical in this range, so testing left[] alone suffices
    1011           0 :                     data->isUnsafeBackward(left[equalPrefixLength], numeric)) {}
    1012             :         }
    1013             :         // Notes:
    1014             :         // - A longer string can compare equal to a prefix of it if only ignorables follow.
    1015             :         // - With a backward level, a longer string can compare less-than a prefix of it.
    1016             : 
    1017             :         // Pass the actual start of each string into the CollationIterators,
    1018             :         // plus the equalPrefixLength position,
    1019             :         // so that prefix matches back into the equal prefix work.
    1020             :     }
    1021             : 
    1022             :     int32_t result;
    1023           0 :     int32_t fastLatinOptions = settings->fastLatinOptions;
    1024           0 :     if(fastLatinOptions >= 0 &&  // negative options: fast-Latin path not available
    1025           0 :             (equalPrefixLength == leftLength ||  // each string must be at its end or continue with a unit <= LATIN_MAX
    1026           0 :                 left[equalPrefixLength] <= CollationFastLatin::LATIN_MAX) &&
    1027           0 :             (equalPrefixLength == rightLength ||
    1028           0 :                 right[equalPrefixLength] <= CollationFastLatin::LATIN_MAX)) {
    1029           0 :         if(leftLength >= 0) {
    1030           0 :             result = CollationFastLatin::compareUTF16(data->fastLatinTable,
    1031           0 :                                                       settings->fastLatinPrimaries,
    1032             :                                                       fastLatinOptions,
    1033           0 :                                                       left + equalPrefixLength,  // only the parts after the equal prefix are passed
    1034             :                                                       leftLength - equalPrefixLength,
    1035           0 :                                                       right + equalPrefixLength,
    1036           0 :                                                       rightLength - equalPrefixLength);
    1037             :         } else {
    1038           0 :             result = CollationFastLatin::compareUTF16(data->fastLatinTable,
    1039           0 :                                                       settings->fastLatinPrimaries,
    1040             :                                                       fastLatinOptions,
    1041           0 :                                                       left + equalPrefixLength, -1,  // -1: lengths stay unknown (NUL-terminated)
    1042           0 :                                                       right + equalPrefixLength, -1);
    1043             :         }
    1044             :     } else {
    1045           0 :         result = CollationFastLatin::BAIL_OUT_RESULT;  // fast path not attempted: force the full comparison below
    1046             :     }
    1047             : 
    1048           0 :     if(result == CollationFastLatin::BAIL_OUT_RESULT) {  // fast path skipped or gave up
    1049           0 :         if(settings->dontCheckFCD()) {  // choose plain or FCD-checking iterators according to the settings
    1050           0 :             UTF16CollationIterator leftIter(data, numeric,
    1051           0 :                                             left, left + equalPrefixLength, leftLimit);
    1052           0 :             UTF16CollationIterator rightIter(data, numeric,
    1053           0 :                                             right, right + equalPrefixLength, rightLimit);
    1054           0 :             result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
    1055             :         } else {
    1056           0 :             FCDUTF16CollationIterator leftIter(data, numeric,
    1057           0 :                                               left, left + equalPrefixLength, leftLimit);
    1058           0 :             FCDUTF16CollationIterator rightIter(data, numeric,
    1059           0 :                                                 right, right + equalPrefixLength, rightLimit);
    1060           0 :             result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
    1061             :         }
    1062             :     }
    1063           0 :     if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {  // finished unless all levels were equal and strength is at least UCOL_IDENTICAL
    1064           0 :         return (UCollationResult)result;
    1065             :     }
    1066             : 
    1067             :     // Note: If NUL-terminated, we could get the actual limits from the iterators now.
    1068             :     // That would complicate the iterators a bit, NUL-terminated strings are only a C convenience,
    1069             :     // and the benefit seems unlikely to be measurable.
    1070             : 
    1071             :     // Compare identical level.
    1072           0 :     const Normalizer2Impl &nfcImpl = data->nfcImpl;
    1073           0 :     left += equalPrefixLength;  // skip the identical prefix; the limits are NULL or absolute end pointers and stay valid
    1074           0 :     right += equalPrefixLength;
    1075           0 :     if(settings->dontCheckFCD()) {
    1076           0 :         UTF16NFDIterator leftIter(left, leftLimit);
    1077           0 :         UTF16NFDIterator rightIter(right, rightLimit);
    1078           0 :         return compareNFDIter(nfcImpl, leftIter, rightIter);
    1079             :     } else {
    1080           0 :         FCDUTF16NFDIterator leftIter(nfcImpl, left, leftLimit);
    1081           0 :         FCDUTF16NFDIterator rightIter(nfcImpl, right, rightLimit);
    1082           0 :         return compareNFDIter(nfcImpl, leftIter, rightIter);
    1083             :     }
    1084             : }
    1085             : 
    1086             : UCollationResult
    1087           0 : RuleBasedCollator::doCompare(const uint8_t *left, int32_t leftLength,
    1088             :                              const uint8_t *right, int32_t rightLength,
    1089             :                              UErrorCode &errorCode) const {
                           // UTF-8 variant of doCompare(): compares two byte strings in stages.
                           //  1. Skip the byte-identical prefix, then back up to a position from
                           //     which the comparison can restart.
                           //  2. Try CollationFastLatin::compareUTF8() when the settings allow it.
                           //  3. If that was not attempted or returned BAIL_OUT_RESULT,
                           //     compare with full collation iterators.
                           //  4. If the result is still UCOL_EQUAL and the strength is at least
                           //     UCOL_IDENTICAL, compare the remaining text through NFD iterators.
                           // A negative leftLength selects the NUL-terminated code paths.
    1090             :     // U_FAILURE(errorCode) checked by caller.
    1091           0 :     if(left == right && leftLength == rightLength) {
                               // Same pointer and same length argument: nothing to compare.
    1092           0 :         return UCOL_EQUAL;
    1093             :     }
    1094             : 
    1095             :     // Identical-prefix test.
    1096           0 :     int32_t equalPrefixLength = 0;
    1097           0 :     if(leftLength < 0) {
                               // NUL-terminated: advance while the bytes match; a shared NUL byte
                               // means both strings ended together.
                               // NOTE(review): only leftLength is tested here; rightLength is
                               // presumably negative as well -- confirm against callers.
    1098             :         uint8_t c;
    1099           0 :         while((c = left[equalPrefixLength]) == right[equalPrefixLength]) {
    1100           0 :             if(c == 0) { return UCOL_EQUAL; }
    1101           0 :             ++equalPrefixLength;
    1102             :         }
    1103             :     } else {
                               // Length-delimited: stop at the end of either string
                               // or at the first differing byte.
    1104             :         for(;;) {
    1105           0 :             if(equalPrefixLength == leftLength) {
    1106           0 :                 if(equalPrefixLength == rightLength) { return UCOL_EQUAL; }
    1107           0 :                 break;
    1108           0 :             } else if(equalPrefixLength == rightLength ||
    1109           0 :                       left[equalPrefixLength] != right[equalPrefixLength]) {
    1110             :                 break;
    1111             :             }
    1112           0 :             ++equalPrefixLength;
    1113             :         }
    1114             :     }
    1115             :     // Back up to the start of a partially-equal code point.
                           // If either string continues with a trail byte, the prefix ended inside
                           // a multi-byte sequence. The loop inspects only left because the bytes
                           // before equalPrefixLength are the same in both strings.
    1116           0 :     if(equalPrefixLength > 0 &&
    1117           0 :             ((equalPrefixLength != leftLength && U8_IS_TRAIL(left[equalPrefixLength])) ||
    1118           0 :             (equalPrefixLength != rightLength && U8_IS_TRAIL(right[equalPrefixLength])))) {
    1119           0 :         while(--equalPrefixLength > 0 && U8_IS_TRAIL(left[equalPrefixLength])) {}
    1120             :     }
    1121             : 
    1122           0 :     UBool numeric = settings->isNumeric();
    1123           0 :     if(equalPrefixLength > 0) {
                               // Decode the code point that follows the prefix on each side (if any)
                               // and ask the collation data whether it is "unsafe backward".
                               // The temporary index i keeps equalPrefixLength unchanged.
    1124           0 :         UBool unsafe = FALSE;
    1125           0 :         if(equalPrefixLength != leftLength) {
    1126           0 :             int32_t i = equalPrefixLength;
    1127             :             UChar32 c;
    1128           0 :             U8_NEXT_OR_FFFD(left, i, leftLength, c);
    1129           0 :             unsafe = data->isUnsafeBackward(c, numeric);
    1130             :         }
    1131           0 :         if(!unsafe && equalPrefixLength != rightLength) {
    1132           0 :             int32_t i = equalPrefixLength;
    1133             :             UChar32 c;
    1134           0 :             U8_NEXT_OR_FFFD(right, i, rightLength, c);
    1135           0 :             unsafe = data->isUnsafeBackward(c, numeric);
    1136             :         }
    1137           0 :         if(unsafe) {
    1138             :             // Identical prefix: Back up to the start of a contraction or reordering sequence.
                                   // Each U8_PREV_OR_FFFD step moves equalPrefixLength back over one
                                   // code point of the shared prefix (read from left).
    1139             :             UChar32 c;
    1140           0 :             do {
    1141           0 :                 U8_PREV_OR_FFFD(left, 0, equalPrefixLength, c);
    1142           0 :             } while(equalPrefixLength > 0 && data->isUnsafeBackward(c, numeric));
    1143             :         }
    1144             :         // See the notes in the UTF-16 version.
    1145             : 
    1146             :         // Pass the actual start of each string into the CollationIterators,
    1147             :         // plus the equalPrefixLength position,
    1148             :         // so that prefix matches back into the equal prefix work.
    1149             :     }
    1150             : 
    1151             :     int32_t result;
    1152           0 :     int32_t fastLatinOptions = settings->fastLatinOptions;
                           // Fast path: taken only when fastLatinOptions is non-negative and the
                           // next byte on each side (if there is one) does not exceed
                           // CollationFastLatin::LATIN_MAX_UTF8_LEAD.
    1153           0 :     if(fastLatinOptions >= 0 &&
    1154           0 :             (equalPrefixLength == leftLength ||
    1155           0 :                 left[equalPrefixLength] <= CollationFastLatin::LATIN_MAX_UTF8_LEAD) &&
    1156           0 :             (equalPrefixLength == rightLength ||
    1157           0 :                 right[equalPrefixLength] <= CollationFastLatin::LATIN_MAX_UTF8_LEAD)) {
    1158           0 :         if(leftLength >= 0) {
                                   // Length-delimited: pass the remaining lengths after the prefix.
    1159           0 :             result = CollationFastLatin::compareUTF8(data->fastLatinTable,
    1160           0 :                                                      settings->fastLatinPrimaries,
    1161             :                                                      fastLatinOptions,
    1162             :                                                      left + equalPrefixLength,
    1163             :                                                      leftLength - equalPrefixLength,
    1164             :                                                      right + equalPrefixLength,
    1165           0 :                                                      rightLength - equalPrefixLength);
    1166             :         } else {
                                   // NUL-terminated: pass -1 for both lengths.
    1167           0 :             result = CollationFastLatin::compareUTF8(data->fastLatinTable,
    1168           0 :                                                      settings->fastLatinPrimaries,
    1169             :                                                      fastLatinOptions,
    1170             :                                                      left + equalPrefixLength, -1,
    1171           0 :                                                      right + equalPrefixLength, -1);
    1172             :         }
    1173             :     } else {
    1174           0 :         result = CollationFastLatin::BAIL_OUT_RESULT;
    1175             :     }
    1176             : 
    1177           0 :     if(result == CollationFastLatin::BAIL_OUT_RESULT) {
                               // Full comparison. The iterators receive the original string start
                               // plus equalPrefixLength as the starting position; the iterator type
                               // depends on settings->dontCheckFCD().
    1178           0 :         if(settings->dontCheckFCD()) {
    1179           0 :             UTF8CollationIterator leftIter(data, numeric, left, equalPrefixLength, leftLength);
    1180           0 :             UTF8CollationIterator rightIter(data, numeric, right, equalPrefixLength, rightLength);
    1181           0 :             result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
    1182             :         } else {
    1183           0 :             FCDUTF8CollationIterator leftIter(data, numeric, left, equalPrefixLength, leftLength);
    1184           0 :             FCDUTF8CollationIterator rightIter(data, numeric, right, equalPrefixLength, rightLength);
    1185           0 :             result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
    1186             :         }
    1187             :     }
                           // Done unless the strings compared equal, the strength includes the
                           // identical level, and no error occurred.
    1188           0 :     if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
    1189           0 :         return (UCollationResult)result;
    1190             :     }
    1191             : 
    1192             :     // Note: If NUL-terminated, we could get the actual limits from the iterators now.
    1193             :     // That would complicate the iterators a bit, NUL-terminated strings are only a C convenience,
    1194             :     // and the benefit seems unlikely to be measurable.
    1195             : 
    1196             :     // Compare identical level.
    1197           0 :     const Normalizer2Impl &nfcImpl = data->nfcImpl;
    1198           0 :     left += equalPrefixLength;
    1199           0 :     right += equalPrefixLength;
                           // Make positive lengths relative to the advanced pointers. Negative
                           // (NUL-terminated) lengths are left alone; with leftLength == 0 the
                           // prefix scan above cannot have advanced, so equalPrefixLength is 0.
    1200           0 :     if(leftLength > 0) {
    1201           0 :         leftLength -= equalPrefixLength;
    1202           0 :         rightLength -= equalPrefixLength;
    1203             :     }
    1204           0 :     if(settings->dontCheckFCD()) {
    1205           0 :         UTF8NFDIterator leftIter(left, leftLength);
    1206           0 :         UTF8NFDIterator rightIter(right, rightLength);
    1207           0 :         return compareNFDIter(nfcImpl, leftIter, rightIter);
    1208             :     } else {
    1209           0 :         FCDUTF8NFDIterator leftIter(data, left, leftLength);
    1210           0 :         FCDUTF8NFDIterator rightIter(data, right, rightLength);
    1211           0 :         return compareNFDIter(nfcImpl, leftIter, rightIter);
    1212             :     }
    1213             : }
    1214             : 
    1215             : UCollationResult
    1216           0 : RuleBasedCollator::compare(UCharIterator &left, UCharIterator &right,
    1217             :                            UErrorCode &errorCode) const {
                           // Compares the text delivered by two UCharIterators.
                           // Returns UCOL_EQUAL right away when errorCode already indicates a
                           // failure or when both arguments are the same iterator object.
    1218           0 :     if(U_FAILURE(errorCode) || &left == &right) { return UCOL_EQUAL; }
    1219           0 :     UBool numeric = settings->isNumeric();
    1220             : 
    1221             :     // Identical-prefix test.
    1222           0 :     int32_t equalPrefixLength = 0;
    1223             :     {
    1224             :         UChar32 leftUnit;
    1225             :         UChar32 rightUnit;
                               // Read one unit from each iterator per step while they are equal.
                               // A negative value is treated as "no more text"; when both sides
                               // return it together the strings are identical.
    1226           0 :         while((leftUnit = left.next(&left)) == (rightUnit = right.next(&right))) {
    1227           0 :             if(leftUnit < 0) { return UCOL_EQUAL; }
    1228           0 :             ++equalPrefixLength;
    1229             :         }
    1230             : 
    1231             :         // Back out the code units that differed, for the real collation comparison.
    1232           0 :         if(leftUnit >= 0) { left.previous(&left); }
    1233           0 :         if(rightUnit >= 0) { right.previous(&right); }
    1234             : 
    1235           0 :         if(equalPrefixLength > 0) {
    1236           0 :             if((leftUnit >= 0 && data->isUnsafeBackward(leftUnit, numeric)) ||
    1237           0 :                     (rightUnit >= 0 && data->isUnsafeBackward(rightUnit, numeric))) {
    1238             :                 // Identical prefix: Back up to the start of a contraction or reordering sequence.
                                       // Both iterators step back together; only the unit read
                                       // from left is tested because the prefix is identical.
    1239           0 :                 do {
    1240           0 :                     --equalPrefixLength;
    1241           0 :                     leftUnit = left.previous(&left);
    1242           0 :                     right.previous(&right);
    1243           0 :                 } while(equalPrefixLength > 0 && data->isUnsafeBackward(leftUnit, numeric));
    1244             :             }
    1245             :             // See the notes in the UTF-16 version.
    1246             :         }
    1247             :     }
    1248             : 
                           // Compare the levels up to quaternary. Only the FCD-checking iterators
                           // are given equalPrefixLength as an extra constructor argument.
    1249             :     UCollationResult result;
    1250           0 :     if(settings->dontCheckFCD()) {
    1251           0 :         UIterCollationIterator leftIter(data, numeric, left);
    1252           0 :         UIterCollationIterator rightIter(data, numeric, right);
    1253           0 :         result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
    1254             :     } else {
    1255           0 :         FCDUIterCollationIterator leftIter(data, numeric, left, equalPrefixLength);
    1256           0 :         FCDUIterCollationIterator rightIter(data, numeric, right, equalPrefixLength);
    1257           0 :         result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
    1258             :     }
    1259           0 :     if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
    1260           0 :         return result;
    1261             :     }
    1262             : 
    1263             :     // Compare identical level.
                           // Reposition both iterators at equalPrefixLength using the UITER_ZERO
                           // origin (presumably an absolute index from the start -- confirm in uiter.h).
    1264           0 :     left.move(&left, equalPrefixLength, UITER_ZERO);
    1265           0 :     right.move(&right, equalPrefixLength, UITER_ZERO);
    1266           0 :     const Normalizer2Impl &nfcImpl = data->nfcImpl;
    1267           0 :     if(settings->dontCheckFCD()) {
    1268           0 :         UIterNFDIterator leftIter(left);
    1269           0 :         UIterNFDIterator rightIter(right);
    1270           0 :         return compareNFDIter(nfcImpl, leftIter, rightIter);
    1271             :     } else {
    1272           0 :         FCDUIterNFDIterator leftIter(data, left, equalPrefixLength);
    1273           0 :         FCDUIterNFDIterator rightIter(data, right, equalPrefixLength);
    1274           0 :         return compareNFDIter(nfcImpl, leftIter, rightIter);
    1275             :     }
    1276             : }
    1277             : 
    1278             : CollationKey &
    1279           0 : RuleBasedCollator::getCollationKey(const UnicodeString &s, CollationKey &key,
    1280             :                                    UErrorCode &errorCode) const {
                           // Convenience overload: forwards the string's buffer and length
                           // to the (const UChar *, int32_t) overload.
    1281           0 :     return getCollationKey(s.getBuffer(), s.length(), key, errorCode);
    1282             : }
    1283             : 
    1284             : CollationKey &
    1285           0 : RuleBasedCollator::getCollationKey(const UChar *s, int32_t length, CollationKey& key,
    1286             :                                    UErrorCode &errorCode) const {
                           // Builds the sort key for s[0..length) into key and returns key.
                           // Whenever errorCode indicates a failure, key is returned in the
                           // bogus state.
    1287           0 :     if(U_FAILURE(errorCode)) {
    1288           0 :         return key.setToBogus();
    1289             :     }
                           // A NULL string is accepted only together with length 0.
    1290           0 :     if(s == NULL && length != 0) {
    1291           0 :         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    1292           0 :         return key.setToBogus();
    1293             :     }
    1294           0 :     key.reset();  // resets the "bogus" state
    1295           0 :     CollationKeyByteSink sink(key);
    1296           0 :     writeSortKey(s, length, sink, errorCode);
    1297           0 :     if(U_FAILURE(errorCode)) {
    1298           0 :         key.setToBogus();
    1299           0 :     } else if(key.isBogus()) {
                               // No error code was set but the key turned bogus while being
                               // written; this is reported as a memory allocation failure.
    1300           0 :         errorCode = U_MEMORY_ALLOCATION_ERROR;
    1301             :     } else {
    1302           0 :         key.setLength(sink.NumberOfBytesAppended());
    1303             :     }
    1304           0 :     return key;
    1305             : }
    1306             : 
    1307             : int32_t
    1308           0 : RuleBasedCollator::getSortKey(const UnicodeString &s,
    1309             :                               uint8_t *dest, int32_t capacity) const {
                           // Convenience overload: forwards the string's buffer and length
                           // to the (const UChar *, int32_t, ...) overload.
    1310           0 :     return getSortKey(s.getBuffer(), s.length(), dest, capacity);
    1311             : }
    1312             : 
    1313             : int32_t
    1314           0 : RuleBasedCollator::getSortKey(const UChar *s, int32_t length,
    1315             :                               uint8_t *dest, int32_t capacity) const {
                           // Writes the sort key for s into dest (at most capacity bytes) and
                           // returns sink.NumberOfBytesAppended(), or 0 for invalid arguments
                           // or when writing the key fails.
                           // Invalid: NULL s with non-zero length, negative capacity,
                           // or NULL dest with positive capacity.
    1316           0 :     if((s == NULL && length != 0) || capacity < 0 || (dest == NULL && capacity > 0)) {
    1317           0 :         return 0;
    1318             :     }
    1319           0 :     uint8_t noDest[1] = { 0 };
    1320           0 :     if(dest == NULL) {
    1321             :         // Distinguish pure preflighting from an allocation error.
                               // The sink gets a valid dummy buffer with capacity 0.
    1322           0 :         dest = noDest;
    1323           0 :         capacity = 0;
    1324             :     }
    1325           0 :     FixedSortKeyByteSink sink(reinterpret_cast<char *>(dest), capacity);
                           // This API has no error parameter, so a local error code is used
                           // and a failure is mapped to a return value of 0.
    1326           0 :     UErrorCode errorCode = U_ZERO_ERROR;
    1327           0 :     writeSortKey(s, length, sink, errorCode);
    1328           0 :     return U_SUCCESS(errorCode) ? sink.NumberOfBytesAppended() : 0;
    1329             : }
    1330             : 
    1331             : void
    1332           0 : RuleBasedCollator::writeSortKey(const UChar *s, int32_t length,
    1333             :                                 SortKeyByteSink &sink, UErrorCode &errorCode) const {
                           // Writes the complete sort key for s into sink: the levels up to
                           // quaternary, the identical level when the strength is UCOL_IDENTICAL,
                           // and a final 0 terminator byte. Does nothing if errorCode already
                           // indicates a failure.
    1334           0 :     if(U_FAILURE(errorCode)) { return; }
                           // A negative length yields a NULL limit (NUL-terminated input).
    1335           0 :     const UChar *limit = (length >= 0) ? s + length : NULL;
    1336           0 :     UBool numeric = settings->isNumeric();
    1337           0 :     CollationKeys::LevelCallback callback;
                           // Both branches make the same call, starting at PRIMARY_LEVEL;
                           // they differ only in the iterator type chosen by dontCheckFCD().
    1338           0 :     if(settings->dontCheckFCD()) {
    1339           0 :         UTF16CollationIterator iter(data, numeric, s, s, limit);
    1340           0 :         CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes, *settings,
    1341             :                                                   sink, Collation::PRIMARY_LEVEL,
    1342           0 :                                                   callback, TRUE, errorCode);
    1343             :     } else {
    1344           0 :         FCDUTF16CollationIterator iter(data, numeric, s, s, limit);
    1345           0 :         CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes, *settings,
    1346             :                                                   sink, Collation::PRIMARY_LEVEL,
    1347           0 :                                                   callback, TRUE, errorCode);
    1348             :     }
    1349           0 :     if(settings->getStrength() == UCOL_IDENTICAL) {
    1350           0 :         writeIdenticalLevel(s, limit, sink, errorCode);
    1351             :     }
                           // The terminator is appended without re-checking errorCode.
    1352             :     static const char terminator = 0;  // TERMINATOR_BYTE
    1353           0 :     sink.Append(&terminator, 1);
    1354             : }
    1355             : 
    1356             : void
    1357           0 : RuleBasedCollator::writeIdenticalLevel(const UChar *s, const UChar *limit,
    1358             :                                        SortKeyByteSink &sink, UErrorCode &errorCode) const {
                           // Appends the identical level for [s, limit) to sink: a level
                           // separator byte followed by the text encoded through
                           // u_writeIdenticalLevelRun(). A NULL limit means s is NUL-terminated.
    1359             :     // NFD quick check
                           // The returned pointer is used below as the end of the leading part
                           // that is written as is (presumably text that is already NFD).
    1360           0 :     const UChar *nfdQCYesLimit = data->nfcImpl.decompose(s, limit, NULL, errorCode);
    1361           0 :     if(U_FAILURE(errorCode)) { return; }
    1362           0 :     sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
                           // prev is threaded through the run writer calls so that the second
                           // run continues from the value returned by the first.
    1363           0 :     UChar32 prev = 0;
    1364           0 :     if(nfdQCYesLimit != s) {
    1365           0 :         prev = u_writeIdenticalLevelRun(prev, s, (int32_t)(nfdQCYesLimit - s), sink);
    1366             :     }
    1367             :     // Is there non-NFD text?
    1368             :     int32_t destLengthEstimate;
    1369           0 :     if(limit != NULL) {
    1370           0 :         if(nfdQCYesLimit == limit) { return; }
    1371           0 :         destLengthEstimate = (int32_t)(limit - nfdQCYesLimit);
    1372             :     } else {
    1373             :         // s is NUL-terminated
    1374           0 :         if(*nfdQCYesLimit == 0) { return; }
    1375           0 :         destLengthEstimate = -1;
    1376             :     }
                           // Decompose the remainder into a temporary string and write it as a
                           // second run. errorCode is not re-checked after this decompose() call.
    1377           0 :     UnicodeString nfd;
    1378           0 :     data->nfcImpl.decompose(nfdQCYesLimit, limit, nfd, destLengthEstimate, errorCode);
    1379           0 :     u_writeIdenticalLevelRun(prev, nfd.getBuffer(), nfd.length(), sink);
    1380             : }
    1381             : 
    1382             : namespace {
    1383             : 
    1384             : /**
    1385             :  * internalNextSortKeyPart() calls CollationKeys::writeSortKeyUpToQuaternary()
    1386             :  * with an instance of this callback class.
    1387             :  * When another level is about to be written, the callback
    1388             :  * records the level and the number of bytes that will be written until
    1389             :  * the sink (which is actually a FixedSortKeyByteSink) fills up.
    1390             :  *
    1391             :  * When internalNextSortKeyPart() is called again, it restarts with the last level
    1392             :  * and ignores as many bytes as were written previously for that level.
    1393             :  */
    1394             : class PartLevelCallback : public CollationKeys::LevelCallback {
    1395             : public:
                           // Starts at PRIMARY_LEVEL with the sink's current remaining capacity.
    1396           0 :     PartLevelCallback(const SortKeyByteSink &s)
    1397           0 :             : sink(s), level(Collation::PRIMARY_LEVEL) {
    1398           0 :         levelCapacity = sink.GetRemainingCapacity();
    1399           0 :     }
    1400           0 :     virtual ~PartLevelCallback() {}
                           // Returns TRUE and records level l together with the sink's remaining
                           // capacity as long as the sink has not overflowed; returns FALSE
                           // (recording nothing) once it has.
    1401           0 :     virtual UBool needToWrite(Collation::Level l) {
    1402           0 :         if(!sink.Overflowed()) {
    1403             :             // Remember a level that will be at least partially written.
    1404           0 :             level = l;
    1405           0 :             levelCapacity = sink.GetRemainingCapacity();
    1406           0 :             return TRUE;
    1407             :         } else {
    1408           0 :             return FALSE;
    1409             :         }
    1410             :     }
                           // Last level recorded by needToWrite() (or the initial PRIMARY_LEVEL).
    1411           0 :     Collation::Level getLevel() const { return level; }
                           // Remaining sink capacity captured when that level was recorded.
    1412           0 :     int32_t getLevelCapacity() const { return levelCapacity; }
    1413             : 
    1414             : private:
                           // The sink is only observed (const reference), never written to here.
    1415             :     const SortKeyByteSink &sink;
    1416             :     Collation::Level level;
    1417             :     int32_t levelCapacity;
    1418             : };
    1419             : 
    1420             : }  // namespace
    1421             : 
    1422             : int32_t
    1423           0 : RuleBasedCollator::internalNextSortKeyPart(UCharIterator *iter, uint32_t state[2],
    1424             :                                            uint8_t *dest, int32_t count, UErrorCode &errorCode) const {
                           // Produces the next piece (at most count bytes) of the sort key for the
                           // text behind iter and writes it to dest.
                           // state[0] holds the level to resume with and state[1] the number of
                           // bytes of that level to skip; both are updated before returning.
                           // Returns the number of bytes stored in dest, or 0 on error.
    1425           0 :     if(U_FAILURE(errorCode)) { return 0; }
    1426           0 :     if(iter == NULL || state == NULL || count < 0 || (count > 0 && dest == NULL)) {
    1427           0 :         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    1428           0 :         return 0;
    1429             :     }
    1430           0 :     if(count == 0) { return 0; }
    1431             : 
                           // The sink drops the first state[1] bytes, and the text is
                           // re-read from its start on every call.
    1432           0 :     FixedSortKeyByteSink sink(reinterpret_cast<char *>(dest), count);
    1433           0 :     sink.IgnoreBytes((int32_t)state[1]);
    1434           0 :     iter->move(iter, 0, UITER_START);
    1435             : 
    1436           0 :     Collation::Level level = (Collation::Level)state[0];
    1437           0 :     if(level <= Collation::QUATERNARY_LEVEL) {
    1438           0 :         UBool numeric = settings->isNumeric();
    1439           0 :         PartLevelCallback callback(sink);
    1440           0 :         if(settings->dontCheckFCD()) {
    1441           0 :             UIterCollationIterator ci(data, numeric, *iter);
    1442           0 :             CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleBytes, *settings,
    1443           0 :                                                       sink, level, callback, FALSE, errorCode);
    1444             :         } else {
    1445           0 :             FCDUIterCollationIterator ci(data, numeric, *iter, 0);
    1446           0 :             CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleBytes, *settings,
    1447           0 :                                                       sink, level, callback, FALSE, errorCode);
    1448             :         }
    1449           0 :         if(U_FAILURE(errorCode)) { return 0; }
                               // More bytes were appended than fit into dest: save the level and
                               // capacity recorded by the callback as the resume state and report
                               // a completely filled buffer.
    1450           0 :         if(sink.NumberOfBytesAppended() > count) {
    1451           0 :             state[0] = (uint32_t)callback.getLevel();
    1452           0 :             state[1] = (uint32_t)callback.getLevelCapacity();
    1453           0 :             return count;
    1454             :         }
    1455             :         // All of the normal levels are done.
    1456           0 :         if(settings->getStrength() == UCOL_IDENTICAL) {
    1457           0 :             level = Collation::IDENTICAL_LEVEL;
    1458           0 :             iter->move(iter, 0, UITER_START);
    1459             :         }
    1460             :         // else fall through to setting ZERO_LEVEL
    1461             :     }
    1462             : 
    1463           0 :     if(level == Collation::IDENTICAL_LEVEL) {
                               // Capture the capacity before writing; it becomes state[1] if the
                               // identical level does not fit.
    1464           0 :         int32_t levelCapacity = sink.GetRemainingCapacity();
                               // Collect the iterator's remaining units into a string, because
                               // writeIdenticalLevel() works on a UChar array.
    1465           0 :         UnicodeString s;
    1466             :         for(;;) {
    1467           0 :             UChar32 c = iter->next(iter);
    1468           0 :             if(c < 0) { break; }
    1469           0 :             s.append((UChar)c);
    1470           0 :         }
    1471           0 :         const UChar *sArray = s.getBuffer();
    1472           0 :         writeIdenticalLevel(sArray, sArray + s.length(), sink, errorCode);
    1473           0 :         if(U_FAILURE(errorCode)) { return 0; }
    1474           0 :         if(sink.NumberOfBytesAppended() > count) {
    1475           0 :             state[0] = (uint32_t)level;
    1476           0 :             state[1] = (uint32_t)levelCapacity;
    1477           0 :             return count;
    1478             :         }
    1479             :     }
    1480             : 
    1481             :     // ZERO_LEVEL: Fill the remainder of dest with 00 bytes.
                           // The return value counts only the bytes appended by the sink,
                           // not the zero padding.
    1482           0 :     state[0] = (uint32_t)Collation::ZERO_LEVEL;
    1483           0 :     state[1] = 0;
    1484           0 :     int32_t length = sink.NumberOfBytesAppended();
    1485           0 :     int32_t i = length;
    1486           0 :     while(i < count) { dest[i++] = 0; }
    1487           0 :     return length;
    1488             : }
    1489             : 
    1490             : void
    1491           0 : RuleBasedCollator::internalGetCEs(const UnicodeString &str, UVector64 &ces,
    1492             :                                   UErrorCode &errorCode) const {
                           // Appends to ces every collation element that the iterator returns
                           // for str, stopping at Collation::NO_CE. The iterator type depends
                           // on settings->dontCheckFCD(); both branches run the same loop.
                           // NOTE(review): the loops do not test errorCode themselves; presumably
                           // nextCE() returns NO_CE after a failure -- confirm.
    1493           0 :     if(U_FAILURE(errorCode)) { return; }
    1494           0 :     const UChar *s = str.getBuffer();
    1495           0 :     const UChar *limit = s + str.length();
    1496           0 :     UBool numeric = settings->isNumeric();
    1497           0 :     if(settings->dontCheckFCD()) {
    1498           0 :         UTF16CollationIterator iter(data, numeric, s, s, limit);
    1499             :         int64_t ce;
    1500           0 :         while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) {
    1501           0 :             ces.addElement(ce, errorCode);
    1502             :         }
    1503             :     } else {
    1504           0 :         FCDUTF16CollationIterator iter(data, numeric, s, s, limit);
    1505             :         int64_t ce;
    1506           0 :         while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) {
    1507           0 :             ces.addElement(ce, errorCode);
    1508             :         }
    1509             :     }
    1510             : }
    1511             : 
    1512             : namespace {
    1513             : 
                       // Appends "<letter><SUBTAG>" to s, with the subtag converted to upper case
                       // and a '_' separator first if s is not empty. Does nothing when errorCode
                       // indicates a failure or when length is 0.
    1514           0 : void appendSubtag(CharString &s, char letter, const char *subtag, int32_t length,
    1515             :                   UErrorCode &errorCode) {
    1516           0 :     if(U_FAILURE(errorCode) || length == 0) { return; }
    1517           0 :     if(!s.isEmpty()) {
    1518           0 :         s.append('_', errorCode);
    1519             :     }
    1520           0 :     s.append(letter, errorCode);
    1521           0 :     for(int32_t i = 0; i < length; ++i) {
    1522           0 :         s.append(uprv_toupper(subtag[i]), errorCode);
    1523             :     }
    1524             : }
    1525             : 
                       // Appends "<letter><valueChar>" to s, with a '_' separator first if s is
                       // not empty. Does nothing when errorCode indicates a failure.
    1526           0 : void appendAttribute(CharString &s, char letter, UColAttributeValue value,
    1527             :                      UErrorCode &errorCode) {
    1528           0 :     if(U_FAILURE(errorCode)) { return; }
    1529           0 :     if(!s.isEmpty()) {
    1530           0 :         s.append('_', errorCode);
    1531             :     }
                           // 32-character table indexed directly by the attribute value;
                           // '.' presumably marks values without a short-definition character.
                           // NOTE(review): value is not range-checked before indexing;
                           // callers are presumed to pass only values inside the table -- confirm.
    1532             :     static const char *valueChars = "1234...........IXO..SN..LU......";
    1533           0 :     s.append(letter, errorCode);
    1534           0 :     s.append(valueChars[value], errorCode);
    1535             : }
    1536             : 
    1537             : }  // namespace
    1538             : 
    1539             : int32_t
    1540           0 : RuleBasedCollator::internalGetShortDefinitionString(const char *locale,
    1541             :                                                     char *buffer, int32_t capacity,
    1542             :                                                     UErrorCode &errorCode) const {
                           // Builds the short definition string for this collator: '_'-separated
                           // items, each a letter followed by an attribute character or an
                           // upper-cased locale subtag. The string is copied into buffer if it
                           // fits, and the value returned by u_terminateChars() is passed back.
                           // Returns 0 on error.
    1543           0 :     if(U_FAILURE(errorCode)) { return 0; }
                           // A NULL buffer is valid only with capacity 0;
                           // otherwise capacity must not be negative.
    1544           0 :     if(buffer == NULL ? capacity != 0 : capacity < 0) {
    1545           0 :         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    1546           0 :         return 0;
    1547             :     }
    1548           0 :     if(locale == NULL) {
    1549           0 :         locale = internalGetLocaleID(ULOC_VALID_LOCALE, errorCode);
    1550             :     }
    1551             : 
                           // resultLocale has one more element than the capacity passed to
                           // ucol_getFunctionalEquivalent(), leaving room for the terminator
                           // stored at resultLocale[length] below.
                           // NOTE(review): this relies on length <= ULOC_FULLNAME_CAPACITY whenever
                           // the call succeeds -- presumably guaranteed by that API; confirm.
    1552             :     char resultLocale[ULOC_FULLNAME_CAPACITY + 1];
    1553             :     int32_t length = ucol_getFunctionalEquivalent(resultLocale, ULOC_FULLNAME_CAPACITY,
    1554             :                                                   "collation", locale,
    1555           0 :                                                   NULL, &errorCode);
    1556           0 :     if(U_FAILURE(errorCode)) { return 0; }
    1557           0 :     if(length == 0) {
    1558           0 :         uprv_strcpy(resultLocale, "root");
    1559             :     } else {
    1560           0 :         resultLocale[length] = 0;
    1561             :     }
    1562             : 
    1563             :     // Append items in alphabetic order of their short definition letters.
                           // Attribute items are added only for attributes that were set
                           // explicitly; subtag items are skipped by appendSubtag() when empty.
    1564           0 :     CharString result;
    1565             :     char subtag[ULOC_KEYWORD_AND_VALUES_CAPACITY];
    1566             : 
    1567           0 :     if(attributeHasBeenSetExplicitly(UCOL_ALTERNATE_HANDLING)) {
    1568           0 :         appendAttribute(result, 'A', getAttribute(UCOL_ALTERNATE_HANDLING, errorCode), errorCode);
    1569             :     }
    1570             :     // ATTR_VARIABLE_TOP not supported because 'B' was broken.
    1571             :     // See ICU tickets #10372 and #10386.
    1572           0 :     if(attributeHasBeenSetExplicitly(UCOL_CASE_FIRST)) {
    1573           0 :         appendAttribute(result, 'C', getAttribute(UCOL_CASE_FIRST, errorCode), errorCode);
    1574             :     }
    1575           0 :     if(attributeHasBeenSetExplicitly(UCOL_NUMERIC_COLLATION)) {
    1576           0 :         appendAttribute(result, 'D', getAttribute(UCOL_NUMERIC_COLLATION, errorCode), errorCode);
    1577             :     }
    1578           0 :     if(attributeHasBeenSetExplicitly(UCOL_CASE_LEVEL)) {
    1579           0 :         appendAttribute(result, 'E', getAttribute(UCOL_CASE_LEVEL, errorCode), errorCode);
    1580             :     }
    1581           0 :     if(attributeHasBeenSetExplicitly(UCOL_FRENCH_COLLATION)) {
    1582           0 :         appendAttribute(result, 'F', getAttribute(UCOL_FRENCH_COLLATION, errorCode), errorCode);
    1583             :     }
    1584             :     // Note: UCOL_HIRAGANA_QUATERNARY_MODE is deprecated and never changes away from default.
    1585           0 :     length = uloc_getKeywordValue(resultLocale, "collation", subtag, UPRV_LENGTHOF(subtag), &errorCode);
    1586           0 :     appendSubtag(result, 'K', subtag, length, errorCode);
    1587           0 :     length = uloc_getLanguage(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);
    1588           0 :     appendSubtag(result, 'L', subtag, length, errorCode);
    1589           0 :     if(attributeHasBeenSetExplicitly(UCOL_NORMALIZATION_MODE)) {
    1590           0 :         appendAttribute(result, 'N', getAttribute(UCOL_NORMALIZATION_MODE, errorCode), errorCode);
    1591             :     }
    1592           0 :     length = uloc_getCountry(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);
    1593           0 :     appendSubtag(result, 'R', subtag, length, errorCode);
    1594           0 :     if(attributeHasBeenSetExplicitly(UCOL_STRENGTH)) {
    1595           0 :         appendAttribute(result, 'S', getAttribute(UCOL_STRENGTH, errorCode), errorCode);
    1596             :     }
    1597           0 :     length = uloc_getVariant(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);
    1598           0 :     appendSubtag(result, 'V', subtag, length, errorCode);
    1599           0 :     length = uloc_getScript(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);
    1600           0 :     appendSubtag(result, 'Z', subtag, length, errorCode);
    1601             : 
    1602           0 :     if(U_FAILURE(errorCode)) { return 0; }
                           // Copy only when the whole string fits; termination and the returned
                           // length are delegated to u_terminateChars().
    1603           0 :     if(result.length() <= capacity) {
    1604           0 :         uprv_memcpy(buffer, result.data(), result.length());
    1605             :     }
    1606           0 :     return u_terminateChars(buffer, capacity, result.length(), &errorCode);
    1607             : }
    1608             : 
    // Forwards to data->isUnsafeBackward() for code point c, also passing the
    // value of settings->isNumeric(), and returns that call's result unchanged.
    1609             : UBool
    1610           0 : RuleBasedCollator::isUnsafe(UChar32 c) const {
    1611           0 :     return data->isUnsafeBackward(c, settings->isNumeric());
    1612             : }
    1613             : 
    // Stores into t->maxExpansions whatever
    // CollationElementIterator::computeMaxExpansions() returns for t->data.
    // Any failure is reported through errorCode by the callee; it is not checked here.
    // initMaxExpansions() passes this function to umtx_initOnce() as the initializer.
    // NOTE(review): t points to const yet a member is assigned; presumably
    // maxExpansions is declared mutable in CollationTailoring -- confirm.
    1614             : void U_CALLCONV
    1615           0 : RuleBasedCollator::computeMaxExpansions(const CollationTailoring *t, UErrorCode &errorCode) {
    1616           0 :     t->maxExpansions = CollationElementIterator::computeMaxExpansions(t->data, errorCode);
    1617           0 : }
    1618             : 
    // Hands computeMaxExpansions and this collator's tailoring to umtx_initOnce(),
    // keyed on tailoring->maxExpansionsInitOnce.
    // errorCode is passed through to umtx_initOnce(); the return value is
    // U_SUCCESS(errorCode) evaluated after that call.
    1619             : UBool
    1620           0 : RuleBasedCollator::initMaxExpansions(UErrorCode &errorCode) const {
    1621           0 :     umtx_initOnce(tailoring->maxExpansionsInitOnce, computeMaxExpansions, tailoring, errorCode);
    1622           0 :     return U_SUCCESS(errorCode);
    1623             : }
    1624             : 
    // Allocates a CollationElementIterator over the UnicodeString `source` for
    // this collator.
    // Returns NULL when initMaxExpansions() fails or when the constructor leaves
    // a failure in the error code; in the latter case the new object is deleted
    // first. The error code is a local variable, so the reason for a failure is
    // not reported to the caller.
    // On success the returned object was allocated with `new`; presumably the
    // caller is responsible for deleting it -- confirm against the API header.
    1625             : CollationElementIterator *
    1626           0 : RuleBasedCollator::createCollationElementIterator(const UnicodeString& source) const {
    1627           0 :     UErrorCode errorCode = U_ZERO_ERROR;
    // The max-expansions initialization must succeed before an iterator is built.
    1628           0 :     if(!initMaxExpansions(errorCode)) { return NULL; }
    1629           0 :     CollationElementIterator *cei = new CollationElementIterator(source, this, errorCode);
    1630           0 :     if(U_FAILURE(errorCode)) {
    // Construction reported an error: discard the object instead of returning it.
    1631           0 :         delete cei;
    1632           0 :         return NULL;
    1633             :     }
    1634           0 :     return cei;
    1635             : }
    1636             : 
    // Allocates a CollationElementIterator over the CharacterIterator `source`
    // for this collator.
    // Returns NULL when initMaxExpansions() fails or when the constructor leaves
    // a failure in the error code; in the latter case the new object is deleted
    // first. The error code is a local variable, so the reason for a failure is
    // not reported to the caller.
    // On success the returned object was allocated with `new`; presumably the
    // caller is responsible for deleting it -- confirm against the API header.
    1637             : CollationElementIterator *
    1638           0 : RuleBasedCollator::createCollationElementIterator(const CharacterIterator& source) const {
    1639           0 :     UErrorCode errorCode = U_ZERO_ERROR;
    // The max-expansions initialization must succeed before an iterator is built.
    1640           0 :     if(!initMaxExpansions(errorCode)) { return NULL; }
    1641           0 :     CollationElementIterator *cei = new CollationElementIterator(source, this, errorCode);
    1642           0 :     if(U_FAILURE(errorCode)) {
    // Construction reported an error: discard the object instead of returning it.
    1643           0 :         delete cei;
    1644           0 :         return NULL;
    1645             :     }
    1646           0 :     return cei;
    1647             : }
    1648             : 
    // Returns the result of the static CollationElementIterator::getMaxExpansion()
    // lookup of `order` in tailoring->maxExpansions.
    // initMaxExpansions() is called first with a local error code, and its result
    // is explicitly discarded via the (void) cast.
    // NOTE(review): if that initialization failed, tailoring->maxExpansions is
    // still passed on as-is; presumably the static helper tolerates an unset
    // table -- confirm.
    1649             : int32_t
    1650           0 : RuleBasedCollator::getMaxExpansion(int32_t order) const {
    1651           0 :     UErrorCode errorCode = U_ZERO_ERROR;
    1652           0 :     (void)initMaxExpansions(errorCode);
    1653           0 :     return CollationElementIterator::getMaxExpansion(tailoring->maxExpansions, order);
    1654             : }
    1655             : 
    1656             : U_NAMESPACE_END
    1657             : 
    1658             : #endif  // !UCONFIG_NO_COLLATION

Generated by: LCOV version 1.13