LCOV - output.info - intl/icu/source/i18n/alphaindex.cpp

LCOV - code coverage report

Current view:	top level - intl/icu/source/i18n - alphaindex.cpp (source / functions)		Hit	Total	Coverage
Test:	output.info	Lines:	0	672	0.0 %
Date:	2017-07-14 16:53:18	Functions:	0	69	0.0 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : *******************************************************************************
       5             : * Copyright (C) 2009-2014, International Business Machines Corporation and
       6             : * others. All Rights Reserved.
       7             : *******************************************************************************
       8             : */
       9             : 
      10             : #include "unicode/utypes.h"
      11             : 
      12             : #if !UCONFIG_NO_COLLATION
      13             : 
      14             : #include "unicode/alphaindex.h"
      15             : #include "unicode/coll.h"
      16             : #include "unicode/localpointer.h"
      17             : #include "unicode/normalizer2.h"
      18             : #include "unicode/tblcoll.h"
      19             : #include "unicode/uchar.h"
      20             : #include "unicode/ulocdata.h"
      21             : #include "unicode/uniset.h"
      22             : #include "unicode/uobject.h"
      23             : #include "unicode/usetiter.h"
      24             : #include "unicode/utf16.h"
      25             : 
      26             : #include "cmemory.h"
      27             : #include "cstring.h"
      28             : #include "uassert.h"
      29             : #include "uvector.h"
      30             : #include "uvectr64.h"
      31             : 
      32             : //#include <string>
      33             : //#include <iostream>
      34             : 
      35             : U_NAMESPACE_BEGIN
      36             : 
      37             : namespace {
      38             : 
      39             : /**
      40             :  * Prefix string for Chinese index buckets.
      41             :  * See http://unicode.org/repos/cldr/trunk/specs/ldml/tr35-collation.html#Collation_Indexes
      42             :  */
      43             : const UChar BASE[1] = { 0xFDD0 };
      44             : const int32_t BASE_LENGTH = 1;
      45             : 
      46             : UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
      47             :                                 const UnicodeString &one, const UnicodeString &other);
      48             : 
      49             : }  // namespace
      50             : 
      51             : static int32_t U_CALLCONV
      52             : collatorComparator(const void *context, const void *left, const void *right);
      53             : 
      54             : static int32_t U_CALLCONV
      55             : recordCompareFn(const void *context, const void *left, const void *right);
      56             : 
      57             : //  UVector<Record *> support function, delete a Record.
      58             : static void U_CALLCONV
      59           0 : alphaIndex_deleteRecord(void *obj) {
                           // The element arrives as an untyped void*; it is cast back to Record
                           // so that delete operates on the Record type.
      60           0 :     delete static_cast<AlphabeticIndex::Record *>(obj);
      61           0 : }
      62             : 
      63             : namespace {
      64             : 
      65           0 : UnicodeString *ownedString(const UnicodeString &s, LocalPointer<UnicodeString> &owned,
      66             :                            UErrorCode &errorCode) {
                           // Produces a heap-allocated string for the caller to store:
                           //  - returns NULL without doing anything if errorCode already signals failure;
                           //  - if `owned` currently holds a string, releases and returns that one
                           //    (s is not copied in that case);
                           //  - otherwise returns a new copy of s, setting U_MEMORY_ALLOCATION_ERROR
                           //    (and returning NULL) when the allocation yields NULL.
      67           0 :     if (U_FAILURE(errorCode)) { return NULL; }
      68           0 :     if (owned.isValid()) {
      69           0 :         return owned.orphan();
      70             :     }
      71           0 :     UnicodeString *p = new UnicodeString(s);
      72           0 :     if (p == NULL) {
      73           0 :         errorCode = U_MEMORY_ALLOCATION_ERROR;
      74             :     }
      75           0 :     return p;
      76             : }
      77             : 
      78           0 : inline UnicodeString *getString(const UVector &list, int32_t i) {
                           // Typed accessor: element i of list viewed as a UnicodeString*.
                           // This helper itself performs no range or type check.
      79           0 :     return static_cast<UnicodeString *>(list[i]);
      80             : }
      81             : 
      82           0 : inline AlphabeticIndex::Bucket *getBucket(const UVector &list, int32_t i) {
                           // Typed accessor: element i of list viewed as a Bucket*.
                           // This helper itself performs no range or type check.
      83           0 :     return static_cast<AlphabeticIndex::Bucket *>(list[i]);
      84             : }
      85             : 
      86           0 : inline AlphabeticIndex::Record *getRecord(const UVector &list, int32_t i) {
                           // Typed accessor: element i of list viewed as a Record*.
                           // This helper itself performs no range or type check.
      87           0 :     return static_cast<AlphabeticIndex::Record *>(list[i]);
      88             : }
      89             : 
      90             : /**
      91             :  * Like Java Collections.binarySearch(List, String, Comparator).
      92             :  *
      93             :  * @return the index>=0 where the item was found,
      94             :  *         or the index<0 for inserting the string at ~index in sorted order
      95             :  */
      96           0 : int32_t binarySearch(const UVector &list, const UnicodeString &s, const Collator &coll) {
                           // An empty list has a single possible insertion point, index 0, encoded as ~0.
      97           0 :     if (list.size() == 0) { return ~0; }
                           // Search window: start is inclusive, limit is exclusive.
      98           0 :     int32_t start = 0;
      99           0 :     int32_t limit = list.size();
     100             :     for (;;) {
     101           0 :         int32_t i = (start + limit) / 2;
     102           0 :         const UnicodeString *si = static_cast<UnicodeString *>(list.elementAt(i));
                               // The error code is local to this iteration and is not inspected afterwards.
     103           0 :         UErrorCode errorCode = U_ZERO_ERROR;
     104           0 :         UCollationResult cmp = coll.compare(s, *si, errorCode);
     105           0 :         if (cmp == UCOL_EQUAL) {
     106           0 :             return i;
     107           0 :         } else if (cmp < 0) {
                                   // i == start means the window has shrunk to one element,
                                   // so the search ends with an insertion point instead of a hit.
     108           0 :             if (i == start) {
     109           0 :                 return ~start;  // insert s before *si
     110             :             }
     111           0 :             limit = i;
     112             :         } else {
     113           0 :             if (i == start) {
     114           0 :                 return ~(start + 1);  // insert s after *si
     115             :             }
     116           0 :             start = i;
     117             :         }
     118           0 :     }
     119             : }
     120             : 
     121             : }  // namespace
     122             : 
     123             : // The BucketList is not in the anonymous namespace because only Clang
     124             : // seems to support its use in other classes from there.
     125             : // However, we also don't need U_I18N_API because it is not used from outside the i18n library.
     126             : class BucketList : public UObject {
                       // Holds two vectors of Bucket pointers: the complete list and the list of
                       // visible buckets. Both are deleted by the out-of-line destructor.
     127             : public:
     128           0 :     BucketList(UVector *bucketList, UVector *publicBucketList)
     129           0 :             : bucketList_(bucketList), immutableVisibleList_(publicBucketList) {
                               // Number the entries of publicBucketList 0, 1, 2, ... in list order;
                               // getBucketIndex() returns these numbers.
     130           0 :         int32_t displayIndex = 0;
     131           0 :         for (int32_t i = 0; i < publicBucketList->size(); ++i) {
     132           0 :             getBucket(*publicBucketList, i)->displayIndex_ = displayIndex++;
     133             :         }
     134           0 :     }
     135             : 
     136             :     // The virtual destructor must not be inline.
     137             :     // See ticket #8454 for details.
     138             :     virtual ~BucketList();
     139             : 
                           // Number of entries in the visible list (not in bucketList_).
     140           0 :     int32_t getBucketCount() const {
     141           0 :         return immutableVisibleList_->size();
     142             :     }
     143             : 
                           // Returns the displayIndex_ of the bucket that `name` falls into.
                           // errorCode is handed to compare() but is not checked in this function.
                           // NOTE(review): presumably bucketList_ is non-empty and ordered by
                           // lowerBoundary_ under the same collator -- confirm against the callers.
     144           0 :     int32_t getBucketIndex(const UnicodeString &name, const Collator &collatorPrimaryOnly,
     145             :                            UErrorCode &errorCode) {
     146             :         // binary search
     147           0 :         int32_t start = 0;
     148           0 :         int32_t limit = bucketList_->size();
                               // Narrow [start, limit) until one entry is left; start only moves onto
                               // entries whose lowerBoundary_ did not compare greater than name.
     149           0 :         while ((start + 1) < limit) {
     150           0 :             int32_t i = (start + limit) / 2;
     151           0 :             const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, i);
     152             :             UCollationResult nameVsBucket =
     153           0 :                 collatorPrimaryOnly.compare(name, bucket->lowerBoundary_, errorCode);
     154           0 :             if (nameVsBucket < 0) {
     155           0 :                 limit = i;
     156             :             } else {
     157           0 :                 start = i;
     158             :             }
     159             :         }
     160           0 :         const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, start);
                               // A bucket that redirects to another one is reported under the
                               // index of the bucket it redirects to.
     161           0 :         if (bucket->displayBucket_ != NULL) {
     162           0 :             bucket = bucket->displayBucket_;
     163             :         }
     164           0 :         return bucket->displayIndex_;
     165             :     }
     166             : 
     167             :     /** All of the buckets, visible and invisible. */
     168             :     UVector *bucketList_;
     169             :     /** Just the visible buckets. */
     170             :     UVector *immutableVisibleList_;
     171             : };
     172             : 
     173           0 : BucketList::~BucketList() {
                           // Both members may point at the same vector (the constructor accepts the
                           // same pointer twice), so the second delete is skipped in that case.
     174           0 :     delete bucketList_;
     175           0 :     if (immutableVisibleList_ != bucketList_) {
     176           0 :         delete immutableVisibleList_;
     177             :     }
     178           0 : }
     179             : 
     180           0 : AlphabeticIndex::ImmutableIndex::~ImmutableIndex() {
                           // The index deletes both the bucket list and the collator it holds.
     181           0 :     delete buckets_;
     182           0 :     delete collatorPrimaryOnly_;
     183           0 : }
     184             : 
     185             : int32_t
     186           0 : AlphabeticIndex::ImmutableIndex::getBucketCount() const {
                           // Forwards to the bucket list, which counts the visible buckets.
     187           0 :     return buckets_->getBucketCount();
     188             : }
     189             : 
     190             : int32_t
     191           0 : AlphabeticIndex::ImmutableIndex::getBucketIndex(
     192             :         const UnicodeString &name, UErrorCode &errorCode) const {
                           // Forwards to the bucket list's search, supplying this index's own collator.
                           // errorCode is passed through unchanged and is not checked here.
     193           0 :     return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, errorCode);
     194             : }
     195             : 
     196             : const AlphabeticIndex::Bucket *
     197           0 : AlphabeticIndex::ImmutableIndex::getBucket(int32_t index) const {
                           // Returns the visible bucket at `index`, or NULL when index is out of range.
     198           0 :     if (0 <= index && index < buckets_->getBucketCount()) {
                               // Qualified with icu:: so the call names the namespace-level helper
                               // and not this member function of the same name.
     199           0 :         return icu::getBucket(*buckets_->immutableVisibleList_, index);
     200             :     } else {
     201           0 :         return NULL;
     202             :     }
     203             : }
     204             : 
     205           0 : AlphabeticIndex::AlphabeticIndex(const Locale &locale, UErrorCode &status)
     206             :         : inputList_(NULL),
     207             :           labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
     208             :           maxLabelCount_(99),
     209             :           initialLabels_(NULL), firstCharsInScripts_(NULL),
     210             :           collator_(NULL), collatorPrimaryOnly_(NULL),
     211           0 :           buckets_(NULL) {
                           // All pointer members start out NULL and maxLabelCount_ defaults to 99;
                           // the remaining setup is delegated to init(), which receives the locale.
     212           0 :     init(&locale, status);
     213           0 : }
     214             : 
     215             : 
     216           0 : AlphabeticIndex::AlphabeticIndex(RuleBasedCollator *collator, UErrorCode &status)
     217             :         : inputList_(NULL),
     218             :           labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
     219             :           maxLabelCount_(99),
     220             :           initialLabels_(NULL), firstCharsInScripts_(NULL),
     221             :           collator_(collator), collatorPrimaryOnly_(NULL),
     222           0 :           buckets_(NULL) {
                           // Same defaults as the Locale constructor, except that collator_ is taken
                           // from the argument. The destructor deletes collator_, so the passed-in
                           // collator is owned by this object from here on.
                           // init() is called with a NULL locale pointer in this variant.
     223           0 :     init(NULL, status);
     224           0 : }
     225             : 
     226             : 
     227             : 
     228           0 : AlphabeticIndex::~AlphabeticIndex() {
                           // Deletes every heap object referenced by the members; members that are
                           // still NULL are harmless to delete.
     229           0 :     delete collator_;
     230           0 :     delete collatorPrimaryOnly_;
     231           0 :     delete firstCharsInScripts_;
     232           0 :     delete buckets_;
     233           0 :     delete inputList_;
     234           0 :     delete initialLabels_;
     235           0 : }
     236             : 
     237             : 
     238           0 : AlphabeticIndex &AlphabeticIndex::addLabels(const UnicodeSet &additions, UErrorCode &status) {
                           // Does nothing when status already signals failure.
     239           0 :     if (U_FAILURE(status)) {
     240           0 :         return *this;
     241             :     }
                           // Merge the extra labels into the label set, then call clearBuckets()
                           // (presumably so buckets are rebuilt from the new label set -- confirm).
     242           0 :     initialLabels_->addAll(additions);
     243           0 :     clearBuckets();
     244           0 :     return *this;
     245             : }
     246             : 
     247             : 
     248           0 : AlphabeticIndex &AlphabeticIndex::addLabels(const Locale &locale, UErrorCode &status) {
                           // NOTE(review): unlike the UnicodeSet overload there is no U_FAILURE(status)
                           // guard here, so clearBuckets() runs even if status is (or becomes) a failure.
     249           0 :     addIndexExemplars(locale, status);
     250           0 :     clearBuckets();
     251           0 :     return *this;
     252             : }
     253             : 
     254             : 
     255           0 : AlphabeticIndex::ImmutableIndex *AlphabeticIndex::buildImmutableIndex(UErrorCode &errorCode) {
                           // Builds a fresh bucket list and a clone of collatorPrimaryOnly_ and wraps
                           // them in a new ImmutableIndex. Returns NULL on failure.
     256           0 :     if (U_FAILURE(errorCode)) { return NULL; }
     257             :     // In C++, the ImmutableIndex must own its copy of the BucketList,
     258             :     // even if it contains no records, for proper memory management.
     259             :     // We could clone the buckets_ if they are not NULL,
     260             :     // but that would be worth it only if this method is called multiple times,
     261             :     // or called after using the old-style bucket iterator API.
     262           0 :     LocalPointer<BucketList> immutableBucketList(createBucketList(errorCode));
                           // NOTE(review): the cast assumes clone() yields a RuleBasedCollator -- confirm.
     263             :     LocalPointer<RuleBasedCollator> coll(
     264           0 :         static_cast<RuleBasedCollator *>(collatorPrimaryOnly_->clone()));
                           // NOTE(review): errorCode is not re-checked after createBucketList(); a NULL
                           // result for any reason is reported as U_MEMORY_ALLOCATION_ERROR, replacing
                           // whatever code createBucketList() had set.
     265           0 :     if (immutableBucketList.isNull() || coll.isNull()) {
     266           0 :         errorCode = U_MEMORY_ALLOCATION_ERROR;
     267           0 :         return NULL;
     268             :     }
                           // getAlias() passes the raw pointers while the LocalPointers still hold them,
                           // so both objects are cleaned up if the allocation below fails.
     269           0 :     ImmutableIndex *immIndex = new ImmutableIndex(immutableBucketList.getAlias(), coll.getAlias());
     270           0 :     if (immIndex == NULL) {
     271           0 :         errorCode = U_MEMORY_ALLOCATION_ERROR;
     272           0 :         return NULL;
     273             :     }
     274             :     // The ImmutableIndex adopted its parameter objects.
     275           0 :     immutableBucketList.orphan();
     276           0 :     coll.orphan();
     277           0 :     return immIndex;
     278             : }
     279             : 
     280           0 : int32_t AlphabeticIndex::getBucketCount(UErrorCode &status) {
                           // initBuckets() is called first; buckets_ is only dereferenced when status
                           // is still a success afterwards. Returns 0 on failure.
     281           0 :     initBuckets(status);
     282           0 :     if (U_FAILURE(status)) {
     283           0 :         return 0;
     284             :     }
     285           0 :     return buckets_->getBucketCount();
     286             : }
     287             : 
     288             : 
     289           0 : int32_t AlphabeticIndex::getRecordCount(UErrorCode &status) {
                           // Returns 0 both on a failing status and when no input list exists yet;
                           // otherwise the number of entries in inputList_.
     290           0 :     if (U_FAILURE(status) || inputList_ == NULL) {
     291           0 :         return 0;
     292             :     }
     293           0 :     return inputList_->size();
     294             : }
     295             : 
     296           0 : void AlphabeticIndex::initLabels(UVector &indexCharacters, UErrorCode &errorCode) const {
                           // Fills indexCharacters with heap-allocated label strings taken from
                           // initialLabels_, each inserted at the position binarySearch() reports for
                           // collatorPrimaryOnly_, and then thins the list out if it ended up longer
                           // than maxLabelCount_ allows.
     297           0 :     const Normalizer2 *nfkdNormalizer = Normalizer2::getNFKDInstance(errorCode);
     298           0 :     if (U_FAILURE(errorCode)) { return; }
     299             : 
                           // The first and last entries of firstCharsInScripts_ bound the accepted range.
                           // NOTE(review): assumes firstCharsInScripts_ is non-empty here -- confirm.
     300           0 :     const UnicodeString &firstScriptBoundary = *getString(*firstCharsInScripts_, 0);
     301             :     const UnicodeString &overflowBoundary =
     302           0 :         *getString(*firstCharsInScripts_, firstCharsInScripts_->size() - 1);
     303             : 
     304             :     // We make a sorted array of elements.
     305             :     // Some of the input may be redundant.
     306             :     // That is, we might have c, ch, d, where "ch" sorts just like "c", "h".
     307             :     // We filter out those cases.
     308           0 :     UnicodeSetIterator iter(*initialLabels_);
     309           0 :     while (iter.next()) {
     310           0 :         const UnicodeString *item = &iter.getString();
                               // ownedItem stays empty unless a trimmed copy of the label is made below.
     311           0 :         LocalPointer<UnicodeString> ownedItem;
     312             :         UBool checkDistinct;
     313           0 :         int32_t itemLength = item->length();
                               // Labels with at most one code point skip the distinctness check.
     314           0 :         if (!item->hasMoreChar32Than(0, itemLength, 1)) {
     315           0 :             checkDistinct = FALSE;
     316           0 :         } else if(item->charAt(itemLength - 1) == 0x2a &&  // '*'
     317           0 :                 item->charAt(itemLength - 2) != 0x2a) {
     318             :             // Use a label if it is marked with one trailing star,
     319             :             // even if the label string sorts the same when all contractions are suppressed.
                                   // The copy drops the trailing star; item now points at that copy.
     320           0 :             ownedItem.adoptInstead(new UnicodeString(*item, 0, itemLength - 1));
     321           0 :             item = ownedItem.getAlias();
     322           0 :             if (item == NULL) {
     323           0 :                 errorCode = U_MEMORY_ALLOCATION_ERROR;
     324           0 :                 return;
     325             :             }
     326           0 :             checkDistinct = FALSE;
     327             :         } else {
     328           0 :             checkDistinct = TRUE;
     329             :         }
     330           0 :         if (collatorPrimaryOnly_->compare(*item, firstScriptBoundary, errorCode) < 0) {
     331             :             // Ignore a primary-ignorable or non-alphabetic index character.
     332           0 :         } else if (collatorPrimaryOnly_->compare(*item, overflowBoundary, errorCode) >= 0) {
     333             :             // Ignore an index character that will land in the overflow bucket.
     334           0 :         } else if (checkDistinct &&
     335           0 :                 collatorPrimaryOnly_->compare(*item, separated(*item), errorCode) == 0) {
     336             :             // Ignore a multi-code point index character that does not sort distinctly
     337             :             // from the sequence of its separate characters.
     338             :         } else {
                                   // A negative result encodes the insertion point as ~index; a
                                   // non-negative one is the index of an entry that compares equal.
     339           0 :             int32_t insertionPoint = binarySearch(indexCharacters, *item, *collatorPrimaryOnly_);
     340           0 :             if (insertionPoint < 0) {
                                       // ownedString() hands over the trimmed copy if there is one,
                                       // otherwise a new copy of *item.
     341           0 :                 indexCharacters.insertElementAt(
     342           0 :                     ownedString(*item, ownedItem, errorCode), ~insertionPoint, errorCode);
     343             :             } else {
                                       // An equal-sorting label is already stored: replace it only if
                                       // the new one is judged better.
     344           0 :                 const UnicodeString &itemAlreadyIn = *getString(indexCharacters, insertionPoint);
     345           0 :                 if (isOneLabelBetterThanOther(*nfkdNormalizer, *item, itemAlreadyIn)) {
     346             :                     indexCharacters.setElementAt(
     347           0 :                         ownedString(*item, ownedItem, errorCode), insertionPoint);
     348             :                 }
     349             :             }
     350             :         }
     351             :     }
     352           0 :     if (U_FAILURE(errorCode)) { return; }
     353             : 
     354             :     // if the result is still too large, cut down to maxLabelCount_ elements, by removing every nth element
     355             : 
                           // Note that size is one less than the number of collected labels.
     356           0 :     int32_t size = indexCharacters.size() - 1;
     357           0 :     if (size > maxLabelCount_) {
                               // count numbers every original element, kept or removed; bump is that
                               // number scaled by maxLabelCount_ / size. An element whose bump equals
                               // the bump of the last kept element is removed.
     358           0 :         int32_t count = 0;
     359           0 :         int32_t old = -1;
                               // i advances only when an element is kept: after a removal the next
                               // element is examined at the same index.
     360           0 :         for (int32_t i = 0; i < indexCharacters.size();) {
     361           0 :             ++count;
     362           0 :             int32_t bump = count * maxLabelCount_ / size;
     363           0 :             if (bump == old) {
     364           0 :                 indexCharacters.removeElementAt(i);
     365             :             } else {
     366           0 :                 old = bump;
     367           0 :                 ++i;
     368             :             }
     369             :         }
     370             :     }
     371             : }
     372             : 
     373             : namespace {
     374             : 
     375           0 : const UnicodeString &fixLabel(const UnicodeString &current, UnicodeString &temp) {
                           // Turns an internal label into the one to display. The result refers either
                           // to `current` itself or to `temp`, so it is only usable while they are alive.
                           // Labels that do not start with the BASE prefix are returned unchanged.
     376           0 :     if (!current.startsWith(BASE, BASE_LENGTH)) {
     377           0 :         return current;
     378             :     }
                           // The code unit right after the prefix decides how the label is rewritten.
     379           0 :     UChar rest = current.charAt(BASE_LENGTH);
     380           0 :     if (0x2800 < rest && rest <= 0x28FF) { // stroke count
                               // count is 1..255; it is written as decimal digits (ones digit first,
                               // then tens and hundreds inserted in front), followed by U+5283.
     381           0 :         int32_t count = rest-0x2800;
     382           0 :         temp.setTo((UChar)(0x30 + count % 10));
     383           0 :         if (count >= 10) {
     384           0 :             count /= 10;
     385           0 :             temp.insert(0, (UChar)(0x30 + count % 10));
     386           0 :             if (count >= 10) {
     387           0 :                 count /= 10;
     388           0 :                 temp.insert(0, (UChar)(0x30 + count));
     389             :             }
     390             :         }
     391           0 :         return temp.append((UChar)0x5283);
     392             :     }
                           // Any other prefixed label: return what follows the BASE prefix.
     393           0 :     return temp.setTo(current, BASE_LENGTH);
     394             : }
     395             : 
     396           0 : UBool hasMultiplePrimaryWeights(
     397             :         const RuleBasedCollator &coll, uint32_t variableTop,
     398             :         const UnicodeString &s, UVector64 &ces, UErrorCode &errorCode) {
                           // Returns TRUE when at least two of the collation elements of s have a
                           // primary weight above variableTop; FALSE otherwise, including on failure.
                           // `ces` is scratch storage: it is emptied and refilled on every call.
     399           0 :     ces.removeAllElements();
     400           0 :     coll.internalGetCEs(s, ces, errorCode);
     401           0 :     if (U_FAILURE(errorCode)) { return FALSE; }
     402           0 :     UBool seenPrimary = FALSE;
     403           0 :     for (int32_t i = 0; i < ces.size(); ++i) {
     404           0 :         int64_t ce = ces.elementAti(i);
                               // The upper 32 bits of the 64-bit element are compared as the primary weight.
     405           0 :         uint32_t p = (uint32_t)(ce >> 32);
     406           0 :         if (p > variableTop) {
     407             :             // not primary ignorable
     408           0 :             if (seenPrimary) {
     409           0 :                 return TRUE;
     410             :             }
     411           0 :             seenPrimary = TRUE;
     412             :         }
     413             :     }
     414           0 :     return FALSE;
     415             : }
     416             : 
     417             : }  // namespace
     418             : 
     419           0 : BucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const {
     420             :     // Initialize indexCharacters.
     421           0 :     UVector indexCharacters(errorCode);
     422           0 :     indexCharacters.setDeleter(uprv_deleteUObject);
     423           0 :     initLabels(indexCharacters, errorCode);
     424           0 :     if (U_FAILURE(errorCode)) { return NULL; }
     425             : 
     426             :     // Variables for hasMultiplePrimaryWeights().
     427           0 :     UVector64 ces(errorCode);
     428             :     uint32_t variableTop;
     429           0 :     if (collatorPrimaryOnly_->getAttribute(UCOL_ALTERNATE_HANDLING, errorCode) == UCOL_SHIFTED) {
     430           0 :         variableTop = collatorPrimaryOnly_->getVariableTop(errorCode);
     431             :     } else {
     432           0 :         variableTop = 0;
     433             :     }
     434           0 :     UBool hasInvisibleBuckets = FALSE;
     435             : 
     436             :     // Helper arrays for Chinese Pinyin collation.
     437             :     Bucket *asciiBuckets[26] = {
     438             :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     439             :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
     440           0 :     };
     441             :     Bucket *pinyinBuckets[26] = {
     442             :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     443             :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
     444           0 :     };
     445           0 :     UBool hasPinyin = FALSE;
     446             : 
     447           0 :     LocalPointer<UVector> bucketList(new UVector(errorCode), errorCode);
     448           0 :     if (U_FAILURE(errorCode)) {
     449           0 :         return NULL;
     450             :     }
     451           0 :     bucketList->setDeleter(uprv_deleteUObject);
     452             : 
     453             :     // underflow bucket
     454           0 :     Bucket *bucket = new Bucket(getUnderflowLabel(), emptyString_, U_ALPHAINDEX_UNDERFLOW);
     455           0 :     if (bucket == NULL) {
     456           0 :         errorCode = U_MEMORY_ALLOCATION_ERROR;
     457           0 :         return NULL;
     458             :     }
     459           0 :     bucketList->addElement(bucket, errorCode);
     460           0 :     if (U_FAILURE(errorCode)) { return NULL; }
     461             : 
     462           0 :     UnicodeString temp;
     463             : 
     464             :     // fix up the list, adding underflow, additions, overflow
     465             :     // Insert inflow labels as needed.
     466           0 :     int32_t scriptIndex = -1;
     467           0 :     const UnicodeString *scriptUpperBoundary = &emptyString_;
     468           0 :     for (int32_t i = 0; i < indexCharacters.size(); ++i) {
     469           0 :         UnicodeString &current = *getString(indexCharacters, i);
     470           0 :         if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) >= 0) {
     471             :             // We crossed the script boundary into a new script.
     472           0 :             const UnicodeString &inflowBoundary = *scriptUpperBoundary;
     473           0 :             UBool skippedScript = FALSE;
     474             :             for (;;) {
     475           0 :                 scriptUpperBoundary = getString(*firstCharsInScripts_, ++scriptIndex);
     476           0 :                 if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) < 0) {
     477           0 :                     break;
     478             :                 }
     479           0 :                 skippedScript = TRUE;
     480             :             }
     481           0 :             if (skippedScript && bucketList->size() > 1) {
     482             :                 // We are skipping one or more scripts,
     483             :                 // and we are not just getting out of the underflow label.
     484           0 :                 bucket = new Bucket(getInflowLabel(), inflowBoundary, U_ALPHAINDEX_INFLOW);
     485           0 :                 if (bucket == NULL) {
     486           0 :                     errorCode = U_MEMORY_ALLOCATION_ERROR;
     487           0 :                     return NULL;
     488             :                 }
     489           0 :                 bucketList->addElement(bucket, errorCode);
     490             :             }
     491             :         }
     492             :         // Add a bucket with the current label.
     493           0 :         bucket = new Bucket(fixLabel(current, temp), current, U_ALPHAINDEX_NORMAL);
     494           0 :         if (bucket == NULL) {
     495           0 :             errorCode = U_MEMORY_ALLOCATION_ERROR;
     496           0 :             return NULL;
     497             :         }
     498           0 :         bucketList->addElement(bucket, errorCode);
     499             :         // Remember ASCII and Pinyin buckets for Pinyin redirects.
     500             :         UChar c;
     501           0 :         if (current.length() == 1 && 0x41 <= (c = current.charAt(0)) && c <= 0x5A) {  // A-Z
     502           0 :             asciiBuckets[c - 0x41] = bucket;
     503           0 :         } else if (current.length() == BASE_LENGTH + 1 && current.startsWith(BASE, BASE_LENGTH) &&
     504           0 :                 0x41 <= (c = current.charAt(BASE_LENGTH)) && c <= 0x5A) {
     505           0 :             pinyinBuckets[c - 0x41] = bucket;
     506           0 :             hasPinyin = TRUE;
     507             :         }
     508             :         // Check for multiple primary weights.
     509           0 :         if (!current.startsWith(BASE, BASE_LENGTH) &&
     510           0 :                 hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop, current,
     511           0 :                                           ces, errorCode) &&
     512           0 :                 current.charAt(current.length() - 1) != 0xFFFF /* !current.endsWith("\uffff") */) {
     513             :             // "AE-ligature" or "Sch" etc.
     514           0 :             for (int32_t i = bucketList->size() - 2;; --i) {
     515           0 :                 Bucket *singleBucket = getBucket(*bucketList, i);
     516           0 :                 if (singleBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
     517             :                     // There is no single-character bucket since the last
     518             :                     // underflow or inflow label.
     519           0 :                     break;
     520             :                 }
     521           0 :                 if (singleBucket->displayBucket_ == NULL &&
     522           0 :                         !hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop,
     523             :                                                    singleBucket->lowerBoundary_,
     524             :                                                    ces, errorCode)) {
     525             :                     // Add an invisible bucket that redirects strings greater than the expansion
     526             :                     // to the previous single-character bucket.
     527             :                     // For example, after ... Q R S Sch we add Sch\uFFFF->S
     528             :                     // and after ... Q R S Sch Sch\uFFFF St we add St\uFFFF->S.
     529           0 :                     bucket = new Bucket(emptyString_,
     530           0 :                         UnicodeString(current).append((UChar)0xFFFF),
     531           0 :                         U_ALPHAINDEX_NORMAL);
     532           0 :                     if (bucket == NULL) {
     533           0 :                         errorCode = U_MEMORY_ALLOCATION_ERROR;
     534           0 :                         return NULL;
     535             :                     }
     536           0 :                     bucket->displayBucket_ = singleBucket;
     537           0 :                     bucketList->addElement(bucket, errorCode);
     538           0 :                     hasInvisibleBuckets = TRUE;
     539           0 :                     break;
     540             :                 }
     541           0 :             }
     542             :         }
     543             :     }
     544           0 :     if (U_FAILURE(errorCode)) { return NULL; }
     545           0 :     if (bucketList->size() == 1) {
     546             :         // No real labels, show only the underflow label.
     547           0 :         BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
     548           0 :         if (bl == NULL) {
     549           0 :             errorCode = U_MEMORY_ALLOCATION_ERROR;
     550           0 :             return NULL;
     551             :         }
     552           0 :         bucketList.orphan();
     553           0 :         return bl;
     554             :     }
     555             :     // overflow bucket
     556           0 :     bucket = new Bucket(getOverflowLabel(), *scriptUpperBoundary, U_ALPHAINDEX_OVERFLOW);
     557           0 :     if (bucket == NULL) {
     558           0 :         errorCode = U_MEMORY_ALLOCATION_ERROR;
     559           0 :         return NULL;
     560             :     }
     561           0 :     bucketList->addElement(bucket, errorCode); // final
     562             : 
     563           0 :     if (hasPinyin) {
     564             :         // Redirect Pinyin buckets.
     565           0 :         Bucket *asciiBucket = NULL;
     566           0 :         for (int32_t i = 0; i < 26; ++i) {
     567           0 :             if (asciiBuckets[i] != NULL) {
     568           0 :                 asciiBucket = asciiBuckets[i];
     569             :             }
     570           0 :             if (pinyinBuckets[i] != NULL && asciiBucket != NULL) {
     571           0 :                 pinyinBuckets[i]->displayBucket_ = asciiBucket;
     572           0 :                 hasInvisibleBuckets = TRUE;
     573             :             }
     574             :         }
     575             :     }
     576             : 
     577           0 :     if (U_FAILURE(errorCode)) { return NULL; }
     578           0 :     if (!hasInvisibleBuckets) {
     579           0 :         BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
     580           0 :         if (bl == NULL) {
     581           0 :             errorCode = U_MEMORY_ALLOCATION_ERROR;
     582           0 :             return NULL;
     583             :         }
     584           0 :         bucketList.orphan();
     585           0 :         return bl;
     586             :     }
     587             :     // Merge inflow buckets that are visually adjacent.
     588             :     // Iterate backwards: Merge inflow into overflow rather than the other way around.
     589           0 :     int32_t i = bucketList->size() - 1;
     590           0 :     Bucket *nextBucket = getBucket(*bucketList, i);
     591           0 :     while (--i > 0) {
     592           0 :         bucket = getBucket(*bucketList, i);
     593           0 :         if (bucket->displayBucket_ != NULL) {
     594           0 :             continue;  // skip invisible buckets
     595             :         }
     596           0 :         if (bucket->labelType_ == U_ALPHAINDEX_INFLOW) {
     597           0 :             if (nextBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
     598           0 :                 bucket->displayBucket_ = nextBucket;
     599           0 :                 continue;
     600             :             }
     601             :         }
     602           0 :         nextBucket = bucket;
     603             :     }
     604             : 
     605           0 :     LocalPointer<UVector> publicBucketList(new UVector(errorCode), errorCode);
     606           0 :     if (U_FAILURE(errorCode)) {
     607           0 :         return NULL;
     608             :     }
     609             :     // Do not call publicBucketList->setDeleter():
     610             :     // This vector shares its objects with the bucketList.
     611           0 :     for (int32_t i = 0; i < bucketList->size(); ++i) {
     612           0 :         bucket = getBucket(*bucketList, i);
     613           0 :         if (bucket->displayBucket_ == NULL) {
     614           0 :             publicBucketList->addElement(bucket, errorCode);
     615             :         }
     616             :     }
     617           0 :     if (U_FAILURE(errorCode)) { return NULL; }
     618           0 :     BucketList *bl = new BucketList(bucketList.getAlias(), publicBucketList.getAlias());
     619           0 :     if (bl == NULL) {
     620           0 :         errorCode = U_MEMORY_ALLOCATION_ERROR;
     621           0 :         return NULL;
     622             :     }
     623           0 :     bucketList.orphan();
     624           0 :     publicBucketList.orphan();
     625           0 :     return bl;
     626             : }
     627             : 
     628             : /**
     629             :  * Creates an index, and buckets and sorts the list of records into the index.
     630             :  */
     631           0 : void AlphabeticIndex::initBuckets(UErrorCode &errorCode) {
     632           0 :     if (U_FAILURE(errorCode) || buckets_ != NULL) {
     633           0 :         return;
     634             :     }
     635           0 :     buckets_ = createBucketList(errorCode);
     636           0 :     if (U_FAILURE(errorCode) || inputList_ == NULL || inputList_->isEmpty()) {
     637           0 :         return;
     638             :     }
     639             : 
     640             :     // Sort the records by name.
     641             :     // Stable sort preserves input order of collation duplicates.
     642           0 :     inputList_->sortWithUComparator(recordCompareFn, collator_, errorCode);
     643             : 
     644             :     // Now, we traverse all of the input, which is now sorted.
     645             :     // If the item doesn't go in the current bucket, we find the next bucket that contains it.
     646             :     // This makes the process order n*log(n), since we just sort the list and then do a linear process.
     647             :     // However, if the user adds an item at a time and then gets the buckets, this isn't efficient, so
     648             :     // we need to improve it for that case.
     649             : 
     650           0 :     Bucket *currentBucket = getBucket(*buckets_->bucketList_, 0);
     651           0 :     int32_t bucketIndex = 1;
     652             :     Bucket *nextBucket;
     653             :     const UnicodeString *upperBoundary;
     654           0 :     if (bucketIndex < buckets_->bucketList_->size()) {
     655           0 :         nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
     656           0 :         upperBoundary = &nextBucket->lowerBoundary_;
     657             :     } else {
     658           0 :         nextBucket = NULL;
     659           0 :         upperBoundary = NULL;
     660             :     }
     661           0 :     for (int32_t i = 0; i < inputList_->size(); ++i) {
     662           0 :         Record *r = getRecord(*inputList_, i);
     663             :         // if the current bucket isn't the right one, find the one that is
     664             :         // We have a special flag for the last bucket so that we don't look any further
     665           0 :         while (upperBoundary != NULL &&
     666           0 :                 collatorPrimaryOnly_->compare(r->name_, *upperBoundary, errorCode) >= 0) {
     667           0 :             currentBucket = nextBucket;
     668             :             // now reset the boundary that we compare against
     669           0 :             if (bucketIndex < buckets_->bucketList_->size()) {
     670           0 :                 nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
     671           0 :                 upperBoundary = &nextBucket->lowerBoundary_;
     672             :             } else {
     673           0 :                 upperBoundary = NULL;
     674             :             }
     675             :         }
     676             :         // now put the record into the bucket.
     677           0 :         Bucket *bucket = currentBucket;
     678           0 :         if (bucket->displayBucket_ != NULL) {
     679           0 :             bucket = bucket->displayBucket_;
     680             :         }
     681           0 :         if (bucket->records_ == NULL) {
     682           0 :             bucket->records_ = new UVector(errorCode);
     683           0 :             if (bucket->records_ == NULL) {
     684           0 :                 errorCode = U_MEMORY_ALLOCATION_ERROR;
     685           0 :                 return;
     686             :             }
     687             :         }
     688           0 :         bucket->records_->addElement(r, errorCode);
     689             :     }
     690             : }
     691             : 
     692           0 : void AlphabeticIndex::clearBuckets() {
     693           0 :     if (buckets_ != NULL) {
     694           0 :         delete buckets_;
     695           0 :         buckets_ = NULL;
     696           0 :         internalResetBucketIterator();
     697             :     }
     698           0 : }
     699             : 
     700           0 : void AlphabeticIndex::internalResetBucketIterator() {
     701           0 :     labelsIterIndex_ = -1;
     702           0 :     currentBucket_ = NULL;
     703           0 : }
     704             : 
     705             : 
     706           0 : void AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status) {
     707           0 :     LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status));
     708           0 :     if (U_FAILURE(status)) {
     709           0 :         return;
     710             :     }
     711             : 
     712           0 :     UnicodeSet exemplars;
     713           0 :     ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status);
     714           0 :     if (U_SUCCESS(status)) {
     715           0 :         initialLabels_->addAll(exemplars);
     716           0 :         return;
     717             :     }
     718           0 :     status = U_ZERO_ERROR;  // Clear out U_MISSING_RESOURCE_ERROR
     719             : 
     720             :     // The locale data did not include explicit Index characters.
     721             :     // Synthesize a set of them from the locale's standard exemplar characters.
     722           0 :     ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status);
     723           0 :     if (U_FAILURE(status)) {
     724           0 :         return;
     725             :     }
     726             : 
     727             :     // question: should we add auxiliary exemplars?
     728           0 :     if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.size() == 0) {
     729           0 :         exemplars.add(0x61, 0x7A);
     730             :     }
     731           0 :     if (exemplars.containsSome(0xAC00, 0xD7A3)) {  // Hangul syllables
     732             :         // cut down to small list
     733           0 :         exemplars.remove(0xAC00, 0xD7A3).
     734           0 :             add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C).
     735           0 :             add(0xB9C8).add(0xBC14).add(0xC0AC).add(0xC544).
     736           0 :             add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0).
     737           0 :             add(0xD30C).add(0xD558);
     738             :     }
     739           0 :     if (exemplars.containsSome(0x1200, 0x137F)) {  // Ethiopic block
     740             :         // cut down to small list
     741             :         // make use of the fact that Ethiopic is allocated in 8's, where
     742             :         // the base is 0 mod 8.
     743             :         UnicodeSet ethiopic(
     744           0 :             UNICODE_STRING_SIMPLE("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"), status);
     745           0 :         UnicodeSetIterator it(ethiopic);
     746           0 :         while (it.next() && !it.isString()) {
     747           0 :             if ((it.getCodepoint() & 0x7) != 0) {
     748           0 :                 exemplars.remove(it.getCodepoint());
     749             :             }
     750             :         }
     751             :     }
     752             : 
     753             :     // Upper-case any that aren't already so.
     754             :     //   (We only do this for synthesized index characters.)
     755           0 :     UnicodeSetIterator it(exemplars);
     756           0 :     UnicodeString upperC;
     757           0 :     while (it.next()) {
     758           0 :         const UnicodeString &exemplarC = it.getString();
     759           0 :         upperC = exemplarC;
     760           0 :         upperC.toUpper(locale);
     761           0 :         initialLabels_->add(upperC);
     762             :     }
     763             : }
     764             : 
     765           0 : UBool AlphabeticIndex::addChineseIndexCharacters(UErrorCode &errorCode) {
     766           0 :     UnicodeSet contractions;
     767           0 :     collatorPrimaryOnly_->internalAddContractions(BASE[0], contractions, errorCode);
     768           0 :     if (U_FAILURE(errorCode) || contractions.isEmpty()) { return FALSE; }
     769           0 :     initialLabels_->addAll(contractions);
     770           0 :     UnicodeSetIterator iter(contractions);
     771           0 :     while (iter.next()) {
     772           0 :         const UnicodeString &s = iter.getString();
     773           0 :         U_ASSERT (s.startsWith(BASE, BASE_LENGTH));
     774           0 :         UChar c = s.charAt(s.length() - 1);
     775           0 :         if (0x41 <= c && c <= 0x5A) {  // A-Z
     776             :             // There are Pinyin labels, add ASCII A-Z labels as well.
     777           0 :             initialLabels_->add(0x41, 0x5A);  // A-Z
     778           0 :             break;
     779             :         }
     780             :     }
     781           0 :     return TRUE;
     782             : }
     783             : 
     784             : 
     785             : /*
     786             :  * Return the string with interspersed CGJs. Input must have more than 2 codepoints.
     787             :  */
     788             : static const UChar CGJ = 0x034F;
     789           0 : UnicodeString AlphabeticIndex::separated(const UnicodeString &item) {
     790           0 :     UnicodeString result;
     791           0 :     if (item.length() == 0) {
     792           0 :         return result;
     793             :     }
     794           0 :     int32_t i = 0;
     795             :     for (;;) {
     796           0 :         UChar32  cp = item.char32At(i);
     797           0 :         result.append(cp);
     798           0 :         i = item.moveIndex32(i, 1);
     799           0 :         if (i >= item.length()) {
     800           0 :             break;
     801             :         }
     802           0 :         result.append(CGJ);
     803           0 :     }
     804           0 :     return result;
     805             : }
     806             : 
     807             : 
     808           0 : UBool AlphabeticIndex::operator==(const AlphabeticIndex& /* other */) const {
     809           0 :     return FALSE;
     810             : }
     811             : 
     812             : 
     813           0 : UBool AlphabeticIndex::operator!=(const AlphabeticIndex& /* other */) const {
     814           0 :     return FALSE;
     815             : }
     816             : 
     817             : 
     818           0 : const RuleBasedCollator &AlphabeticIndex::getCollator() const {
     819           0 :     return *collator_;
     820             : }
     821             : 
     822             : 
     823           0 : const UnicodeString &AlphabeticIndex::getInflowLabel() const {
     824           0 :     return inflowLabel_;
     825             : }
     826             : 
     827           0 : const UnicodeString &AlphabeticIndex::getOverflowLabel() const {
     828           0 :     return overflowLabel_;
     829             : }
     830             : 
     831             : 
     832           0 : const UnicodeString &AlphabeticIndex::getUnderflowLabel() const {
     833           0 :     return underflowLabel_;
     834             : }
     835             : 
     836             : 
     837           0 : AlphabeticIndex &AlphabeticIndex::setInflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
     838           0 :     inflowLabel_ = label;
     839           0 :     clearBuckets();
     840           0 :     return *this;
     841             : }
     842             : 
     843             : 
     844           0 : AlphabeticIndex &AlphabeticIndex::setOverflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
     845           0 :     overflowLabel_ = label;
     846           0 :     clearBuckets();
     847           0 :     return *this;
     848             : }
     849             : 
     850             : 
     851           0 : AlphabeticIndex &AlphabeticIndex::setUnderflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
     852           0 :     underflowLabel_ = label;
     853           0 :     clearBuckets();
     854           0 :     return *this;
     855             : }
     856             : 
     857             : 
     858           0 : int32_t AlphabeticIndex::getMaxLabelCount() const {
     859           0 :     return maxLabelCount_;
     860             : }
     861             : 
     862             : 
     863           0 : AlphabeticIndex &AlphabeticIndex::setMaxLabelCount(int32_t maxLabelCount, UErrorCode &status) {
     864           0 :     if (U_FAILURE(status)) {
     865           0 :         return *this;
     866             :     }
     867           0 :     if (maxLabelCount <= 0) {
     868           0 :         status = U_ILLEGAL_ARGUMENT_ERROR;
     869           0 :         return *this;
     870             :     }
     871           0 :     maxLabelCount_ = maxLabelCount;
     872           0 :     clearBuckets();
     873           0 :     return *this;
     874             : }
     875             : 
     876             : 
     877             : //
     878             : //  init() - Common code for constructors.
     879             : //
     880             : 
     881           0 : void AlphabeticIndex::init(const Locale *locale, UErrorCode &status) {
     882           0 :     if (U_FAILURE(status)) { return; }
     883           0 :     if (locale == NULL && collator_ == NULL) {
     884           0 :         status = U_ILLEGAL_ARGUMENT_ERROR;
     885           0 :         return;
     886             :     }
     887             : 
     888           0 :     initialLabels_         = new UnicodeSet();
     889           0 :     if (initialLabels_ == NULL) {
     890           0 :         status = U_MEMORY_ALLOCATION_ERROR;
     891           0 :         return;
     892             :     }
     893             : 
     894           0 :     inflowLabel_.setTo((UChar)0x2026);    // Ellipsis
     895           0 :     overflowLabel_ = inflowLabel_;
     896           0 :     underflowLabel_ = inflowLabel_;
     897             : 
     898           0 :     if (collator_ == NULL) {
     899           0 :         Collator *coll = Collator::createInstance(*locale, status);
     900           0 :         if (U_FAILURE(status)) {
     901           0 :             delete coll;
     902           0 :             return;
     903             :         }
     904           0 :         if (coll == NULL) {
     905           0 :             status = U_MEMORY_ALLOCATION_ERROR;
     906           0 :             return;
     907             :         }
     908           0 :         collator_ = dynamic_cast<RuleBasedCollator *>(coll);
     909           0 :         if (collator_ == NULL) {
     910           0 :             delete coll;
     911           0 :             status = U_UNSUPPORTED_ERROR;
     912           0 :             return;
     913             :         }
     914             :     }
     915           0 :     collatorPrimaryOnly_ = static_cast<RuleBasedCollator *>(collator_->clone());
     916           0 :     if (collatorPrimaryOnly_ == NULL) {
     917           0 :         status = U_MEMORY_ALLOCATION_ERROR;
     918           0 :         return;
     919             :     }
     920           0 :     collatorPrimaryOnly_->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, status);
     921           0 :     firstCharsInScripts_ = firstStringsInScript(status);
     922           0 :     if (U_FAILURE(status)) { return; }
     923           0 :     firstCharsInScripts_->sortWithUComparator(collatorComparator, collatorPrimaryOnly_, status);
     924             :     // Guard against a degenerate collator where
     925             :     // some script boundary strings are primary ignorable.
     926             :     for (;;) {
     927           0 :         if (U_FAILURE(status)) { return; }
     928           0 :         if (firstCharsInScripts_->isEmpty()) {
     929             :             // AlphabeticIndex requires some non-ignorable script boundary strings.
     930           0 :             status = U_ILLEGAL_ARGUMENT_ERROR;
     931           0 :             return;
     932             :         }
     933           0 :         if (collatorPrimaryOnly_->compare(
     934           0 :                 *static_cast<UnicodeString *>(firstCharsInScripts_->elementAt(0)),
     935           0 :                 emptyString_, status) == UCOL_EQUAL) {
     936           0 :             firstCharsInScripts_->removeElementAt(0);
     937             :         } else {
     938           0 :             break;
     939             :         }
     940             :     }
     941             : 
     942             :     // Chinese index characters, which are specific to each of the several Chinese tailorings,
     943             :     // take precedence over the single locale data exemplar set per language.
     944           0 :     if (!addChineseIndexCharacters(status) && locale != NULL) {
     945           0 :         addIndexExemplars(*locale, status);
     946             :     }
     947             : }
     948             : 
     949             : 
     950             : //
     951             : //  Comparison function for UVector<UnicodeString *> sorting with a collator.
     952             : //
     953             : static int32_t U_CALLCONV
     954           0 : collatorComparator(const void *context, const void *left, const void *right) {
     955           0 :     const UElement *leftElement = static_cast<const UElement *>(left);
     956           0 :     const UElement *rightElement = static_cast<const UElement *>(right);
     957           0 :     const UnicodeString *leftString  = static_cast<const UnicodeString *>(leftElement->pointer);
     958           0 :     const UnicodeString *rightString = static_cast<const UnicodeString *>(rightElement->pointer);
     959             : 
     960           0 :     if (leftString == rightString) {
     961             :         // Catches case where both are NULL
     962           0 :         return 0;
     963             :     }
     964           0 :     if (leftString == NULL) {
     965           0 :         return 1;
     966             :     };
     967           0 :     if (rightString == NULL) {
     968           0 :         return -1;
     969             :     }
     970           0 :     const Collator *col = static_cast<const Collator *>(context);
     971           0 :     UErrorCode errorCode = U_ZERO_ERROR;
     972           0 :     return col->compare(*leftString, *rightString, errorCode);
     973             : }
     974             : 
     975             : //
     976             : //  Comparison function for UVector<Record *> sorting with a collator.
     977             : //
     978             : static int32_t U_CALLCONV
     979           0 : recordCompareFn(const void *context, const void *left, const void *right) {
     980           0 :     const UElement *leftElement = static_cast<const UElement *>(left);
     981           0 :     const UElement *rightElement = static_cast<const UElement *>(right);
     982           0 :     const AlphabeticIndex::Record *leftRec  = static_cast<const AlphabeticIndex::Record *>(leftElement->pointer);
     983           0 :     const AlphabeticIndex::Record *rightRec = static_cast<const AlphabeticIndex::Record *>(rightElement->pointer);
     984           0 :     const Collator *col = static_cast<const Collator *>(context);
     985           0 :     UErrorCode errorCode = U_ZERO_ERROR;
     986           0 :     return col->compare(leftRec->name_, rightRec->name_, errorCode);
     987             : }
     988             : 
     989           0 : UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) {
     990           0 :     if (U_FAILURE(status)) {
     991           0 :         return NULL;
     992             :     }
     993           0 :     LocalPointer<UVector> dest(new UVector(status), status);
     994           0 :     if (U_FAILURE(status)) {
     995           0 :         return NULL;
     996             :     }
     997           0 :     dest->setDeleter(uprv_deleteUObject);
     998             :     // Fetch the script-first-primary contractions which are defined in the root collator.
     999             :     // They all start with U+FDD1.
    1000           0 :     UnicodeSet set;
    1001           0 :     collatorPrimaryOnly_->internalAddContractions(0xFDD1, set, status);
    1002           0 :     if (U_FAILURE(status)) {
    1003           0 :         return NULL;
    1004             :     }
    1005           0 :     if (set.isEmpty()) {
    1006           0 :         status = U_UNSUPPORTED_ERROR;
    1007           0 :         return NULL;
    1008             :     }
    1009           0 :     UnicodeSetIterator iter(set);
    1010           0 :     while (iter.next()) {
    1011           0 :         const UnicodeString &boundary = iter.getString();
    1012           0 :         uint32_t gcMask = U_GET_GC_MASK(boundary.char32At(1));
    1013           0 :         if ((gcMask & (U_GC_L_MASK | U_GC_CN_MASK)) == 0) {
    1014             :             // Ignore boundaries for the special reordering groups.
    1015             :             // Take only those for "real scripts" (where the sample character is a Letter,
    1016             :             // and the one for unassigned implicit weights (Cn).
    1017           0 :             continue;
    1018             :         }
    1019           0 :         UnicodeString *s = new UnicodeString(boundary);
    1020           0 :         if (s == NULL) {
    1021           0 :             status = U_MEMORY_ALLOCATION_ERROR;
    1022           0 :             return NULL;
    1023             :         }
    1024           0 :         dest->addElement(s, status);
    1025             :     }
    1026           0 :     return dest.orphan();
    1027             : }
    1028             : 
    1029             : 
    1030             : namespace {
    1031             : 
    1032             : /**
    1033             :  * Returns true if one index character string is "better" than the other.
    1034             :  * Shorter NFKD is better, and otherwise NFKD-binary-less-than is
    1035             :  * better, and otherwise binary-less-than is better.
    1036             :  */
    1037           0 : UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
    1038             :                                 const UnicodeString &one, const UnicodeString &other) {
    1039             :     // This is called with primary-equal strings, but never with one.equals(other).
    1040           0 :     UErrorCode status = U_ZERO_ERROR;
    1041           0 :     UnicodeString n1 = nfkdNormalizer.normalize(one, status);
    1042           0 :     UnicodeString n2 = nfkdNormalizer.normalize(other, status);
    1043           0 :     if (U_FAILURE(status)) { return FALSE; }
    1044           0 :     int32_t result = n1.countChar32() - n2.countChar32();
    1045           0 :     if (result != 0) {
    1046           0 :         return result < 0;
    1047             :     }
    1048           0 :     result = n1.compareCodePointOrder(n2);
    1049           0 :     if (result != 0) {
    1050           0 :         return result < 0;
    1051             :     }
    1052           0 :     return one.compareCodePointOrder(other) < 0;
    1053             : }
    1054             : 
    1055             : }  // namespace
    1056             : 
    1057             : //
    1058             : //  Constructor & Destructor for AlphabeticIndex::Record
    1059             : //
    1060             : //     Records are internal only, instances are not directly surfaced in the public API.
    1061             : //     This class is mostly struct-like, with all public fields.
    1062             : 
    1063           0 : AlphabeticIndex::Record::Record(const UnicodeString &name, const void *data)
    1064           0 :         : name_(name), data_(data) {}
    1065             : 
    1066           0 : AlphabeticIndex::Record::~Record() {
    1067           0 : }
    1068             : 
    1069             : 
    1070           0 : AlphabeticIndex & AlphabeticIndex::addRecord(const UnicodeString &name, const void *data, UErrorCode &status) {
    1071           0 :     if (U_FAILURE(status)) {
    1072           0 :         return *this;
    1073             :     }
    1074           0 :     if (inputList_ == NULL) {
    1075           0 :         inputList_ = new UVector(status);
    1076           0 :         if (inputList_ == NULL) {
    1077           0 :             status = U_MEMORY_ALLOCATION_ERROR;
    1078           0 :             return *this;
    1079             :         }
    1080           0 :         inputList_->setDeleter(alphaIndex_deleteRecord);
    1081             :     }
    1082           0 :     Record *r = new Record(name, data);
    1083           0 :     if (r == NULL) {
    1084           0 :         status = U_MEMORY_ALLOCATION_ERROR;
    1085           0 :         return *this;
    1086             :     }
    1087           0 :     inputList_->addElement(r, status);
    1088           0 :     clearBuckets();
    1089             :     //std::string ss;
    1090             :     //std::string ss2;
    1091             :     //std::cout << "added record: name = \"" << r->name_.toUTF8String(ss) << "\"" << 
    1092             :     //             "   sortingName = \"" << r->sortingName_.toUTF8String(ss2) << "\"" << std::endl;
    1093           0 :     return *this;
    1094             : }
    1095             : 
    1096             : 
    1097           0 : AlphabeticIndex &AlphabeticIndex::clearRecords(UErrorCode &status) {
    1098           0 :     if (U_SUCCESS(status) && inputList_ != NULL && !inputList_->isEmpty()) {
    1099           0 :         inputList_->removeAllElements();
    1100           0 :         clearBuckets();
    1101             :     }
    1102           0 :     return *this;
    1103             : }
    1104             : 
    1105           0 : int32_t AlphabeticIndex::getBucketIndex(const UnicodeString &name, UErrorCode &status) {
    1106           0 :     initBuckets(status);
    1107           0 :     if (U_FAILURE(status)) {
    1108           0 :         return 0;
    1109             :     }
    1110           0 :     return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, status);
    1111             : }
    1112             : 
    1113             : 
    1114           0 : int32_t AlphabeticIndex::getBucketIndex() const {
    1115           0 :     return labelsIterIndex_;
    1116             : }
    1117             : 
    1118             : 
    1119           0 : UBool AlphabeticIndex::nextBucket(UErrorCode &status) {
    1120           0 :     if (U_FAILURE(status)) {
    1121           0 :         return FALSE;
    1122             :     }
    1123           0 :     if (buckets_ == NULL && currentBucket_ != NULL) {
    1124           0 :         status = U_ENUM_OUT_OF_SYNC_ERROR;
    1125           0 :         return FALSE;
    1126             :     }
    1127           0 :     initBuckets(status);
    1128           0 :     if (U_FAILURE(status)) {
    1129           0 :         return FALSE;
    1130             :     }
    1131           0 :     ++labelsIterIndex_;
    1132           0 :     if (labelsIterIndex_ >= buckets_->getBucketCount()) {
    1133           0 :         labelsIterIndex_ = buckets_->getBucketCount();
    1134           0 :         return FALSE;
    1135             :     }
    1136           0 :     currentBucket_ = getBucket(*buckets_->immutableVisibleList_, labelsIterIndex_);
    1137           0 :     resetRecordIterator();
    1138           0 :     return TRUE;
    1139             : }
    1140             : 
    1141           0 : const UnicodeString &AlphabeticIndex::getBucketLabel() const {
    1142           0 :     if (currentBucket_ != NULL) {
    1143           0 :         return currentBucket_->label_;
    1144             :     } else {
    1145           0 :         return emptyString_;
    1146             :     }
    1147             : }
    1148             : 
    1149             : 
    1150           0 : UAlphabeticIndexLabelType AlphabeticIndex::getBucketLabelType() const {
    1151           0 :     if (currentBucket_ != NULL) {
    1152           0 :         return currentBucket_->labelType_;
    1153             :     } else {
    1154           0 :         return U_ALPHAINDEX_NORMAL;
    1155             :     }
    1156             : }
    1157             : 
    1158             : 
    1159           0 : int32_t AlphabeticIndex::getBucketRecordCount() const {
    1160           0 :     if (currentBucket_ != NULL && currentBucket_->records_ != NULL) {
    1161           0 :         return currentBucket_->records_->size();
    1162             :     } else {
    1163           0 :         return 0;
    1164             :     }
    1165             : }
    1166             : 
    1167             : 
    1168           0 : AlphabeticIndex &AlphabeticIndex::resetBucketIterator(UErrorCode &status) {
    1169           0 :     if (U_FAILURE(status)) {
    1170           0 :         return *this;
    1171             :     }
    1172           0 :     internalResetBucketIterator();
    1173           0 :     return *this;
    1174             : }
    1175             : 
    1176             : 
    1177           0 : UBool AlphabeticIndex::nextRecord(UErrorCode &status) {
    1178           0 :     if (U_FAILURE(status)) {
    1179           0 :         return FALSE;
    1180             :     }
    1181           0 :     if (currentBucket_ == NULL) {
    1182             :         // We are trying to iterate over the items in a bucket, but there is no
    1183             :         // current bucket from the enumeration of buckets.
    1184           0 :         status = U_INVALID_STATE_ERROR;
    1185           0 :         return FALSE;
    1186             :     }
    1187           0 :     if (buckets_ == NULL) {
    1188           0 :         status = U_ENUM_OUT_OF_SYNC_ERROR;
    1189           0 :         return FALSE;
    1190             :     }
    1191           0 :     if (currentBucket_->records_ == NULL) {
    1192           0 :         return FALSE;
    1193             :     }
    1194           0 :     ++itemsIterIndex_;
    1195           0 :     if (itemsIterIndex_ >= currentBucket_->records_->size()) {
    1196           0 :         itemsIterIndex_  = currentBucket_->records_->size();
    1197           0 :         return FALSE;
    1198             :     }
    1199           0 :     return TRUE;
    1200             : }
    1201             : 
    1202             : 
    1203           0 : const UnicodeString &AlphabeticIndex::getRecordName() const {
    1204           0 :     const UnicodeString *retStr = &emptyString_;
    1205           0 :     if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
    1206           0 :         itemsIterIndex_ >= 0 &&
    1207           0 :         itemsIterIndex_ < currentBucket_->records_->size()) {
    1208           0 :             Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
    1209           0 :             retStr = &item->name_;
    1210             :     }
    1211           0 :     return *retStr;
    1212             : }
    1213             : 
    1214           0 : const void *AlphabeticIndex::getRecordData() const {
    1215           0 :     const void *retPtr = NULL;
    1216           0 :     if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
    1217           0 :         itemsIterIndex_ >= 0 &&
    1218           0 :         itemsIterIndex_ < currentBucket_->records_->size()) {
    1219           0 :             Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
    1220           0 :             retPtr = item->data_;
    1221             :     }
    1222           0 :     return retPtr;
    1223             : }
    1224             : 
    1225             : 
    1226           0 : AlphabeticIndex & AlphabeticIndex::resetRecordIterator() {
    1227           0 :     itemsIterIndex_ = -1;
    1228           0 :     return *this;
    1229             : }
    1230             : 
    1231             : 
    1232             : 
    1233           0 : AlphabeticIndex::Bucket::Bucket(const UnicodeString &label,
    1234             :                                 const UnicodeString &lowerBoundary,
    1235           0 :                                 UAlphabeticIndexLabelType type)
    1236             :         : label_(label), lowerBoundary_(lowerBoundary), labelType_(type),
    1237             :           displayBucket_(NULL), displayIndex_(-1),
    1238           0 :           records_(NULL) {
    1239           0 : }
    1240             : 
    1241             : 
    1242           0 : AlphabeticIndex::Bucket::~Bucket() {
    1243           0 :     delete records_;
    1244           0 : }
    1245             : 
    1246             : U_NAMESPACE_END
    1247             : 
    1248             : #endif  // !UCONFIG_NO_COLLATION

Generated by: LCOV version 1.13