Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : *******************************************************************************
5 : * Copyright (C) 2009-2014, International Business Machines Corporation and
6 : * others. All Rights Reserved.
7 : *******************************************************************************
8 : */
9 :
10 : #include "unicode/utypes.h"
11 :
12 : #if !UCONFIG_NO_COLLATION
13 :
14 : #include "unicode/alphaindex.h"
15 : #include "unicode/coll.h"
16 : #include "unicode/localpointer.h"
17 : #include "unicode/normalizer2.h"
18 : #include "unicode/tblcoll.h"
19 : #include "unicode/uchar.h"
20 : #include "unicode/ulocdata.h"
21 : #include "unicode/uniset.h"
22 : #include "unicode/uobject.h"
23 : #include "unicode/usetiter.h"
24 : #include "unicode/utf16.h"
25 :
26 : #include "cmemory.h"
27 : #include "cstring.h"
28 : #include "uassert.h"
29 : #include "uvector.h"
30 : #include "uvectr64.h"
31 :
32 : //#include <string>
33 : //#include <iostream>
34 :
35 : U_NAMESPACE_BEGIN
36 :
37 : namespace {
38 :
39 : /**
40 : * Prefix string for Chinese index buckets.
41 : * See http://unicode.org/repos/cldr/trunk/specs/ldml/tr35-collation.html#Collation_Indexes
42 : */
43 : const UChar BASE[1] = { 0xFDD0 };
44 : const int32_t BASE_LENGTH = 1;
45 :
46 : UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
47 : const UnicodeString &one, const UnicodeString &other);
48 :
49 : } // namespace
50 :
51 : static int32_t U_CALLCONV
52 : collatorComparator(const void *context, const void *left, const void *right);
53 :
54 : static int32_t U_CALLCONV
55 : recordCompareFn(const void *context, const void *left, const void *right);
56 :
57 : // UVector<Record *> support function, delete a Record.
58 : static void U_CALLCONV
59 0 : alphaIndex_deleteRecord(void *obj) {
60 0 : delete static_cast<AlphabeticIndex::Record *>(obj);
61 0 : }
62 :
63 : namespace {
64 :
65 0 : UnicodeString *ownedString(const UnicodeString &s, LocalPointer<UnicodeString> &owned,
66 : UErrorCode &errorCode) {
67 0 : if (U_FAILURE(errorCode)) { return NULL; }
68 0 : if (owned.isValid()) {
69 0 : return owned.orphan();
70 : }
71 0 : UnicodeString *p = new UnicodeString(s);
72 0 : if (p == NULL) {
73 0 : errorCode = U_MEMORY_ALLOCATION_ERROR;
74 : }
75 0 : return p;
76 : }
77 :
78 0 : inline UnicodeString *getString(const UVector &list, int32_t i) {
79 0 : return static_cast<UnicodeString *>(list[i]);
80 : }
81 :
82 0 : inline AlphabeticIndex::Bucket *getBucket(const UVector &list, int32_t i) {
83 0 : return static_cast<AlphabeticIndex::Bucket *>(list[i]);
84 : }
85 :
86 0 : inline AlphabeticIndex::Record *getRecord(const UVector &list, int32_t i) {
87 0 : return static_cast<AlphabeticIndex::Record *>(list[i]);
88 : }
89 :
90 : /**
91 : * Like Java Collections.binarySearch(List, String, Comparator).
92 : *
93 : * @return the index>=0 where the item was found,
94 : * or the index<0 for inserting the string at ~index in sorted order
95 : */
96 0 : int32_t binarySearch(const UVector &list, const UnicodeString &s, const Collator &coll) {
97 0 : if (list.size() == 0) { return ~0; }
98 0 : int32_t start = 0;
99 0 : int32_t limit = list.size();
100 : for (;;) {
101 0 : int32_t i = (start + limit) / 2;
102 0 : const UnicodeString *si = static_cast<UnicodeString *>(list.elementAt(i));
103 0 : UErrorCode errorCode = U_ZERO_ERROR;
104 0 : UCollationResult cmp = coll.compare(s, *si, errorCode);
105 0 : if (cmp == UCOL_EQUAL) {
106 0 : return i;
107 0 : } else if (cmp < 0) {
108 0 : if (i == start) {
109 0 : return ~start; // insert s before *si
110 : }
111 0 : limit = i;
112 : } else {
113 0 : if (i == start) {
114 0 : return ~(start + 1); // insert s after *si
115 : }
116 0 : start = i;
117 : }
118 0 : }
119 : }
120 :
121 : } // namespace
122 :
123 : // The BucketList is not in the anonymous namespace because only Clang
124 : // seems to support its use in other classes from there.
125 : // However, we also don't need U_I18N_API because it is not used from outside the i18n library.
126 : class BucketList : public UObject {
127 : public:
128 0 : BucketList(UVector *bucketList, UVector *publicBucketList)
129 0 : : bucketList_(bucketList), immutableVisibleList_(publicBucketList) {
130 0 : int32_t displayIndex = 0;
131 0 : for (int32_t i = 0; i < publicBucketList->size(); ++i) {
132 0 : getBucket(*publicBucketList, i)->displayIndex_ = displayIndex++;
133 : }
134 0 : }
135 :
136 : // The virtual destructor must not be inline.
137 : // See ticket #8454 for details.
138 : virtual ~BucketList();
139 :
140 0 : int32_t getBucketCount() const {
141 0 : return immutableVisibleList_->size();
142 : }
143 :
144 0 : int32_t getBucketIndex(const UnicodeString &name, const Collator &collatorPrimaryOnly,
145 : UErrorCode &errorCode) {
146 : // binary search
147 0 : int32_t start = 0;
148 0 : int32_t limit = bucketList_->size();
149 0 : while ((start + 1) < limit) {
150 0 : int32_t i = (start + limit) / 2;
151 0 : const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, i);
152 : UCollationResult nameVsBucket =
153 0 : collatorPrimaryOnly.compare(name, bucket->lowerBoundary_, errorCode);
154 0 : if (nameVsBucket < 0) {
155 0 : limit = i;
156 : } else {
157 0 : start = i;
158 : }
159 : }
160 0 : const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, start);
161 0 : if (bucket->displayBucket_ != NULL) {
162 0 : bucket = bucket->displayBucket_;
163 : }
164 0 : return bucket->displayIndex_;
165 : }
166 :
167 : /** All of the buckets, visible and invisible. */
168 : UVector *bucketList_;
169 : /** Just the visible buckets. */
170 : UVector *immutableVisibleList_;
171 : };
172 :
173 0 : BucketList::~BucketList() {
174 0 : delete bucketList_;
175 0 : if (immutableVisibleList_ != bucketList_) {
176 0 : delete immutableVisibleList_;
177 : }
178 0 : }
179 :
180 0 : AlphabeticIndex::ImmutableIndex::~ImmutableIndex() {
181 0 : delete buckets_;
182 0 : delete collatorPrimaryOnly_;
183 0 : }
184 :
185 : int32_t
186 0 : AlphabeticIndex::ImmutableIndex::getBucketCount() const {
187 0 : return buckets_->getBucketCount();
188 : }
189 :
190 : int32_t
191 0 : AlphabeticIndex::ImmutableIndex::getBucketIndex(
192 : const UnicodeString &name, UErrorCode &errorCode) const {
193 0 : return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, errorCode);
194 : }
195 :
196 : const AlphabeticIndex::Bucket *
197 0 : AlphabeticIndex::ImmutableIndex::getBucket(int32_t index) const {
198 0 : if (0 <= index && index < buckets_->getBucketCount()) {
199 0 : return icu::getBucket(*buckets_->immutableVisibleList_, index);
200 : } else {
201 0 : return NULL;
202 : }
203 : }
204 :
205 0 : AlphabeticIndex::AlphabeticIndex(const Locale &locale, UErrorCode &status)
206 : : inputList_(NULL),
207 : labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
208 : maxLabelCount_(99),
209 : initialLabels_(NULL), firstCharsInScripts_(NULL),
210 : collator_(NULL), collatorPrimaryOnly_(NULL),
211 0 : buckets_(NULL) {
212 0 : init(&locale, status);
213 0 : }
214 :
215 :
216 0 : AlphabeticIndex::AlphabeticIndex(RuleBasedCollator *collator, UErrorCode &status)
217 : : inputList_(NULL),
218 : labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
219 : maxLabelCount_(99),
220 : initialLabels_(NULL), firstCharsInScripts_(NULL),
221 : collator_(collator), collatorPrimaryOnly_(NULL),
222 0 : buckets_(NULL) {
223 0 : init(NULL, status);
224 0 : }
225 :
226 :
227 :
228 0 : AlphabeticIndex::~AlphabeticIndex() {
229 0 : delete collator_;
230 0 : delete collatorPrimaryOnly_;
231 0 : delete firstCharsInScripts_;
232 0 : delete buckets_;
233 0 : delete inputList_;
234 0 : delete initialLabels_;
235 0 : }
236 :
237 :
238 0 : AlphabeticIndex &AlphabeticIndex::addLabels(const UnicodeSet &additions, UErrorCode &status) {
239 0 : if (U_FAILURE(status)) {
240 0 : return *this;
241 : }
242 0 : initialLabels_->addAll(additions);
243 0 : clearBuckets();
244 0 : return *this;
245 : }
246 :
247 :
248 0 : AlphabeticIndex &AlphabeticIndex::addLabels(const Locale &locale, UErrorCode &status) {
249 0 : addIndexExemplars(locale, status);
250 0 : clearBuckets();
251 0 : return *this;
252 : }
253 :
254 :
255 0 : AlphabeticIndex::ImmutableIndex *AlphabeticIndex::buildImmutableIndex(UErrorCode &errorCode) {
256 0 : if (U_FAILURE(errorCode)) { return NULL; }
257 : // In C++, the ImmutableIndex must own its copy of the BucketList,
258 : // even if it contains no records, for proper memory management.
259 : // We could clone the buckets_ if they are not NULL,
260 : // but that would be worth it only if this method is called multiple times,
261 : // or called after using the old-style bucket iterator API.
262 0 : LocalPointer<BucketList> immutableBucketList(createBucketList(errorCode));
263 : LocalPointer<RuleBasedCollator> coll(
264 0 : static_cast<RuleBasedCollator *>(collatorPrimaryOnly_->clone()));
265 0 : if (immutableBucketList.isNull() || coll.isNull()) {
266 0 : errorCode = U_MEMORY_ALLOCATION_ERROR;
267 0 : return NULL;
268 : }
269 0 : ImmutableIndex *immIndex = new ImmutableIndex(immutableBucketList.getAlias(), coll.getAlias());
270 0 : if (immIndex == NULL) {
271 0 : errorCode = U_MEMORY_ALLOCATION_ERROR;
272 0 : return NULL;
273 : }
274 : // The ImmutableIndex adopted its parameter objects.
275 0 : immutableBucketList.orphan();
276 0 : coll.orphan();
277 0 : return immIndex;
278 : }
279 :
280 0 : int32_t AlphabeticIndex::getBucketCount(UErrorCode &status) {
281 0 : initBuckets(status);
282 0 : if (U_FAILURE(status)) {
283 0 : return 0;
284 : }
285 0 : return buckets_->getBucketCount();
286 : }
287 :
288 :
289 0 : int32_t AlphabeticIndex::getRecordCount(UErrorCode &status) {
290 0 : if (U_FAILURE(status) || inputList_ == NULL) {
291 0 : return 0;
292 : }
293 0 : return inputList_->size();
294 : }
295 :
296 0 : void AlphabeticIndex::initLabels(UVector &indexCharacters, UErrorCode &errorCode) const {
297 0 : const Normalizer2 *nfkdNormalizer = Normalizer2::getNFKDInstance(errorCode);
298 0 : if (U_FAILURE(errorCode)) { return; }
299 :
300 0 : const UnicodeString &firstScriptBoundary = *getString(*firstCharsInScripts_, 0);
301 : const UnicodeString &overflowBoundary =
302 0 : *getString(*firstCharsInScripts_, firstCharsInScripts_->size() - 1);
303 :
304 : // We make a sorted array of elements.
305 : // Some of the input may be redundant.
306 : // That is, we might have c, ch, d, where "ch" sorts just like "c", "h".
307 : // We filter out those cases.
308 0 : UnicodeSetIterator iter(*initialLabels_);
309 0 : while (iter.next()) {
310 0 : const UnicodeString *item = &iter.getString();
311 0 : LocalPointer<UnicodeString> ownedItem;
312 : UBool checkDistinct;
313 0 : int32_t itemLength = item->length();
314 0 : if (!item->hasMoreChar32Than(0, itemLength, 1)) {
315 0 : checkDistinct = FALSE;
316 0 : } else if(item->charAt(itemLength - 1) == 0x2a && // '*'
317 0 : item->charAt(itemLength - 2) != 0x2a) {
318 : // Use a label if it is marked with one trailing star,
319 : // even if the label string sorts the same when all contractions are suppressed.
320 0 : ownedItem.adoptInstead(new UnicodeString(*item, 0, itemLength - 1));
321 0 : item = ownedItem.getAlias();
322 0 : if (item == NULL) {
323 0 : errorCode = U_MEMORY_ALLOCATION_ERROR;
324 0 : return;
325 : }
326 0 : checkDistinct = FALSE;
327 : } else {
328 0 : checkDistinct = TRUE;
329 : }
330 0 : if (collatorPrimaryOnly_->compare(*item, firstScriptBoundary, errorCode) < 0) {
331 : // Ignore a primary-ignorable or non-alphabetic index character.
332 0 : } else if (collatorPrimaryOnly_->compare(*item, overflowBoundary, errorCode) >= 0) {
333 : // Ignore an index character that will land in the overflow bucket.
334 0 : } else if (checkDistinct &&
335 0 : collatorPrimaryOnly_->compare(*item, separated(*item), errorCode) == 0) {
336 : // Ignore a multi-code point index character that does not sort distinctly
337 : // from the sequence of its separate characters.
338 : } else {
339 0 : int32_t insertionPoint = binarySearch(indexCharacters, *item, *collatorPrimaryOnly_);
340 0 : if (insertionPoint < 0) {
341 0 : indexCharacters.insertElementAt(
342 0 : ownedString(*item, ownedItem, errorCode), ~insertionPoint, errorCode);
343 : } else {
344 0 : const UnicodeString &itemAlreadyIn = *getString(indexCharacters, insertionPoint);
345 0 : if (isOneLabelBetterThanOther(*nfkdNormalizer, *item, itemAlreadyIn)) {
346 : indexCharacters.setElementAt(
347 0 : ownedString(*item, ownedItem, errorCode), insertionPoint);
348 : }
349 : }
350 : }
351 : }
352 0 : if (U_FAILURE(errorCode)) { return; }
353 :
354 : // if the result is still too large, cut down to maxLabelCount_ elements, by removing every nth element
355 :
356 0 : int32_t size = indexCharacters.size() - 1;
357 0 : if (size > maxLabelCount_) {
358 0 : int32_t count = 0;
359 0 : int32_t old = -1;
360 0 : for (int32_t i = 0; i < indexCharacters.size();) {
361 0 : ++count;
362 0 : int32_t bump = count * maxLabelCount_ / size;
363 0 : if (bump == old) {
364 0 : indexCharacters.removeElementAt(i);
365 : } else {
366 0 : old = bump;
367 0 : ++i;
368 : }
369 : }
370 : }
371 : }
372 :
373 : namespace {
374 :
375 0 : const UnicodeString &fixLabel(const UnicodeString ¤t, UnicodeString &temp) {
376 0 : if (!current.startsWith(BASE, BASE_LENGTH)) {
377 0 : return current;
378 : }
379 0 : UChar rest = current.charAt(BASE_LENGTH);
380 0 : if (0x2800 < rest && rest <= 0x28FF) { // stroke count
381 0 : int32_t count = rest-0x2800;
382 0 : temp.setTo((UChar)(0x30 + count % 10));
383 0 : if (count >= 10) {
384 0 : count /= 10;
385 0 : temp.insert(0, (UChar)(0x30 + count % 10));
386 0 : if (count >= 10) {
387 0 : count /= 10;
388 0 : temp.insert(0, (UChar)(0x30 + count));
389 : }
390 : }
391 0 : return temp.append((UChar)0x5283);
392 : }
393 0 : return temp.setTo(current, BASE_LENGTH);
394 : }
395 :
396 0 : UBool hasMultiplePrimaryWeights(
397 : const RuleBasedCollator &coll, uint32_t variableTop,
398 : const UnicodeString &s, UVector64 &ces, UErrorCode &errorCode) {
399 0 : ces.removeAllElements();
400 0 : coll.internalGetCEs(s, ces, errorCode);
401 0 : if (U_FAILURE(errorCode)) { return FALSE; }
402 0 : UBool seenPrimary = FALSE;
403 0 : for (int32_t i = 0; i < ces.size(); ++i) {
404 0 : int64_t ce = ces.elementAti(i);
405 0 : uint32_t p = (uint32_t)(ce >> 32);
406 0 : if (p > variableTop) {
407 : // not primary ignorable
408 0 : if (seenPrimary) {
409 0 : return TRUE;
410 : }
411 0 : seenPrimary = TRUE;
412 : }
413 : }
414 0 : return FALSE;
415 : }
416 :
417 : } // namespace
418 :
419 0 : BucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const {
420 : // Initialize indexCharacters.
421 0 : UVector indexCharacters(errorCode);
422 0 : indexCharacters.setDeleter(uprv_deleteUObject);
423 0 : initLabels(indexCharacters, errorCode);
424 0 : if (U_FAILURE(errorCode)) { return NULL; }
425 :
426 : // Variables for hasMultiplePrimaryWeights().
427 0 : UVector64 ces(errorCode);
428 : uint32_t variableTop;
429 0 : if (collatorPrimaryOnly_->getAttribute(UCOL_ALTERNATE_HANDLING, errorCode) == UCOL_SHIFTED) {
430 0 : variableTop = collatorPrimaryOnly_->getVariableTop(errorCode);
431 : } else {
432 0 : variableTop = 0;
433 : }
434 0 : UBool hasInvisibleBuckets = FALSE;
435 :
436 : // Helper arrays for Chinese Pinyin collation.
437 : Bucket *asciiBuckets[26] = {
438 : NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
439 : NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
440 0 : };
441 : Bucket *pinyinBuckets[26] = {
442 : NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
443 : NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
444 0 : };
445 0 : UBool hasPinyin = FALSE;
446 :
447 0 : LocalPointer<UVector> bucketList(new UVector(errorCode), errorCode);
448 0 : if (U_FAILURE(errorCode)) {
449 0 : return NULL;
450 : }
451 0 : bucketList->setDeleter(uprv_deleteUObject);
452 :
453 : // underflow bucket
454 0 : Bucket *bucket = new Bucket(getUnderflowLabel(), emptyString_, U_ALPHAINDEX_UNDERFLOW);
455 0 : if (bucket == NULL) {
456 0 : errorCode = U_MEMORY_ALLOCATION_ERROR;
457 0 : return NULL;
458 : }
459 0 : bucketList->addElement(bucket, errorCode);
460 0 : if (U_FAILURE(errorCode)) { return NULL; }
461 :
462 0 : UnicodeString temp;
463 :
464 : // fix up the list, adding underflow, additions, overflow
465 : // Insert inflow labels as needed.
466 0 : int32_t scriptIndex = -1;
467 0 : const UnicodeString *scriptUpperBoundary = &emptyString_;
468 0 : for (int32_t i = 0; i < indexCharacters.size(); ++i) {
469 0 : UnicodeString ¤t = *getString(indexCharacters, i);
470 0 : if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) >= 0) {
471 : // We crossed the script boundary into a new script.
472 0 : const UnicodeString &inflowBoundary = *scriptUpperBoundary;
473 0 : UBool skippedScript = FALSE;
474 : for (;;) {
475 0 : scriptUpperBoundary = getString(*firstCharsInScripts_, ++scriptIndex);
476 0 : if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) < 0) {
477 0 : break;
478 : }
479 0 : skippedScript = TRUE;
480 : }
481 0 : if (skippedScript && bucketList->size() > 1) {
482 : // We are skipping one or more scripts,
483 : // and we are not just getting out of the underflow label.
484 0 : bucket = new Bucket(getInflowLabel(), inflowBoundary, U_ALPHAINDEX_INFLOW);
485 0 : if (bucket == NULL) {
486 0 : errorCode = U_MEMORY_ALLOCATION_ERROR;
487 0 : return NULL;
488 : }
489 0 : bucketList->addElement(bucket, errorCode);
490 : }
491 : }
492 : // Add a bucket with the current label.
493 0 : bucket = new Bucket(fixLabel(current, temp), current, U_ALPHAINDEX_NORMAL);
494 0 : if (bucket == NULL) {
495 0 : errorCode = U_MEMORY_ALLOCATION_ERROR;
496 0 : return NULL;
497 : }
498 0 : bucketList->addElement(bucket, errorCode);
499 : // Remember ASCII and Pinyin buckets for Pinyin redirects.
500 : UChar c;
501 0 : if (current.length() == 1 && 0x41 <= (c = current.charAt(0)) && c <= 0x5A) { // A-Z
502 0 : asciiBuckets[c - 0x41] = bucket;
503 0 : } else if (current.length() == BASE_LENGTH + 1 && current.startsWith(BASE, BASE_LENGTH) &&
504 0 : 0x41 <= (c = current.charAt(BASE_LENGTH)) && c <= 0x5A) {
505 0 : pinyinBuckets[c - 0x41] = bucket;
506 0 : hasPinyin = TRUE;
507 : }
508 : // Check for multiple primary weights.
509 0 : if (!current.startsWith(BASE, BASE_LENGTH) &&
510 0 : hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop, current,
511 0 : ces, errorCode) &&
512 0 : current.charAt(current.length() - 1) != 0xFFFF /* !current.endsWith("\uffff") */) {
513 : // "AE-ligature" or "Sch" etc.
514 0 : for (int32_t i = bucketList->size() - 2;; --i) {
515 0 : Bucket *singleBucket = getBucket(*bucketList, i);
516 0 : if (singleBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
517 : // There is no single-character bucket since the last
518 : // underflow or inflow label.
519 0 : break;
520 : }
521 0 : if (singleBucket->displayBucket_ == NULL &&
522 0 : !hasMultiplePrimaryWeights(*collatorPrimaryOnly_, variableTop,
523 : singleBucket->lowerBoundary_,
524 : ces, errorCode)) {
525 : // Add an invisible bucket that redirects strings greater than the expansion
526 : // to the previous single-character bucket.
527 : // For example, after ... Q R S Sch we add Sch\uFFFF->S
528 : // and after ... Q R S Sch Sch\uFFFF St we add St\uFFFF->S.
529 0 : bucket = new Bucket(emptyString_,
530 0 : UnicodeString(current).append((UChar)0xFFFF),
531 0 : U_ALPHAINDEX_NORMAL);
532 0 : if (bucket == NULL) {
533 0 : errorCode = U_MEMORY_ALLOCATION_ERROR;
534 0 : return NULL;
535 : }
536 0 : bucket->displayBucket_ = singleBucket;
537 0 : bucketList->addElement(bucket, errorCode);
538 0 : hasInvisibleBuckets = TRUE;
539 0 : break;
540 : }
541 0 : }
542 : }
543 : }
544 0 : if (U_FAILURE(errorCode)) { return NULL; }
545 0 : if (bucketList->size() == 1) {
546 : // No real labels, show only the underflow label.
547 0 : BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
548 0 : if (bl == NULL) {
549 0 : errorCode = U_MEMORY_ALLOCATION_ERROR;
550 0 : return NULL;
551 : }
552 0 : bucketList.orphan();
553 0 : return bl;
554 : }
555 : // overflow bucket
556 0 : bucket = new Bucket(getOverflowLabel(), *scriptUpperBoundary, U_ALPHAINDEX_OVERFLOW);
557 0 : if (bucket == NULL) {
558 0 : errorCode = U_MEMORY_ALLOCATION_ERROR;
559 0 : return NULL;
560 : }
561 0 : bucketList->addElement(bucket, errorCode); // final
562 :
563 0 : if (hasPinyin) {
564 : // Redirect Pinyin buckets.
565 0 : Bucket *asciiBucket = NULL;
566 0 : for (int32_t i = 0; i < 26; ++i) {
567 0 : if (asciiBuckets[i] != NULL) {
568 0 : asciiBucket = asciiBuckets[i];
569 : }
570 0 : if (pinyinBuckets[i] != NULL && asciiBucket != NULL) {
571 0 : pinyinBuckets[i]->displayBucket_ = asciiBucket;
572 0 : hasInvisibleBuckets = TRUE;
573 : }
574 : }
575 : }
576 :
577 0 : if (U_FAILURE(errorCode)) { return NULL; }
578 0 : if (!hasInvisibleBuckets) {
579 0 : BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
580 0 : if (bl == NULL) {
581 0 : errorCode = U_MEMORY_ALLOCATION_ERROR;
582 0 : return NULL;
583 : }
584 0 : bucketList.orphan();
585 0 : return bl;
586 : }
587 : // Merge inflow buckets that are visually adjacent.
588 : // Iterate backwards: Merge inflow into overflow rather than the other way around.
589 0 : int32_t i = bucketList->size() - 1;
590 0 : Bucket *nextBucket = getBucket(*bucketList, i);
591 0 : while (--i > 0) {
592 0 : bucket = getBucket(*bucketList, i);
593 0 : if (bucket->displayBucket_ != NULL) {
594 0 : continue; // skip invisible buckets
595 : }
596 0 : if (bucket->labelType_ == U_ALPHAINDEX_INFLOW) {
597 0 : if (nextBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
598 0 : bucket->displayBucket_ = nextBucket;
599 0 : continue;
600 : }
601 : }
602 0 : nextBucket = bucket;
603 : }
604 :
605 0 : LocalPointer<UVector> publicBucketList(new UVector(errorCode), errorCode);
606 0 : if (U_FAILURE(errorCode)) {
607 0 : return NULL;
608 : }
609 : // Do not call publicBucketList->setDeleter():
610 : // This vector shares its objects with the bucketList.
611 0 : for (int32_t i = 0; i < bucketList->size(); ++i) {
612 0 : bucket = getBucket(*bucketList, i);
613 0 : if (bucket->displayBucket_ == NULL) {
614 0 : publicBucketList->addElement(bucket, errorCode);
615 : }
616 : }
617 0 : if (U_FAILURE(errorCode)) { return NULL; }
618 0 : BucketList *bl = new BucketList(bucketList.getAlias(), publicBucketList.getAlias());
619 0 : if (bl == NULL) {
620 0 : errorCode = U_MEMORY_ALLOCATION_ERROR;
621 0 : return NULL;
622 : }
623 0 : bucketList.orphan();
624 0 : publicBucketList.orphan();
625 0 : return bl;
626 : }
627 :
628 : /**
629 : * Creates an index, and buckets and sorts the list of records into the index.
630 : */
631 0 : void AlphabeticIndex::initBuckets(UErrorCode &errorCode) {
632 0 : if (U_FAILURE(errorCode) || buckets_ != NULL) {
633 0 : return;
634 : }
635 0 : buckets_ = createBucketList(errorCode);
636 0 : if (U_FAILURE(errorCode) || inputList_ == NULL || inputList_->isEmpty()) {
637 0 : return;
638 : }
639 :
640 : // Sort the records by name.
641 : // Stable sort preserves input order of collation duplicates.
642 0 : inputList_->sortWithUComparator(recordCompareFn, collator_, errorCode);
643 :
644 : // Now, we traverse all of the input, which is now sorted.
645 : // If the item doesn't go in the current bucket, we find the next bucket that contains it.
646 : // This makes the process order n*log(n), since we just sort the list and then do a linear process.
647 : // However, if the user adds an item at a time and then gets the buckets, this isn't efficient, so
648 : // we need to improve it for that case.
649 :
650 0 : Bucket *currentBucket = getBucket(*buckets_->bucketList_, 0);
651 0 : int32_t bucketIndex = 1;
652 : Bucket *nextBucket;
653 : const UnicodeString *upperBoundary;
654 0 : if (bucketIndex < buckets_->bucketList_->size()) {
655 0 : nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
656 0 : upperBoundary = &nextBucket->lowerBoundary_;
657 : } else {
658 0 : nextBucket = NULL;
659 0 : upperBoundary = NULL;
660 : }
661 0 : for (int32_t i = 0; i < inputList_->size(); ++i) {
662 0 : Record *r = getRecord(*inputList_, i);
663 : // if the current bucket isn't the right one, find the one that is
664 : // We have a special flag for the last bucket so that we don't look any further
665 0 : while (upperBoundary != NULL &&
666 0 : collatorPrimaryOnly_->compare(r->name_, *upperBoundary, errorCode) >= 0) {
667 0 : currentBucket = nextBucket;
668 : // now reset the boundary that we compare against
669 0 : if (bucketIndex < buckets_->bucketList_->size()) {
670 0 : nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
671 0 : upperBoundary = &nextBucket->lowerBoundary_;
672 : } else {
673 0 : upperBoundary = NULL;
674 : }
675 : }
676 : // now put the record into the bucket.
677 0 : Bucket *bucket = currentBucket;
678 0 : if (bucket->displayBucket_ != NULL) {
679 0 : bucket = bucket->displayBucket_;
680 : }
681 0 : if (bucket->records_ == NULL) {
682 0 : bucket->records_ = new UVector(errorCode);
683 0 : if (bucket->records_ == NULL) {
684 0 : errorCode = U_MEMORY_ALLOCATION_ERROR;
685 0 : return;
686 : }
687 : }
688 0 : bucket->records_->addElement(r, errorCode);
689 : }
690 : }
691 :
692 0 : void AlphabeticIndex::clearBuckets() {
693 0 : if (buckets_ != NULL) {
694 0 : delete buckets_;
695 0 : buckets_ = NULL;
696 0 : internalResetBucketIterator();
697 : }
698 0 : }
699 :
700 0 : void AlphabeticIndex::internalResetBucketIterator() {
701 0 : labelsIterIndex_ = -1;
702 0 : currentBucket_ = NULL;
703 0 : }
704 :
705 :
706 0 : void AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status) {
707 0 : LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status));
708 0 : if (U_FAILURE(status)) {
709 0 : return;
710 : }
711 :
712 0 : UnicodeSet exemplars;
713 0 : ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status);
714 0 : if (U_SUCCESS(status)) {
715 0 : initialLabels_->addAll(exemplars);
716 0 : return;
717 : }
718 0 : status = U_ZERO_ERROR; // Clear out U_MISSING_RESOURCE_ERROR
719 :
720 : // The locale data did not include explicit Index characters.
721 : // Synthesize a set of them from the locale's standard exemplar characters.
722 0 : ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status);
723 0 : if (U_FAILURE(status)) {
724 0 : return;
725 : }
726 :
727 : // question: should we add auxiliary exemplars?
728 0 : if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.size() == 0) {
729 0 : exemplars.add(0x61, 0x7A);
730 : }
731 0 : if (exemplars.containsSome(0xAC00, 0xD7A3)) { // Hangul syllables
732 : // cut down to small list
733 0 : exemplars.remove(0xAC00, 0xD7A3).
734 0 : add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C).
735 0 : add(0xB9C8).add(0xBC14).add(0xC0AC).add(0xC544).
736 0 : add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0).
737 0 : add(0xD30C).add(0xD558);
738 : }
739 0 : if (exemplars.containsSome(0x1200, 0x137F)) { // Ethiopic block
740 : // cut down to small list
741 : // make use of the fact that Ethiopic is allocated in 8's, where
742 : // the base is 0 mod 8.
743 : UnicodeSet ethiopic(
744 0 : UNICODE_STRING_SIMPLE("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"), status);
745 0 : UnicodeSetIterator it(ethiopic);
746 0 : while (it.next() && !it.isString()) {
747 0 : if ((it.getCodepoint() & 0x7) != 0) {
748 0 : exemplars.remove(it.getCodepoint());
749 : }
750 : }
751 : }
752 :
753 : // Upper-case any that aren't already so.
754 : // (We only do this for synthesized index characters.)
755 0 : UnicodeSetIterator it(exemplars);
756 0 : UnicodeString upperC;
757 0 : while (it.next()) {
758 0 : const UnicodeString &exemplarC = it.getString();
759 0 : upperC = exemplarC;
760 0 : upperC.toUpper(locale);
761 0 : initialLabels_->add(upperC);
762 : }
763 : }
764 :
765 0 : UBool AlphabeticIndex::addChineseIndexCharacters(UErrorCode &errorCode) {
766 0 : UnicodeSet contractions;
767 0 : collatorPrimaryOnly_->internalAddContractions(BASE[0], contractions, errorCode);
768 0 : if (U_FAILURE(errorCode) || contractions.isEmpty()) { return FALSE; }
769 0 : initialLabels_->addAll(contractions);
770 0 : UnicodeSetIterator iter(contractions);
771 0 : while (iter.next()) {
772 0 : const UnicodeString &s = iter.getString();
773 0 : U_ASSERT (s.startsWith(BASE, BASE_LENGTH));
774 0 : UChar c = s.charAt(s.length() - 1);
775 0 : if (0x41 <= c && c <= 0x5A) { // A-Z
776 : // There are Pinyin labels, add ASCII A-Z labels as well.
777 0 : initialLabels_->add(0x41, 0x5A); // A-Z
778 0 : break;
779 : }
780 : }
781 0 : return TRUE;
782 : }
783 :
784 :
785 : /*
786 : * Return the string with interspersed CGJs. Input must have more than 2 codepoints.
787 : */
788 : static const UChar CGJ = 0x034F;
789 0 : UnicodeString AlphabeticIndex::separated(const UnicodeString &item) {
790 0 : UnicodeString result;
791 0 : if (item.length() == 0) {
792 0 : return result;
793 : }
794 0 : int32_t i = 0;
795 : for (;;) {
796 0 : UChar32 cp = item.char32At(i);
797 0 : result.append(cp);
798 0 : i = item.moveIndex32(i, 1);
799 0 : if (i >= item.length()) {
800 0 : break;
801 : }
802 0 : result.append(CGJ);
803 0 : }
804 0 : return result;
805 : }
806 :
807 :
808 0 : UBool AlphabeticIndex::operator==(const AlphabeticIndex& /* other */) const {
809 0 : return FALSE;
810 : }
811 :
812 :
813 0 : UBool AlphabeticIndex::operator!=(const AlphabeticIndex& /* other */) const {
814 0 : return FALSE;
815 : }
816 :
817 :
818 0 : const RuleBasedCollator &AlphabeticIndex::getCollator() const {
819 0 : return *collator_;
820 : }
821 :
822 :
823 0 : const UnicodeString &AlphabeticIndex::getInflowLabel() const {
824 0 : return inflowLabel_;
825 : }
826 :
827 0 : const UnicodeString &AlphabeticIndex::getOverflowLabel() const {
828 0 : return overflowLabel_;
829 : }
830 :
831 :
832 0 : const UnicodeString &AlphabeticIndex::getUnderflowLabel() const {
833 0 : return underflowLabel_;
834 : }
835 :
836 :
837 0 : AlphabeticIndex &AlphabeticIndex::setInflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
838 0 : inflowLabel_ = label;
839 0 : clearBuckets();
840 0 : return *this;
841 : }
842 :
843 :
844 0 : AlphabeticIndex &AlphabeticIndex::setOverflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
845 0 : overflowLabel_ = label;
846 0 : clearBuckets();
847 0 : return *this;
848 : }
849 :
850 :
851 0 : AlphabeticIndex &AlphabeticIndex::setUnderflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
852 0 : underflowLabel_ = label;
853 0 : clearBuckets();
854 0 : return *this;
855 : }
856 :
857 :
858 0 : int32_t AlphabeticIndex::getMaxLabelCount() const {
859 0 : return maxLabelCount_;
860 : }
861 :
862 :
863 0 : AlphabeticIndex &AlphabeticIndex::setMaxLabelCount(int32_t maxLabelCount, UErrorCode &status) {
864 0 : if (U_FAILURE(status)) {
865 0 : return *this;
866 : }
867 0 : if (maxLabelCount <= 0) {
868 0 : status = U_ILLEGAL_ARGUMENT_ERROR;
869 0 : return *this;
870 : }
871 0 : maxLabelCount_ = maxLabelCount;
872 0 : clearBuckets();
873 0 : return *this;
874 : }
875 :
876 :
877 : //
878 : // init() - Common code for constructors.
879 : //
880 :
881 0 : void AlphabeticIndex::init(const Locale *locale, UErrorCode &status) {
882 0 : if (U_FAILURE(status)) { return; }
883 0 : if (locale == NULL && collator_ == NULL) {
884 0 : status = U_ILLEGAL_ARGUMENT_ERROR;
885 0 : return;
886 : }
887 :
888 0 : initialLabels_ = new UnicodeSet();
889 0 : if (initialLabels_ == NULL) {
890 0 : status = U_MEMORY_ALLOCATION_ERROR;
891 0 : return;
892 : }
893 :
894 0 : inflowLabel_.setTo((UChar)0x2026); // Ellipsis
895 0 : overflowLabel_ = inflowLabel_;
896 0 : underflowLabel_ = inflowLabel_;
897 :
898 0 : if (collator_ == NULL) {
899 0 : Collator *coll = Collator::createInstance(*locale, status);
900 0 : if (U_FAILURE(status)) {
901 0 : delete coll;
902 0 : return;
903 : }
904 0 : if (coll == NULL) {
905 0 : status = U_MEMORY_ALLOCATION_ERROR;
906 0 : return;
907 : }
908 0 : collator_ = dynamic_cast<RuleBasedCollator *>(coll);
909 0 : if (collator_ == NULL) {
910 0 : delete coll;
911 0 : status = U_UNSUPPORTED_ERROR;
912 0 : return;
913 : }
914 : }
915 0 : collatorPrimaryOnly_ = static_cast<RuleBasedCollator *>(collator_->clone());
916 0 : if (collatorPrimaryOnly_ == NULL) {
917 0 : status = U_MEMORY_ALLOCATION_ERROR;
918 0 : return;
919 : }
920 0 : collatorPrimaryOnly_->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, status);
921 0 : firstCharsInScripts_ = firstStringsInScript(status);
922 0 : if (U_FAILURE(status)) { return; }
923 0 : firstCharsInScripts_->sortWithUComparator(collatorComparator, collatorPrimaryOnly_, status);
924 : // Guard against a degenerate collator where
925 : // some script boundary strings are primary ignorable.
926 : for (;;) {
927 0 : if (U_FAILURE(status)) { return; }
928 0 : if (firstCharsInScripts_->isEmpty()) {
929 : // AlphabeticIndex requires some non-ignorable script boundary strings.
930 0 : status = U_ILLEGAL_ARGUMENT_ERROR;
931 0 : return;
932 : }
933 0 : if (collatorPrimaryOnly_->compare(
934 0 : *static_cast<UnicodeString *>(firstCharsInScripts_->elementAt(0)),
935 0 : emptyString_, status) == UCOL_EQUAL) {
936 0 : firstCharsInScripts_->removeElementAt(0);
937 : } else {
938 0 : break;
939 : }
940 : }
941 :
942 : // Chinese index characters, which are specific to each of the several Chinese tailorings,
943 : // take precedence over the single locale data exemplar set per language.
944 0 : if (!addChineseIndexCharacters(status) && locale != NULL) {
945 0 : addIndexExemplars(*locale, status);
946 : }
947 : }
948 :
949 :
950 : //
951 : // Comparison function for UVector<UnicodeString *> sorting with a collator.
952 : //
953 : static int32_t U_CALLCONV
954 0 : collatorComparator(const void *context, const void *left, const void *right) {
955 0 : const UElement *leftElement = static_cast<const UElement *>(left);
956 0 : const UElement *rightElement = static_cast<const UElement *>(right);
957 0 : const UnicodeString *leftString = static_cast<const UnicodeString *>(leftElement->pointer);
958 0 : const UnicodeString *rightString = static_cast<const UnicodeString *>(rightElement->pointer);
959 :
960 0 : if (leftString == rightString) {
961 : // Catches case where both are NULL
962 0 : return 0;
963 : }
964 0 : if (leftString == NULL) {
965 0 : return 1;
966 : };
967 0 : if (rightString == NULL) {
968 0 : return -1;
969 : }
970 0 : const Collator *col = static_cast<const Collator *>(context);
971 0 : UErrorCode errorCode = U_ZERO_ERROR;
972 0 : return col->compare(*leftString, *rightString, errorCode);
973 : }
974 :
975 : //
976 : // Comparison function for UVector<Record *> sorting with a collator.
977 : //
978 : static int32_t U_CALLCONV
979 0 : recordCompareFn(const void *context, const void *left, const void *right) {
980 0 : const UElement *leftElement = static_cast<const UElement *>(left);
981 0 : const UElement *rightElement = static_cast<const UElement *>(right);
982 0 : const AlphabeticIndex::Record *leftRec = static_cast<const AlphabeticIndex::Record *>(leftElement->pointer);
983 0 : const AlphabeticIndex::Record *rightRec = static_cast<const AlphabeticIndex::Record *>(rightElement->pointer);
984 0 : const Collator *col = static_cast<const Collator *>(context);
985 0 : UErrorCode errorCode = U_ZERO_ERROR;
986 0 : return col->compare(leftRec->name_, rightRec->name_, errorCode);
987 : }
988 :
989 0 : UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) {
990 0 : if (U_FAILURE(status)) {
991 0 : return NULL;
992 : }
993 0 : LocalPointer<UVector> dest(new UVector(status), status);
994 0 : if (U_FAILURE(status)) {
995 0 : return NULL;
996 : }
997 0 : dest->setDeleter(uprv_deleteUObject);
998 : // Fetch the script-first-primary contractions which are defined in the root collator.
999 : // They all start with U+FDD1.
1000 0 : UnicodeSet set;
1001 0 : collatorPrimaryOnly_->internalAddContractions(0xFDD1, set, status);
1002 0 : if (U_FAILURE(status)) {
1003 0 : return NULL;
1004 : }
1005 0 : if (set.isEmpty()) {
1006 0 : status = U_UNSUPPORTED_ERROR;
1007 0 : return NULL;
1008 : }
1009 0 : UnicodeSetIterator iter(set);
1010 0 : while (iter.next()) {
1011 0 : const UnicodeString &boundary = iter.getString();
1012 0 : uint32_t gcMask = U_GET_GC_MASK(boundary.char32At(1));
1013 0 : if ((gcMask & (U_GC_L_MASK | U_GC_CN_MASK)) == 0) {
1014 : // Ignore boundaries for the special reordering groups.
1015 : // Take only those for "real scripts" (where the sample character is a Letter,
1016 : // and the one for unassigned implicit weights (Cn).
1017 0 : continue;
1018 : }
1019 0 : UnicodeString *s = new UnicodeString(boundary);
1020 0 : if (s == NULL) {
1021 0 : status = U_MEMORY_ALLOCATION_ERROR;
1022 0 : return NULL;
1023 : }
1024 0 : dest->addElement(s, status);
1025 : }
1026 0 : return dest.orphan();
1027 : }
1028 :
1029 :
1030 : namespace {
1031 :
1032 : /**
1033 : * Returns true if one index character string is "better" than the other.
1034 : * Shorter NFKD is better, and otherwise NFKD-binary-less-than is
1035 : * better, and otherwise binary-less-than is better.
1036 : */
1037 0 : UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
1038 : const UnicodeString &one, const UnicodeString &other) {
1039 : // This is called with primary-equal strings, but never with one.equals(other).
1040 0 : UErrorCode status = U_ZERO_ERROR;
1041 0 : UnicodeString n1 = nfkdNormalizer.normalize(one, status);
1042 0 : UnicodeString n2 = nfkdNormalizer.normalize(other, status);
1043 0 : if (U_FAILURE(status)) { return FALSE; }
1044 0 : int32_t result = n1.countChar32() - n2.countChar32();
1045 0 : if (result != 0) {
1046 0 : return result < 0;
1047 : }
1048 0 : result = n1.compareCodePointOrder(n2);
1049 0 : if (result != 0) {
1050 0 : return result < 0;
1051 : }
1052 0 : return one.compareCodePointOrder(other) < 0;
1053 : }
1054 :
1055 : } // namespace
1056 :
1057 : //
1058 : // Constructor & Destructor for AlphabeticIndex::Record
1059 : //
1060 : // Records are internal only, instances are not directly surfaced in the public API.
1061 : // This class is mostly struct-like, with all public fields.
1062 :
1063 0 : AlphabeticIndex::Record::Record(const UnicodeString &name, const void *data)
1064 0 : : name_(name), data_(data) {}
1065 :
1066 0 : AlphabeticIndex::Record::~Record() {
1067 0 : }
1068 :
1069 :
1070 0 : AlphabeticIndex & AlphabeticIndex::addRecord(const UnicodeString &name, const void *data, UErrorCode &status) {
1071 0 : if (U_FAILURE(status)) {
1072 0 : return *this;
1073 : }
1074 0 : if (inputList_ == NULL) {
1075 0 : inputList_ = new UVector(status);
1076 0 : if (inputList_ == NULL) {
1077 0 : status = U_MEMORY_ALLOCATION_ERROR;
1078 0 : return *this;
1079 : }
1080 0 : inputList_->setDeleter(alphaIndex_deleteRecord);
1081 : }
1082 0 : Record *r = new Record(name, data);
1083 0 : if (r == NULL) {
1084 0 : status = U_MEMORY_ALLOCATION_ERROR;
1085 0 : return *this;
1086 : }
1087 0 : inputList_->addElement(r, status);
1088 0 : clearBuckets();
1089 : //std::string ss;
1090 : //std::string ss2;
1091 : //std::cout << "added record: name = \"" << r->name_.toUTF8String(ss) << "\"" <<
1092 : // " sortingName = \"" << r->sortingName_.toUTF8String(ss2) << "\"" << std::endl;
1093 0 : return *this;
1094 : }
1095 :
1096 :
1097 0 : AlphabeticIndex &AlphabeticIndex::clearRecords(UErrorCode &status) {
1098 0 : if (U_SUCCESS(status) && inputList_ != NULL && !inputList_->isEmpty()) {
1099 0 : inputList_->removeAllElements();
1100 0 : clearBuckets();
1101 : }
1102 0 : return *this;
1103 : }
1104 :
1105 0 : int32_t AlphabeticIndex::getBucketIndex(const UnicodeString &name, UErrorCode &status) {
1106 0 : initBuckets(status);
1107 0 : if (U_FAILURE(status)) {
1108 0 : return 0;
1109 : }
1110 0 : return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, status);
1111 : }
1112 :
1113 :
1114 0 : int32_t AlphabeticIndex::getBucketIndex() const {
1115 0 : return labelsIterIndex_;
1116 : }
1117 :
1118 :
1119 0 : UBool AlphabeticIndex::nextBucket(UErrorCode &status) {
1120 0 : if (U_FAILURE(status)) {
1121 0 : return FALSE;
1122 : }
1123 0 : if (buckets_ == NULL && currentBucket_ != NULL) {
1124 0 : status = U_ENUM_OUT_OF_SYNC_ERROR;
1125 0 : return FALSE;
1126 : }
1127 0 : initBuckets(status);
1128 0 : if (U_FAILURE(status)) {
1129 0 : return FALSE;
1130 : }
1131 0 : ++labelsIterIndex_;
1132 0 : if (labelsIterIndex_ >= buckets_->getBucketCount()) {
1133 0 : labelsIterIndex_ = buckets_->getBucketCount();
1134 0 : return FALSE;
1135 : }
1136 0 : currentBucket_ = getBucket(*buckets_->immutableVisibleList_, labelsIterIndex_);
1137 0 : resetRecordIterator();
1138 0 : return TRUE;
1139 : }
1140 :
1141 0 : const UnicodeString &AlphabeticIndex::getBucketLabel() const {
1142 0 : if (currentBucket_ != NULL) {
1143 0 : return currentBucket_->label_;
1144 : } else {
1145 0 : return emptyString_;
1146 : }
1147 : }
1148 :
1149 :
1150 0 : UAlphabeticIndexLabelType AlphabeticIndex::getBucketLabelType() const {
1151 0 : if (currentBucket_ != NULL) {
1152 0 : return currentBucket_->labelType_;
1153 : } else {
1154 0 : return U_ALPHAINDEX_NORMAL;
1155 : }
1156 : }
1157 :
1158 :
1159 0 : int32_t AlphabeticIndex::getBucketRecordCount() const {
1160 0 : if (currentBucket_ != NULL && currentBucket_->records_ != NULL) {
1161 0 : return currentBucket_->records_->size();
1162 : } else {
1163 0 : return 0;
1164 : }
1165 : }
1166 :
1167 :
1168 0 : AlphabeticIndex &AlphabeticIndex::resetBucketIterator(UErrorCode &status) {
1169 0 : if (U_FAILURE(status)) {
1170 0 : return *this;
1171 : }
1172 0 : internalResetBucketIterator();
1173 0 : return *this;
1174 : }
1175 :
1176 :
1177 0 : UBool AlphabeticIndex::nextRecord(UErrorCode &status) {
1178 0 : if (U_FAILURE(status)) {
1179 0 : return FALSE;
1180 : }
1181 0 : if (currentBucket_ == NULL) {
1182 : // We are trying to iterate over the items in a bucket, but there is no
1183 : // current bucket from the enumeration of buckets.
1184 0 : status = U_INVALID_STATE_ERROR;
1185 0 : return FALSE;
1186 : }
1187 0 : if (buckets_ == NULL) {
1188 0 : status = U_ENUM_OUT_OF_SYNC_ERROR;
1189 0 : return FALSE;
1190 : }
1191 0 : if (currentBucket_->records_ == NULL) {
1192 0 : return FALSE;
1193 : }
1194 0 : ++itemsIterIndex_;
1195 0 : if (itemsIterIndex_ >= currentBucket_->records_->size()) {
1196 0 : itemsIterIndex_ = currentBucket_->records_->size();
1197 0 : return FALSE;
1198 : }
1199 0 : return TRUE;
1200 : }
1201 :
1202 :
1203 0 : const UnicodeString &AlphabeticIndex::getRecordName() const {
1204 0 : const UnicodeString *retStr = &emptyString_;
1205 0 : if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
1206 0 : itemsIterIndex_ >= 0 &&
1207 0 : itemsIterIndex_ < currentBucket_->records_->size()) {
1208 0 : Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
1209 0 : retStr = &item->name_;
1210 : }
1211 0 : return *retStr;
1212 : }
1213 :
1214 0 : const void *AlphabeticIndex::getRecordData() const {
1215 0 : const void *retPtr = NULL;
1216 0 : if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
1217 0 : itemsIterIndex_ >= 0 &&
1218 0 : itemsIterIndex_ < currentBucket_->records_->size()) {
1219 0 : Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
1220 0 : retPtr = item->data_;
1221 : }
1222 0 : return retPtr;
1223 : }
1224 :
1225 :
1226 0 : AlphabeticIndex & AlphabeticIndex::resetRecordIterator() {
1227 0 : itemsIterIndex_ = -1;
1228 0 : return *this;
1229 : }
1230 :
1231 :
1232 :
1233 0 : AlphabeticIndex::Bucket::Bucket(const UnicodeString &label,
1234 : const UnicodeString &lowerBoundary,
1235 0 : UAlphabeticIndexLabelType type)
1236 : : label_(label), lowerBoundary_(lowerBoundary), labelType_(type),
1237 : displayBucket_(NULL), displayIndex_(-1),
1238 0 : records_(NULL) {
1239 0 : }
1240 :
1241 :
1242 0 : AlphabeticIndex::Bucket::~Bucket() {
1243 0 : delete records_;
1244 0 : }
1245 :
1246 : U_NAMESPACE_END
1247 :
1248 : #endif // !UCONFIG_NO_COLLATION
|