Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : *******************************************************************************
5 : * Copyright (C) 1996-2015, International Business Machines
6 : * Corporation and others. All Rights Reserved.
7 : *******************************************************************************
8 : * rulebasedcollator.cpp
9 : *
10 : * (replaced the former tblcoll.cpp)
11 : *
12 : * created on: 2012feb14 with new and old collation code
13 : * created by: Markus W. Scherer
14 : */
15 :
16 : #include "unicode/utypes.h"
17 :
18 : #if !UCONFIG_NO_COLLATION
19 :
20 : #include "unicode/coll.h"
21 : #include "unicode/coleitr.h"
22 : #include "unicode/localpointer.h"
23 : #include "unicode/locid.h"
24 : #include "unicode/sortkey.h"
25 : #include "unicode/tblcoll.h"
26 : #include "unicode/ucol.h"
27 : #include "unicode/uiter.h"
28 : #include "unicode/uloc.h"
29 : #include "unicode/uniset.h"
30 : #include "unicode/unistr.h"
31 : #include "unicode/usetiter.h"
32 : #include "unicode/utf8.h"
33 : #include "unicode/uversion.h"
34 : #include "bocsu.h"
35 : #include "charstr.h"
36 : #include "cmemory.h"
37 : #include "collation.h"
38 : #include "collationcompare.h"
39 : #include "collationdata.h"
40 : #include "collationdatareader.h"
41 : #include "collationfastlatin.h"
42 : #include "collationiterator.h"
43 : #include "collationkeys.h"
44 : #include "collationroot.h"
45 : #include "collationsets.h"
46 : #include "collationsettings.h"
47 : #include "collationtailoring.h"
48 : #include "cstring.h"
49 : #include "uassert.h"
50 : #include "ucol_imp.h"
51 : #include "uhash.h"
52 : #include "uitercollationiterator.h"
53 : #include "ustr_imp.h"
54 : #include "utf16collationiterator.h"
55 : #include "utf8collationiterator.h"
56 : #include "uvectr64.h"
57 :
58 : U_NAMESPACE_BEGIN
59 :
60 : namespace {
61 :
// Sort-key sink over a caller-supplied buffer of fixed capacity.
// The buffer is never enlarged: Resize() always reports failure and
// AppendBeyondCapacity() copies only the bytes that still fit.
62 : class FixedSortKeyByteSink : public SortKeyByteSink {
63 : public:
// dest/destCapacity are handed unchanged to the SortKeyByteSink base class.
64 0 : FixedSortKeyByteSink(char *dest, int32_t destCapacity)
65 0 : : SortKeyByteSink(dest, destCapacity) {}
66 : virtual ~FixedSortKeyByteSink();
67 :
68 : private:
// Overflow hooks overridden from SortKeyByteSink; defined below.
69 : virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length);
70 : virtual UBool Resize(int32_t appendCapacity, int32_t length);
71 : };
72 :
// Nothing to release: this class adds no members of its own.
73 0 : FixedSortKeyByteSink::~FixedSortKeyByteSink() {}
74 :
75 : void
76 0 : FixedSortKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t /*n*/, int32_t length) {
77 : // buffer_ != NULL && bytes != NULL && n > 0 && appended_ > capacity_
78 : // Fill the buffer completely.
79 0 : int32_t available = capacity_ - length;
80 0 : if (available > 0) {
81 0 : uprv_memcpy(buffer_ + length, bytes, available);
82 : }
83 0 : }
84 :
85 : UBool
86 0 : FixedSortKeyByteSink::Resize(int32_t /*appendCapacity*/, int32_t /*length*/) {
87 0 : return FALSE;
88 : }
89 :
90 : } // namespace
91 :
92 : // Not in an anonymous namespace, so that it can be a friend of CollationKey.
// Sort-key sink that writes into the byte storage of a CollationKey.
// Unlike the fixed-size sink it can grow: Resize() asks the key to reallocate.
93 : class CollationKeyByteSink : public SortKeyByteSink {
94 : public:
// Starts out writing into the key's current bytes with the key's current capacity.
95 0 : CollationKeyByteSink(CollationKey &key)
96 0 : : SortKeyByteSink(reinterpret_cast<char *>(key.getBytes()), key.getCapacity()),
97 0 : key_(key) {}
98 : virtual ~CollationKeyByteSink();
99 :
100 : private:
// Overflow hooks overridden from SortKeyByteSink; defined below.
101 : virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length);
102 : virtual UBool Resize(int32_t appendCapacity, int32_t length);
103 :
// The key being filled; held by reference, so it must outlive this sink.
104 : CollationKey &key_;
105 : };
106 :
// Nothing to release: key_ is only a reference.
107 0 : CollationKeyByteSink::~CollationKeyByteSink() {}
108 :
109 : void
110 0 : CollationKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) {
111 : // buffer_ != NULL && bytes != NULL && n > 0 && appended_ > capacity_
112 0 : if (Resize(n, length)) {
113 0 : uprv_memcpy(buffer_ + length, bytes, n);
114 : }
115 0 : }
116 :
117 : UBool
118 0 : CollationKeyByteSink::Resize(int32_t appendCapacity, int32_t length) {
119 0 : if (buffer_ == NULL) {
120 0 : return FALSE; // allocation failed before already
121 : }
122 0 : int32_t newCapacity = 2 * capacity_;
123 0 : int32_t altCapacity = length + 2 * appendCapacity;
124 0 : if (newCapacity < altCapacity) {
125 0 : newCapacity = altCapacity;
126 : }
127 0 : if (newCapacity < 200) {
128 0 : newCapacity = 200;
129 : }
130 0 : uint8_t *newBuffer = key_.reallocate(newCapacity, length);
131 0 : if (newBuffer == NULL) {
132 0 : SetNotOk();
133 0 : return FALSE;
134 : }
135 0 : buffer_ = reinterpret_cast<char *>(newBuffer);
136 0 : capacity_ = newCapacity;
137 0 : return TRUE;
138 : }
139 :
// Copy constructor: copies every pointer, locale and flag from `other`,
// then takes its own reference on the two reference-counted objects
// (settings and cacheEntry) that are released again in the destructor.
140 0 : RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator &other)
141 : : Collator(other),
142 0 : data(other.data),
143 0 : settings(other.settings),
144 0 : tailoring(other.tailoring),
145 0 : cacheEntry(other.cacheEntry),
146 : validLocale(other.validLocale),
147 0 : explicitlySetAttributes(other.explicitlySetAttributes),
148 0 : actualLocaleIsSameAsValid(other.actualLocaleIsSameAsValid) {
// NOTE(review): dereferences settings/cacheEntry unconditionally, so this
// assumes `other` was fully constructed (both non-NULL) - confirm with callers.
149 0 : settings->addRef();
150 0 : cacheEntry->addRef();
151 0 : }
152 :
// Builds a collator from binary tailoring data.
//   bin/length : the binary data; NULL or a zero length is rejected
//   base       : collator the data is based on; must be non-NULL and must use
//                the root tailoring, otherwise U_UNSUPPORTED_ERROR is set
//   errorCode  : in/out ICU error code; nothing is done if it is already a failure
// All members start out NULL/empty and stay that way on any error return.
153 0 : RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length,
154 0 : const RuleBasedCollator *base, UErrorCode &errorCode)
155 : : data(NULL),
156 : settings(NULL),
157 : tailoring(NULL),
158 : cacheEntry(NULL),
159 : validLocale(""),
160 : explicitlySetAttributes(0),
161 0 : actualLocaleIsSameAsValid(FALSE) {
162 0 : if(U_FAILURE(errorCode)) { return; }
163 0 : if(bin == NULL || length == 0 || base == NULL) {
164 0 : errorCode = U_ILLEGAL_ARGUMENT_ERROR;
165 0 : return;
166 : }
167 0 : const CollationTailoring *root = CollationRoot::getRoot(errorCode);
168 0 : if(U_FAILURE(errorCode)) { return; }
169 0 : if(base->tailoring != root) {
170 0 : errorCode = U_UNSUPPORTED_ERROR;
171 0 : return;
172 : }
// The new tailoring starts from the base tailoring's settings; the
// LocalPointer deletes it again on every early return below.
173 0 : LocalPointer<CollationTailoring> t(new CollationTailoring(base->tailoring->settings));
174 0 : if(t.isNull() || t->isBogus()) {
175 0 : errorCode = U_MEMORY_ALLOCATION_ERROR;
176 0 : return;
177 : }
178 0 : CollationDataReader::read(base->tailoring, bin, length, *t, errorCode);
179 0 : if(U_FAILURE(errorCode)) { return; }
// The actual locale of the new tailoring is explicitly marked bogus here.
180 0 : t->actualLocale.setToBogus();
// Ownership of the tailoring passes to adoptTailoring().
181 0 : adoptTailoring(t.orphan(), errorCode);
182 : }
183 :
// Builds a collator on top of an existing cache entry: data, settings and
// tailoring come from entry->tailoring, the valid locale from the entry.
// Takes its own reference on the settings and on the cache entry.
// NOTE(review): `entry` and entry->tailoring are dereferenced without a NULL check.
184 0 : RuleBasedCollator::RuleBasedCollator(const CollationCacheEntry *entry)
185 0 : : data(entry->tailoring->data),
186 0 : settings(entry->tailoring->settings),
187 0 : tailoring(entry->tailoring),
188 : cacheEntry(entry),
189 : validLocale(entry->validLocale),
190 : explicitlySetAttributes(0),
191 0 : actualLocaleIsSameAsValid(FALSE) {
192 0 : settings->addRef();
193 0 : cacheEntry->addRef();
194 0 : }
195 :
// Hands the two reference-counted members (settings, cacheEntry) to
// SharedObject::clearPtr(); data and tailoring are not touched here.
196 0 : RuleBasedCollator::~RuleBasedCollator() {
197 0 : SharedObject::clearPtr(settings);
198 0 : SharedObject::clearPtr(cacheEntry);
199 0 : }
200 :
201 : void
202 0 : RuleBasedCollator::adoptTailoring(CollationTailoring *t, UErrorCode &errorCode) {
203 0 : if(U_FAILURE(errorCode)) {
204 0 : t->deleteIfZeroRefCount();
205 0 : return;
206 : }
207 0 : U_ASSERT(settings == NULL && data == NULL && tailoring == NULL && cacheEntry == NULL);
208 0 : cacheEntry = new CollationCacheEntry(t->actualLocale, t);
209 0 : if(cacheEntry == NULL) {
210 0 : errorCode = U_MEMORY_ALLOCATION_ERROR;
211 0 : t->deleteIfZeroRefCount();
212 0 : return;
213 : }
214 0 : data = t->data;
215 0 : settings = t->settings;
216 0 : settings->addRef();
217 0 : tailoring = t;
218 0 : cacheEntry->addRef();
219 0 : validLocale = t->actualLocale;
220 0 : actualLocaleIsSameAsValid = FALSE;
221 : }
222 :
223 : Collator *
224 0 : RuleBasedCollator::clone() const {
225 0 : return new RuleBasedCollator(*this);
226 : }
227 :
228 0 : RuleBasedCollator &RuleBasedCollator::operator=(const RuleBasedCollator &other) {
229 0 : if(this == &other) { return *this; }
230 0 : SharedObject::copyPtr(other.settings, settings);
231 0 : tailoring = other.tailoring;
232 0 : SharedObject::copyPtr(other.cacheEntry, cacheEntry);
233 0 : data = tailoring->data;
234 0 : validLocale = other.validLocale;
235 0 : explicitlySetAttributes = other.explicitlySetAttributes;
236 0 : actualLocaleIsSameAsValid = other.actualLocaleIsSameAsValid;
237 0 : return *this;
238 : }
239 :
240 0 : UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator)
241 :
242 : UBool
243 0 : RuleBasedCollator::operator==(const Collator& other) const {
244 0 : if(this == &other) { return TRUE; }
245 0 : if(!Collator::operator==(other)) { return FALSE; }
246 0 : const RuleBasedCollator &o = static_cast<const RuleBasedCollator &>(other);
247 0 : if(*settings != *o.settings) { return FALSE; }
248 0 : if(data == o.data) { return TRUE; }
249 0 : UBool thisIsRoot = data->base == NULL;
250 0 : UBool otherIsRoot = o.data->base == NULL;
251 0 : U_ASSERT(!thisIsRoot || !otherIsRoot); // otherwise their data pointers should be ==
252 0 : if(thisIsRoot != otherIsRoot) { return FALSE; }
253 0 : if((thisIsRoot || !tailoring->rules.isEmpty()) &&
254 0 : (otherIsRoot || !o.tailoring->rules.isEmpty())) {
255 : // Shortcut: If both collators have valid rule strings, then compare those.
256 0 : if(tailoring->rules == o.tailoring->rules) { return TRUE; }
257 : }
258 : // Different rule strings can result in the same or equivalent tailoring.
259 : // The rule strings are optional in ICU resource bundles, although included by default.
260 : // cloneBinary() drops the rule string.
261 0 : UErrorCode errorCode = U_ZERO_ERROR;
262 0 : LocalPointer<UnicodeSet> thisTailored(getTailoredSet(errorCode));
263 0 : LocalPointer<UnicodeSet> otherTailored(o.getTailoredSet(errorCode));
264 0 : if(U_FAILURE(errorCode)) { return FALSE; }
265 0 : if(*thisTailored != *otherTailored) { return FALSE; }
266 : // For completeness, we should compare all of the mappings;
267 : // or we should create a list of strings, sort it with one collator,
268 : // and check if both collators compare adjacent strings the same
269 : // (order & strength, down to quaternary); or similar.
270 : // Testing equality of collators seems unusual.
271 0 : return TRUE;
272 : }
273 :
274 : int32_t
275 0 : RuleBasedCollator::hashCode() const {
276 0 : int32_t h = settings->hashCode();
277 0 : if(data->base == NULL) { return h; } // root collator
278 : // Do not rely on the rule string, see comments in operator==().
279 0 : UErrorCode errorCode = U_ZERO_ERROR;
280 0 : LocalPointer<UnicodeSet> set(getTailoredSet(errorCode));
281 0 : if(U_FAILURE(errorCode)) { return 0; }
282 0 : UnicodeSetIterator iter(*set);
283 0 : while(iter.next() && !iter.isString()) {
284 0 : h ^= data->getCE32(iter.getCodepoint());
285 : }
286 0 : return h;
287 : }
288 :
289 : void
290 0 : RuleBasedCollator::setLocales(const Locale &requested, const Locale &valid,
291 : const Locale &actual) {
292 0 : if(actual == tailoring->actualLocale) {
293 0 : actualLocaleIsSameAsValid = FALSE;
294 : } else {
295 0 : U_ASSERT(actual == valid);
296 0 : actualLocaleIsSameAsValid = TRUE;
297 : }
298 : // Do not modify tailoring.actualLocale:
299 : // We cannot be sure that that would be thread-safe.
300 0 : validLocale = valid;
301 : (void)requested; // Ignore, see also ticket #10477.
302 0 : }
303 :
304 : Locale
305 0 : RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode& errorCode) const {
306 0 : if(U_FAILURE(errorCode)) {
307 0 : return Locale::getRoot();
308 : }
309 0 : switch(type) {
310 : case ULOC_ACTUAL_LOCALE:
311 0 : return actualLocaleIsSameAsValid ? validLocale : tailoring->actualLocale;
312 : case ULOC_VALID_LOCALE:
313 0 : return validLocale;
314 : case ULOC_REQUESTED_LOCALE:
315 : default:
316 0 : errorCode = U_ILLEGAL_ARGUMENT_ERROR;
317 0 : return Locale::getRoot();
318 : }
319 : }
320 :
321 : const char *
322 0 : RuleBasedCollator::internalGetLocaleID(ULocDataLocaleType type, UErrorCode &errorCode) const {
323 0 : if(U_FAILURE(errorCode)) {
324 0 : return NULL;
325 : }
326 : const Locale *result;
327 0 : switch(type) {
328 : case ULOC_ACTUAL_LOCALE:
329 0 : result = actualLocaleIsSameAsValid ? &validLocale : &tailoring->actualLocale;
330 0 : break;
331 : case ULOC_VALID_LOCALE:
332 0 : result = &validLocale;
333 0 : break;
334 : case ULOC_REQUESTED_LOCALE:
335 : default:
336 0 : errorCode = U_ILLEGAL_ARGUMENT_ERROR;
337 0 : return NULL;
338 : }
339 0 : if(result->isBogus()) { return NULL; }
340 0 : const char *id = result->getName();
341 0 : return id[0] == 0 ? "root" : id;
342 : }
343 :
344 : const UnicodeString&
345 0 : RuleBasedCollator::getRules() const {
346 0 : return tailoring->rules;
347 : }
348 :
349 : void
350 0 : RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer) const {
351 0 : if(delta == UCOL_TAILORING_ONLY) {
352 0 : buffer = tailoring->rules;
353 0 : return;
354 : }
355 : // UCOL_FULL_RULES
356 0 : buffer.remove();
357 0 : CollationLoader::appendRootRules(buffer);
358 0 : buffer.append(tailoring->rules).getTerminatedBuffer();
359 : }
360 :
361 : void
362 0 : RuleBasedCollator::getVersion(UVersionInfo version) const {
363 0 : uprv_memcpy(version, tailoring->version, U_MAX_VERSION_LENGTH);
364 0 : version[0] += (UCOL_RUNTIME_VERSION << 4) + (UCOL_RUNTIME_VERSION >> 4);
365 0 : }
366 :
367 : UnicodeSet *
368 0 : RuleBasedCollator::getTailoredSet(UErrorCode &errorCode) const {
369 0 : if(U_FAILURE(errorCode)) { return NULL; }
370 0 : UnicodeSet *tailored = new UnicodeSet();
371 0 : if(tailored == NULL) {
372 0 : errorCode = U_MEMORY_ALLOCATION_ERROR;
373 0 : return NULL;
374 : }
375 0 : if(data->base != NULL) {
376 0 : TailoredSet(tailored).forData(data, errorCode);
377 0 : if(U_FAILURE(errorCode)) {
378 0 : delete tailored;
379 0 : return NULL;
380 : }
381 : }
382 0 : return tailored;
383 : }
384 :
385 : void
386 0 : RuleBasedCollator::internalGetContractionsAndExpansions(
387 : UnicodeSet *contractions, UnicodeSet *expansions,
388 : UBool addPrefixes, UErrorCode &errorCode) const {
389 0 : if(U_FAILURE(errorCode)) { return; }
390 0 : if(contractions != NULL) {
391 0 : contractions->clear();
392 : }
393 0 : if(expansions != NULL) {
394 0 : expansions->clear();
395 : }
396 0 : ContractionsAndExpansions(contractions, expansions, NULL, addPrefixes).forData(data, errorCode);
397 : }
398 :
399 : void
400 0 : RuleBasedCollator::internalAddContractions(UChar32 c, UnicodeSet &set, UErrorCode &errorCode) const {
401 0 : if(U_FAILURE(errorCode)) { return; }
402 0 : ContractionsAndExpansions(&set, NULL, NULL, FALSE).forCodePoint(data, c, errorCode);
403 : }
404 :
405 : const CollationSettings &
406 0 : RuleBasedCollator::getDefaultSettings() const {
407 0 : return *tailoring->settings;
408 : }
409 :
410 : UColAttributeValue
411 0 : RuleBasedCollator::getAttribute(UColAttribute attr, UErrorCode &errorCode) const {
412 0 : if(U_FAILURE(errorCode)) { return UCOL_DEFAULT; }
413 : int32_t option;
414 0 : switch(attr) {
415 : case UCOL_FRENCH_COLLATION:
416 0 : option = CollationSettings::BACKWARD_SECONDARY;
417 0 : break;
418 : case UCOL_ALTERNATE_HANDLING:
419 0 : return settings->getAlternateHandling();
420 : case UCOL_CASE_FIRST:
421 0 : return settings->getCaseFirst();
422 : case UCOL_CASE_LEVEL:
423 0 : option = CollationSettings::CASE_LEVEL;
424 0 : break;
425 : case UCOL_NORMALIZATION_MODE:
426 0 : option = CollationSettings::CHECK_FCD;
427 0 : break;
428 : case UCOL_STRENGTH:
429 0 : return (UColAttributeValue)settings->getStrength();
430 : case UCOL_HIRAGANA_QUATERNARY_MODE:
431 : // Deprecated attribute, unsettable.
432 0 : return UCOL_OFF;
433 : case UCOL_NUMERIC_COLLATION:
434 0 : option = CollationSettings::NUMERIC;
435 0 : break;
436 : default:
437 0 : errorCode = U_ILLEGAL_ARGUMENT_ERROR;
438 0 : return UCOL_DEFAULT;
439 : }
440 0 : return ((settings->options & option) == 0) ? UCOL_OFF : UCOL_ON;
441 : }
442 :
443 : void
444 0 : RuleBasedCollator::setAttribute(UColAttribute attr, UColAttributeValue value,
445 : UErrorCode &errorCode) {
446 0 : UColAttributeValue oldValue = getAttribute(attr, errorCode);
447 0 : if(U_FAILURE(errorCode)) { return; }
448 0 : if(value == oldValue) {
449 0 : setAttributeExplicitly(attr);
450 0 : return;
451 : }
452 0 : const CollationSettings &defaultSettings = getDefaultSettings();
453 0 : if(settings == &defaultSettings) {
454 0 : if(value == UCOL_DEFAULT) {
455 0 : setAttributeDefault(attr);
456 0 : return;
457 : }
458 : }
459 0 : CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
460 0 : if(ownedSettings == NULL) {
461 0 : errorCode = U_MEMORY_ALLOCATION_ERROR;
462 0 : return;
463 : }
464 :
465 0 : switch(attr) {
466 : case UCOL_FRENCH_COLLATION:
467 : ownedSettings->setFlag(CollationSettings::BACKWARD_SECONDARY, value,
468 0 : defaultSettings.options, errorCode);
469 0 : break;
470 : case UCOL_ALTERNATE_HANDLING:
471 0 : ownedSettings->setAlternateHandling(value, defaultSettings.options, errorCode);
472 0 : break;
473 : case UCOL_CASE_FIRST:
474 0 : ownedSettings->setCaseFirst(value, defaultSettings.options, errorCode);
475 0 : break;
476 : case UCOL_CASE_LEVEL:
477 : ownedSettings->setFlag(CollationSettings::CASE_LEVEL, value,
478 0 : defaultSettings.options, errorCode);
479 0 : break;
480 : case UCOL_NORMALIZATION_MODE:
481 : ownedSettings->setFlag(CollationSettings::CHECK_FCD, value,
482 0 : defaultSettings.options, errorCode);
483 0 : break;
484 : case UCOL_STRENGTH:
485 0 : ownedSettings->setStrength(value, defaultSettings.options, errorCode);
486 0 : break;
487 : case UCOL_HIRAGANA_QUATERNARY_MODE:
488 : // Deprecated attribute. Check for valid values but do not change anything.
489 0 : if(value != UCOL_OFF && value != UCOL_ON && value != UCOL_DEFAULT) {
490 0 : errorCode = U_ILLEGAL_ARGUMENT_ERROR;
491 : }
492 0 : break;
493 : case UCOL_NUMERIC_COLLATION:
494 0 : ownedSettings->setFlag(CollationSettings::NUMERIC, value, defaultSettings.options, errorCode);
495 0 : break;
496 : default:
497 0 : errorCode = U_ILLEGAL_ARGUMENT_ERROR;
498 0 : break;
499 : }
500 0 : if(U_FAILURE(errorCode)) { return; }
501 0 : setFastLatinOptions(*ownedSettings);
502 0 : if(value == UCOL_DEFAULT) {
503 0 : setAttributeDefault(attr);
504 : } else {
505 0 : setAttributeExplicitly(attr);
506 : }
507 : }
508 :
509 : Collator &
510 0 : RuleBasedCollator::setMaxVariable(UColReorderCode group, UErrorCode &errorCode) {
511 0 : if(U_FAILURE(errorCode)) { return *this; }
512 : // Convert the reorder code into a MaxVariable number, or UCOL_DEFAULT=-1.
513 : int32_t value;
514 0 : if(group == UCOL_REORDER_CODE_DEFAULT) {
515 0 : value = UCOL_DEFAULT;
516 0 : } else if(UCOL_REORDER_CODE_FIRST <= group && group <= UCOL_REORDER_CODE_CURRENCY) {
517 0 : value = group - UCOL_REORDER_CODE_FIRST;
518 : } else {
519 0 : errorCode = U_ILLEGAL_ARGUMENT_ERROR;
520 0 : return *this;
521 : }
522 0 : CollationSettings::MaxVariable oldValue = settings->getMaxVariable();
523 0 : if(value == oldValue) {
524 0 : setAttributeExplicitly(ATTR_VARIABLE_TOP);
525 0 : return *this;
526 : }
527 0 : const CollationSettings &defaultSettings = getDefaultSettings();
528 0 : if(settings == &defaultSettings) {
529 0 : if(value == UCOL_DEFAULT) {
530 0 : setAttributeDefault(ATTR_VARIABLE_TOP);
531 0 : return *this;
532 : }
533 : }
534 0 : CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
535 0 : if(ownedSettings == NULL) {
536 0 : errorCode = U_MEMORY_ALLOCATION_ERROR;
537 0 : return *this;
538 : }
539 :
540 0 : if(group == UCOL_REORDER_CODE_DEFAULT) {
541 0 : group = (UColReorderCode)(UCOL_REORDER_CODE_FIRST + defaultSettings.getMaxVariable());
542 : }
543 0 : uint32_t varTop = data->getLastPrimaryForGroup(group);
544 0 : U_ASSERT(varTop != 0);
545 0 : ownedSettings->setMaxVariable(value, defaultSettings.options, errorCode);
546 0 : if(U_FAILURE(errorCode)) { return *this; }
547 0 : ownedSettings->variableTop = varTop;
548 0 : setFastLatinOptions(*ownedSettings);
549 0 : if(value == UCOL_DEFAULT) {
550 0 : setAttributeDefault(ATTR_VARIABLE_TOP);
551 : } else {
552 0 : setAttributeExplicitly(ATTR_VARIABLE_TOP);
553 : }
554 0 : return *this;
555 : }
556 :
557 : UColReorderCode
558 0 : RuleBasedCollator::getMaxVariable() const {
559 0 : return (UColReorderCode)(UCOL_REORDER_CODE_FIRST + settings->getMaxVariable());
560 : }
561 :
562 : uint32_t
563 0 : RuleBasedCollator::getVariableTop(UErrorCode & /*errorCode*/) const {
564 0 : return settings->variableTop;
565 : }
566 :
567 : uint32_t
568 0 : RuleBasedCollator::setVariableTop(const UChar *varTop, int32_t len, UErrorCode &errorCode) {
569 0 : if(U_FAILURE(errorCode)) { return 0; }
570 0 : if(varTop == NULL && len !=0) {
571 0 : errorCode = U_ILLEGAL_ARGUMENT_ERROR;
572 0 : return 0;
573 : }
574 0 : if(len < 0) { len = u_strlen(varTop); }
575 0 : if(len == 0) {
576 0 : errorCode = U_ILLEGAL_ARGUMENT_ERROR;
577 0 : return 0;
578 : }
579 0 : UBool numeric = settings->isNumeric();
580 : int64_t ce1, ce2;
581 0 : if(settings->dontCheckFCD()) {
582 0 : UTF16CollationIterator ci(data, numeric, varTop, varTop, varTop + len);
583 0 : ce1 = ci.nextCE(errorCode);
584 0 : ce2 = ci.nextCE(errorCode);
585 : } else {
586 0 : FCDUTF16CollationIterator ci(data, numeric, varTop, varTop, varTop + len);
587 0 : ce1 = ci.nextCE(errorCode);
588 0 : ce2 = ci.nextCE(errorCode);
589 : }
590 0 : if(ce1 == Collation::NO_CE || ce2 != Collation::NO_CE) {
591 0 : errorCode = U_CE_NOT_FOUND_ERROR;
592 0 : return 0;
593 : }
594 0 : setVariableTop((uint32_t)(ce1 >> 32), errorCode);
595 0 : return settings->variableTop;
596 : }
597 :
598 : uint32_t
599 0 : RuleBasedCollator::setVariableTop(const UnicodeString &varTop, UErrorCode &errorCode) {
600 0 : return setVariableTop(varTop.getBuffer(), varTop.length(), errorCode);
601 : }
602 :
603 : void
604 0 : RuleBasedCollator::setVariableTop(uint32_t varTop, UErrorCode &errorCode) {
605 0 : if(U_FAILURE(errorCode)) { return; }
606 0 : if(varTop != settings->variableTop) {
607 : // Pin the variable top to the end of the reordering group which contains it.
608 : // Only a few special groups are supported.
609 0 : int32_t group = data->getGroupForPrimary(varTop);
610 0 : if(group < UCOL_REORDER_CODE_FIRST || UCOL_REORDER_CODE_CURRENCY < group) {
611 0 : errorCode = U_ILLEGAL_ARGUMENT_ERROR;
612 0 : return;
613 : }
614 0 : uint32_t v = data->getLastPrimaryForGroup(group);
615 0 : U_ASSERT(v != 0 && v >= varTop);
616 0 : varTop = v;
617 0 : if(varTop != settings->variableTop) {
618 0 : CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
619 0 : if(ownedSettings == NULL) {
620 0 : errorCode = U_MEMORY_ALLOCATION_ERROR;
621 0 : return;
622 : }
623 0 : ownedSettings->setMaxVariable(group - UCOL_REORDER_CODE_FIRST,
624 0 : getDefaultSettings().options, errorCode);
625 0 : if(U_FAILURE(errorCode)) { return; }
626 0 : ownedSettings->variableTop = varTop;
627 0 : setFastLatinOptions(*ownedSettings);
628 : }
629 : }
630 0 : if(varTop == getDefaultSettings().variableTop) {
631 0 : setAttributeDefault(ATTR_VARIABLE_TOP);
632 : } else {
633 0 : setAttributeExplicitly(ATTR_VARIABLE_TOP);
634 : }
635 : }
636 :
637 : int32_t
638 0 : RuleBasedCollator::getReorderCodes(int32_t *dest, int32_t capacity,
639 : UErrorCode &errorCode) const {
640 0 : if(U_FAILURE(errorCode)) { return 0; }
641 0 : if(capacity < 0 || (dest == NULL && capacity > 0)) {
642 0 : errorCode = U_ILLEGAL_ARGUMENT_ERROR;
643 0 : return 0;
644 : }
645 0 : int32_t length = settings->reorderCodesLength;
646 0 : if(length == 0) { return 0; }
647 0 : if(length > capacity) {
648 0 : errorCode = U_BUFFER_OVERFLOW_ERROR;
649 0 : return length;
650 : }
651 0 : uprv_memcpy(dest, settings->reorderCodes, length * 4);
652 0 : return length;
653 : }
654 :
655 : void
656 0 : RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes, int32_t length,
657 : UErrorCode &errorCode) {
658 0 : if(U_FAILURE(errorCode)) { return; }
659 0 : if(length < 0 || (reorderCodes == NULL && length > 0)) {
660 0 : errorCode = U_ILLEGAL_ARGUMENT_ERROR;
661 0 : return;
662 : }
663 0 : if(length == 1 && reorderCodes[0] == UCOL_REORDER_CODE_NONE) {
664 0 : length = 0;
665 : }
666 0 : if(length == settings->reorderCodesLength &&
667 0 : uprv_memcmp(reorderCodes, settings->reorderCodes, length * 4) == 0) {
668 0 : return;
669 : }
670 0 : const CollationSettings &defaultSettings = getDefaultSettings();
671 0 : if(length == 1 && reorderCodes[0] == UCOL_REORDER_CODE_DEFAULT) {
672 0 : if(settings != &defaultSettings) {
673 0 : CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
674 0 : if(ownedSettings == NULL) {
675 0 : errorCode = U_MEMORY_ALLOCATION_ERROR;
676 0 : return;
677 : }
678 0 : ownedSettings->copyReorderingFrom(defaultSettings, errorCode);
679 0 : setFastLatinOptions(*ownedSettings);
680 : }
681 0 : return;
682 : }
683 0 : CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
684 0 : if(ownedSettings == NULL) {
685 0 : errorCode = U_MEMORY_ALLOCATION_ERROR;
686 0 : return;
687 : }
688 0 : ownedSettings->setReordering(*data, reorderCodes, length, errorCode);
689 0 : setFastLatinOptions(*ownedSettings);
690 : }
691 :
692 : void
693 0 : RuleBasedCollator::setFastLatinOptions(CollationSettings &ownedSettings) const {
694 0 : ownedSettings.fastLatinOptions = CollationFastLatin::getOptions(
695 0 : data, ownedSettings,
696 : ownedSettings.fastLatinPrimaries, UPRV_LENGTHOF(ownedSettings.fastLatinPrimaries));
697 0 : }
698 :
699 : UCollationResult
700 0 : RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right,
701 : UErrorCode &errorCode) const {
702 0 : if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
703 0 : return doCompare(left.getBuffer(), left.length(),
704 0 : right.getBuffer(), right.length(), errorCode);
705 : }
706 :
707 : UCollationResult
708 0 : RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right,
709 : int32_t length, UErrorCode &errorCode) const {
710 0 : if(U_FAILURE(errorCode) || length == 0) { return UCOL_EQUAL; }
711 0 : if(length < 0) {
712 0 : errorCode = U_ILLEGAL_ARGUMENT_ERROR;
713 0 : return UCOL_EQUAL;
714 : }
715 0 : int32_t leftLength = left.length();
716 0 : int32_t rightLength = right.length();
717 0 : if(leftLength > length) { leftLength = length; }
718 0 : if(rightLength > length) { rightLength = length; }
719 0 : return doCompare(left.getBuffer(), leftLength,
720 0 : right.getBuffer(), rightLength, errorCode);
721 : }
722 :
723 : UCollationResult
724 0 : RuleBasedCollator::compare(const UChar *left, int32_t leftLength,
725 : const UChar *right, int32_t rightLength,
726 : UErrorCode &errorCode) const {
727 0 : if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
728 0 : if((left == NULL && leftLength != 0) || (right == NULL && rightLength != 0)) {
729 0 : errorCode = U_ILLEGAL_ARGUMENT_ERROR;
730 0 : return UCOL_EQUAL;
731 : }
732 : // Make sure both or neither strings have a known length.
733 : // We do not optimize for mixed length/termination.
734 0 : if(leftLength >= 0) {
735 0 : if(rightLength < 0) { rightLength = u_strlen(right); }
736 : } else {
737 0 : if(rightLength >= 0) { leftLength = u_strlen(left); }
738 : }
739 0 : return doCompare(left, leftLength, right, rightLength, errorCode);
740 : }
741 :
742 : UCollationResult
743 0 : RuleBasedCollator::compareUTF8(const StringPiece &left, const StringPiece &right,
744 : UErrorCode &errorCode) const {
745 0 : if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
746 0 : const uint8_t *leftBytes = reinterpret_cast<const uint8_t *>(left.data());
747 0 : const uint8_t *rightBytes = reinterpret_cast<const uint8_t *>(right.data());
748 0 : if((leftBytes == NULL && !left.empty()) || (rightBytes == NULL && !right.empty())) {
749 0 : errorCode = U_ILLEGAL_ARGUMENT_ERROR;
750 0 : return UCOL_EQUAL;
751 : }
752 0 : return doCompare(leftBytes, left.length(), rightBytes, right.length(), errorCode);
753 : }
754 :
755 : UCollationResult
756 0 : RuleBasedCollator::internalCompareUTF8(const char *left, int32_t leftLength,
757 : const char *right, int32_t rightLength,
758 : UErrorCode &errorCode) const {
759 0 : if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
760 0 : if((left == NULL && leftLength != 0) || (right == NULL && rightLength != 0)) {
761 0 : errorCode = U_ILLEGAL_ARGUMENT_ERROR;
762 0 : return UCOL_EQUAL;
763 : }
764 : // Make sure both or neither strings have a known length.
765 : // We do not optimize for mixed length/termination.
766 0 : if(leftLength >= 0) {
767 0 : if(rightLength < 0) { rightLength = uprv_strlen(right); }
768 : } else {
769 0 : if(rightLength >= 0) { leftLength = uprv_strlen(left); }
770 : }
771 : return doCompare(reinterpret_cast<const uint8_t *>(left), leftLength,
772 0 : reinterpret_cast<const uint8_t *>(right), rightLength, errorCode);
773 : }
774 :
775 : namespace {
776 :
777 : /**
778 : * Abstract iterator for identical-level string comparisons.
779 : * Returns FCD code points and handles temporary switching to NFD.
780 : */
781 : class NFDIterator : public UObject {
782 : public:
783 0 : NFDIterator() : index(-1), length(0) {}
784 0 : virtual ~NFDIterator() {}
785 : /**
786 : * Returns the next code point from the internal normalization buffer,
787 : * or else the next text code point.
788 : * Returns -1 at the end of the text.
789 : */
790 0 : UChar32 nextCodePoint() {
791 0 : if(index >= 0) {
792 0 : if(index == length) {
793 0 : index = -1;
794 : } else {
795 : UChar32 c;
796 0 : U16_NEXT_UNSAFE(decomp, index, c);
797 0 : return c;
798 : }
799 : }
800 0 : return nextRawCodePoint();
801 : }
802 : /**
803 : * @param nfcImpl
804 : * @param c the last code point returned by nextCodePoint() or nextDecomposedCodePoint()
805 : * @return the first code point in c's decomposition,
806 : * or c itself if it was decomposed already or if it does not decompose
807 : */
808 0 : UChar32 nextDecomposedCodePoint(const Normalizer2Impl &nfcImpl, UChar32 c) {
809 0 : if(index >= 0) { return c; }
810 0 : decomp = nfcImpl.getDecomposition(c, buffer, length);
811 0 : if(decomp == NULL) { return c; }
812 0 : index = 0;
813 0 : U16_NEXT_UNSAFE(decomp, index, c);
814 0 : return c;
815 : }
816 : protected:
817 : /**
818 : * Returns the next text code point in FCD order.
819 : * Returns -1 at the end of the text.
820 : */
821 : virtual UChar32 nextRawCodePoint() = 0;
822 : private:
823 : const UChar *decomp;
824 : UChar buffer[4];
825 : int32_t index;
826 : int32_t length;
827 : };
828 :
829 0 : class UTF16NFDIterator : public NFDIterator {
830 : public:
831 0 : UTF16NFDIterator(const UChar *text, const UChar *textLimit) : s(text), limit(textLimit) {}
832 : protected:
833 0 : virtual UChar32 nextRawCodePoint() {
834 0 : if(s == limit) { return U_SENTINEL; }
835 0 : UChar32 c = *s++;
836 0 : if(limit == NULL && c == 0) {
837 0 : s = NULL;
838 0 : return U_SENTINEL;
839 : }
840 : UChar trail;
841 0 : if(U16_IS_LEAD(c) && s != limit && U16_IS_TRAIL(trail = *s)) {
842 0 : ++s;
843 0 : c = U16_GET_SUPPLEMENTARY(c, trail);
844 : }
845 0 : return c;
846 : }
847 :
848 : const UChar *s;
849 : const UChar *limit;
850 : };
851 :
// UTF16NFDIterator that prepares its text with Normalizer2Impl::makeFCD().
// If the first makeFCD() call (made without an output buffer) already
// reaches the end of the text, the original text is iterated directly.
// Otherwise the processed prefix is copied into str, the remainder is
// appended through a ReorderingBuffer, and iteration runs over str.
// If an error occurs, s and limit keep the NULL values from the base-class
// initializer.
852 0 : class FCDUTF16NFDIterator : public UTF16NFDIterator {
853 : public:
854 0 : FCDUTF16NFDIterator(const Normalizer2Impl &nfcImpl, const UChar *text, const UChar *textLimit)
855 0 : : UTF16NFDIterator(NULL, NULL) {
// Errors are kept local; this constructor has no way to report them.
856 0 : UErrorCode errorCode = U_ZERO_ERROR;
// NOTE(review): spanLimit is presumably the end of the prefix that is already FCD - confirm against makeFCD().
857 0 : const UChar *spanLimit = nfcImpl.makeFCD(text, textLimit, NULL, errorCode);
858 0 : if(U_FAILURE(errorCode)) { return; }
// Whole text covered (explicit limit reached, or NUL terminator reached).
859 0 : if(spanLimit == textLimit || (textLimit == NULL && *spanLimit == 0)) {
860 0 : s = text;
861 0 : limit = spanLimit;
862 : } else {
863 0 : str.setTo(text, (int32_t)(spanLimit - text));
// Inner scope: the ReorderingBuffer is destroyed before str is read below.
864 : {
865 0 : ReorderingBuffer buffer(nfcImpl, str);
866 0 : if(buffer.init(str.length(), errorCode)) {
867 0 : nfcImpl.makeFCD(spanLimit, textLimit, &buffer, errorCode);
868 : }
869 : }
870 0 : if(U_SUCCESS(errorCode)) {
871 0 : s = str.getBuffer();
872 0 : limit = s + str.length();
873 : }
874 : }
875 : }
876 : private:
// Holds the processed text when the original text could not be used as is.
877 : UnicodeString str;
878 : };
879 :
880 0 : class UTF8NFDIterator : public NFDIterator {
881 : public:
882 0 : UTF8NFDIterator(const uint8_t *text, int32_t textLength)
883 0 : : s(text), pos(0), length(textLength) {}
884 : protected:
885 0 : virtual UChar32 nextRawCodePoint() {
886 0 : if(pos == length || (s[pos] == 0 && length < 0)) { return U_SENTINEL; }
887 : UChar32 c;
888 0 : U8_NEXT_OR_FFFD(s, pos, length, c);
889 0 : return c;
890 : }
891 :
892 : const uint8_t *s;
893 : int32_t pos;
894 : int32_t length;
895 : };
896 :
897 0 : class FCDUTF8NFDIterator : public NFDIterator {
898 : public:
899 0 : FCDUTF8NFDIterator(const CollationData *data, const uint8_t *text, int32_t textLength)
900 0 : : u8ci(data, FALSE, text, 0, textLength) {}
901 : protected:
902 0 : virtual UChar32 nextRawCodePoint() {
903 0 : UErrorCode errorCode = U_ZERO_ERROR;
904 0 : return u8ci.nextCodePoint(errorCode);
905 : }
906 : private:
907 : FCDUTF8CollationIterator u8ci;
908 : };
909 :
910 0 : class UIterNFDIterator : public NFDIterator {
911 : public:
912 0 : UIterNFDIterator(UCharIterator &it) : iter(it) {}
913 : protected:
914 0 : virtual UChar32 nextRawCodePoint() {
915 0 : return uiter_next32(&iter);
916 : }
917 : private:
918 : UCharIterator &iter;
919 : };
920 :
921 0 : class FCDUIterNFDIterator : public NFDIterator {
922 : public:
923 0 : FCDUIterNFDIterator(const CollationData *data, UCharIterator &it, int32_t startIndex)
924 0 : : uici(data, FALSE, it, startIndex) {}
925 : protected:
926 0 : virtual UChar32 nextRawCodePoint() {
927 0 : UErrorCode errorCode = U_ZERO_ERROR;
928 0 : return uici.nextCodePoint(errorCode);
929 : }
930 : private:
931 : FCDUIterCollationIterator uici;
932 : };
933 :
934 0 : UCollationResult compareNFDIter(const Normalizer2Impl &nfcImpl,
935 : NFDIterator &left, NFDIterator &right) {
936 : for(;;) {
937 : // Fetch the next FCD code point from each string.
938 0 : UChar32 leftCp = left.nextCodePoint();
939 0 : UChar32 rightCp = right.nextCodePoint();
940 0 : if(leftCp == rightCp) {
941 0 : if(leftCp < 0) { break; }
942 0 : continue;
943 : }
944 : // If they are different, then decompose each and compare again.
945 0 : if(leftCp < 0) {
946 0 : leftCp = -2; // end of string
947 0 : } else if(leftCp == 0xfffe) {
948 0 : leftCp = -1; // U+FFFE: merge separator
949 : } else {
950 0 : leftCp = left.nextDecomposedCodePoint(nfcImpl, leftCp);
951 : }
952 0 : if(rightCp < 0) {
953 0 : rightCp = -2; // end of string
954 0 : } else if(rightCp == 0xfffe) {
955 0 : rightCp = -1; // U+FFFE: merge separator
956 : } else {
957 0 : rightCp = right.nextDecomposedCodePoint(nfcImpl, rightCp);
958 : }
959 0 : if(leftCp < rightCp) { return UCOL_LESS; }
960 0 : if(leftCp > rightCp) { return UCOL_GREATER; }
961 0 : }
962 0 : return UCOL_EQUAL;
963 : }
964 :
965 : } // namespace
966 :
967 : UCollationResult
968 0 : RuleBasedCollator::doCompare(const UChar *left, int32_t leftLength,
969 : const UChar *right, int32_t rightLength,
970 : UErrorCode &errorCode) const {
971 : // U_FAILURE(errorCode) checked by caller.
972 0 : if(left == right && leftLength == rightLength) {
973 0 : return UCOL_EQUAL;
974 : }
975 :
976 : // Identical-prefix test.
977 : const UChar *leftLimit;
978 : const UChar *rightLimit;
979 0 : int32_t equalPrefixLength = 0;
980 0 : if(leftLength < 0) {
981 0 : leftLimit = NULL;
982 0 : rightLimit = NULL;
983 : UChar c;
984 0 : while((c = left[equalPrefixLength]) == right[equalPrefixLength]) {
985 0 : if(c == 0) { return UCOL_EQUAL; }
986 0 : ++equalPrefixLength;
987 : }
988 : } else {
989 0 : leftLimit = left + leftLength;
990 0 : rightLimit = right + rightLength;
991 : for(;;) {
992 0 : if(equalPrefixLength == leftLength) {
993 0 : if(equalPrefixLength == rightLength) { return UCOL_EQUAL; }
994 0 : break;
995 0 : } else if(equalPrefixLength == rightLength ||
996 0 : left[equalPrefixLength] != right[equalPrefixLength]) {
997 : break;
998 : }
999 0 : ++equalPrefixLength;
1000 : }
1001 : }
1002 :
1003 0 : UBool numeric = settings->isNumeric();
1004 0 : if(equalPrefixLength > 0) {
1005 0 : if((equalPrefixLength != leftLength &&
1006 0 : data->isUnsafeBackward(left[equalPrefixLength], numeric)) ||
1007 0 : (equalPrefixLength != rightLength &&
1008 0 : data->isUnsafeBackward(right[equalPrefixLength], numeric))) {
1009 : // Identical prefix: Back up to the start of a contraction or reordering sequence.
1010 0 : while(--equalPrefixLength > 0 &&
1011 0 : data->isUnsafeBackward(left[equalPrefixLength], numeric)) {}
1012 : }
1013 : // Notes:
1014 : // - A longer string can compare equal to a prefix of it if only ignorables follow.
1015 : // - With a backward level, a longer string can compare less-than a prefix of it.
1016 :
1017 : // Pass the actual start of each string into the CollationIterators,
1018 : // plus the equalPrefixLength position,
1019 : // so that prefix matches back into the equal prefix work.
1020 : }
1021 :
1022 : int32_t result;
1023 0 : int32_t fastLatinOptions = settings->fastLatinOptions;
1024 0 : if(fastLatinOptions >= 0 &&
1025 0 : (equalPrefixLength == leftLength ||
1026 0 : left[equalPrefixLength] <= CollationFastLatin::LATIN_MAX) &&
1027 0 : (equalPrefixLength == rightLength ||
1028 0 : right[equalPrefixLength] <= CollationFastLatin::LATIN_MAX)) {
1029 0 : if(leftLength >= 0) {
1030 0 : result = CollationFastLatin::compareUTF16(data->fastLatinTable,
1031 0 : settings->fastLatinPrimaries,
1032 : fastLatinOptions,
1033 0 : left + equalPrefixLength,
1034 : leftLength - equalPrefixLength,
1035 0 : right + equalPrefixLength,
1036 0 : rightLength - equalPrefixLength);
1037 : } else {
1038 0 : result = CollationFastLatin::compareUTF16(data->fastLatinTable,
1039 0 : settings->fastLatinPrimaries,
1040 : fastLatinOptions,
1041 0 : left + equalPrefixLength, -1,
1042 0 : right + equalPrefixLength, -1);
1043 : }
1044 : } else {
1045 0 : result = CollationFastLatin::BAIL_OUT_RESULT;
1046 : }
1047 :
1048 0 : if(result == CollationFastLatin::BAIL_OUT_RESULT) {
1049 0 : if(settings->dontCheckFCD()) {
1050 0 : UTF16CollationIterator leftIter(data, numeric,
1051 0 : left, left + equalPrefixLength, leftLimit);
1052 0 : UTF16CollationIterator rightIter(data, numeric,
1053 0 : right, right + equalPrefixLength, rightLimit);
1054 0 : result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1055 : } else {
1056 0 : FCDUTF16CollationIterator leftIter(data, numeric,
1057 0 : left, left + equalPrefixLength, leftLimit);
1058 0 : FCDUTF16CollationIterator rightIter(data, numeric,
1059 0 : right, right + equalPrefixLength, rightLimit);
1060 0 : result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1061 : }
1062 : }
1063 0 : if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
1064 0 : return (UCollationResult)result;
1065 : }
1066 :
1067 : // Note: If NUL-terminated, we could get the actual limits from the iterators now.
1068 : // That would complicate the iterators a bit, NUL-terminated strings are only a C convenience,
1069 : // and the benefit seems unlikely to be measurable.
1070 :
1071 : // Compare identical level.
1072 0 : const Normalizer2Impl &nfcImpl = data->nfcImpl;
1073 0 : left += equalPrefixLength;
1074 0 : right += equalPrefixLength;
1075 0 : if(settings->dontCheckFCD()) {
1076 0 : UTF16NFDIterator leftIter(left, leftLimit);
1077 0 : UTF16NFDIterator rightIter(right, rightLimit);
1078 0 : return compareNFDIter(nfcImpl, leftIter, rightIter);
1079 : } else {
1080 0 : FCDUTF16NFDIterator leftIter(nfcImpl, left, leftLimit);
1081 0 : FCDUTF16NFDIterator rightIter(nfcImpl, right, rightLimit);
1082 0 : return compareNFDIter(nfcImpl, leftIter, rightIter);
1083 : }
1084 : }
1085 :
1086 : UCollationResult
1087 0 : RuleBasedCollator::doCompare(const uint8_t *left, int32_t leftLength,
1088 : const uint8_t *right, int32_t rightLength,
1089 : UErrorCode &errorCode) const {
1090 : // U_FAILURE(errorCode) checked by caller.
1091 0 : if(left == right && leftLength == rightLength) {
1092 0 : return UCOL_EQUAL;
1093 : }
1094 :
1095 : // Identical-prefix test.
1096 0 : int32_t equalPrefixLength = 0;
1097 0 : if(leftLength < 0) {
1098 : uint8_t c;
1099 0 : while((c = left[equalPrefixLength]) == right[equalPrefixLength]) {
1100 0 : if(c == 0) { return UCOL_EQUAL; }
1101 0 : ++equalPrefixLength;
1102 : }
1103 : } else {
1104 : for(;;) {
1105 0 : if(equalPrefixLength == leftLength) {
1106 0 : if(equalPrefixLength == rightLength) { return UCOL_EQUAL; }
1107 0 : break;
1108 0 : } else if(equalPrefixLength == rightLength ||
1109 0 : left[equalPrefixLength] != right[equalPrefixLength]) {
1110 : break;
1111 : }
1112 0 : ++equalPrefixLength;
1113 : }
1114 : }
1115 : // Back up to the start of a partially-equal code point.
1116 0 : if(equalPrefixLength > 0 &&
1117 0 : ((equalPrefixLength != leftLength && U8_IS_TRAIL(left[equalPrefixLength])) ||
1118 0 : (equalPrefixLength != rightLength && U8_IS_TRAIL(right[equalPrefixLength])))) {
1119 0 : while(--equalPrefixLength > 0 && U8_IS_TRAIL(left[equalPrefixLength])) {}
1120 : }
1121 :
1122 0 : UBool numeric = settings->isNumeric();
1123 0 : if(equalPrefixLength > 0) {
1124 0 : UBool unsafe = FALSE;
1125 0 : if(equalPrefixLength != leftLength) {
1126 0 : int32_t i = equalPrefixLength;
1127 : UChar32 c;
1128 0 : U8_NEXT_OR_FFFD(left, i, leftLength, c);
1129 0 : unsafe = data->isUnsafeBackward(c, numeric);
1130 : }
1131 0 : if(!unsafe && equalPrefixLength != rightLength) {
1132 0 : int32_t i = equalPrefixLength;
1133 : UChar32 c;
1134 0 : U8_NEXT_OR_FFFD(right, i, rightLength, c);
1135 0 : unsafe = data->isUnsafeBackward(c, numeric);
1136 : }
1137 0 : if(unsafe) {
1138 : // Identical prefix: Back up to the start of a contraction or reordering sequence.
1139 : UChar32 c;
1140 0 : do {
1141 0 : U8_PREV_OR_FFFD(left, 0, equalPrefixLength, c);
1142 0 : } while(equalPrefixLength > 0 && data->isUnsafeBackward(c, numeric));
1143 : }
1144 : // See the notes in the UTF-16 version.
1145 :
1146 : // Pass the actual start of each string into the CollationIterators,
1147 : // plus the equalPrefixLength position,
1148 : // so that prefix matches back into the equal prefix work.
1149 : }
1150 :
1151 : int32_t result;
1152 0 : int32_t fastLatinOptions = settings->fastLatinOptions;
1153 0 : if(fastLatinOptions >= 0 &&
1154 0 : (equalPrefixLength == leftLength ||
1155 0 : left[equalPrefixLength] <= CollationFastLatin::LATIN_MAX_UTF8_LEAD) &&
1156 0 : (equalPrefixLength == rightLength ||
1157 0 : right[equalPrefixLength] <= CollationFastLatin::LATIN_MAX_UTF8_LEAD)) {
1158 0 : if(leftLength >= 0) {
1159 0 : result = CollationFastLatin::compareUTF8(data->fastLatinTable,
1160 0 : settings->fastLatinPrimaries,
1161 : fastLatinOptions,
1162 : left + equalPrefixLength,
1163 : leftLength - equalPrefixLength,
1164 : right + equalPrefixLength,
1165 0 : rightLength - equalPrefixLength);
1166 : } else {
1167 0 : result = CollationFastLatin::compareUTF8(data->fastLatinTable,
1168 0 : settings->fastLatinPrimaries,
1169 : fastLatinOptions,
1170 : left + equalPrefixLength, -1,
1171 0 : right + equalPrefixLength, -1);
1172 : }
1173 : } else {
1174 0 : result = CollationFastLatin::BAIL_OUT_RESULT;
1175 : }
1176 :
1177 0 : if(result == CollationFastLatin::BAIL_OUT_RESULT) {
1178 0 : if(settings->dontCheckFCD()) {
1179 0 : UTF8CollationIterator leftIter(data, numeric, left, equalPrefixLength, leftLength);
1180 0 : UTF8CollationIterator rightIter(data, numeric, right, equalPrefixLength, rightLength);
1181 0 : result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1182 : } else {
1183 0 : FCDUTF8CollationIterator leftIter(data, numeric, left, equalPrefixLength, leftLength);
1184 0 : FCDUTF8CollationIterator rightIter(data, numeric, right, equalPrefixLength, rightLength);
1185 0 : result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1186 : }
1187 : }
1188 0 : if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
1189 0 : return (UCollationResult)result;
1190 : }
1191 :
1192 : // Note: If NUL-terminated, we could get the actual limits from the iterators now.
1193 : // That would complicate the iterators a bit, NUL-terminated strings are only a C convenience,
1194 : // and the benefit seems unlikely to be measurable.
1195 :
1196 : // Compare identical level.
1197 0 : const Normalizer2Impl &nfcImpl = data->nfcImpl;
1198 0 : left += equalPrefixLength;
1199 0 : right += equalPrefixLength;
1200 0 : if(leftLength > 0) {
1201 0 : leftLength -= equalPrefixLength;
1202 0 : rightLength -= equalPrefixLength;
1203 : }
1204 0 : if(settings->dontCheckFCD()) {
1205 0 : UTF8NFDIterator leftIter(left, leftLength);
1206 0 : UTF8NFDIterator rightIter(right, rightLength);
1207 0 : return compareNFDIter(nfcImpl, leftIter, rightIter);
1208 : } else {
1209 0 : FCDUTF8NFDIterator leftIter(data, left, leftLength);
1210 0 : FCDUTF8NFDIterator rightIter(data, right, rightLength);
1211 0 : return compareNFDIter(nfcImpl, leftIter, rightIter);
1212 : }
1213 : }
1214 :
1215 : UCollationResult
1216 0 : RuleBasedCollator::compare(UCharIterator &left, UCharIterator &right,
1217 : UErrorCode &errorCode) const {
1218 0 : if(U_FAILURE(errorCode) || &left == &right) { return UCOL_EQUAL; }
1219 0 : UBool numeric = settings->isNumeric();
1220 :
1221 : // Identical-prefix test.
1222 0 : int32_t equalPrefixLength = 0;
1223 : {
1224 : UChar32 leftUnit;
1225 : UChar32 rightUnit;
1226 0 : while((leftUnit = left.next(&left)) == (rightUnit = right.next(&right))) {
1227 0 : if(leftUnit < 0) { return UCOL_EQUAL; }
1228 0 : ++equalPrefixLength;
1229 : }
1230 :
1231 : // Back out the code units that differed, for the real collation comparison.
1232 0 : if(leftUnit >= 0) { left.previous(&left); }
1233 0 : if(rightUnit >= 0) { right.previous(&right); }
1234 :
1235 0 : if(equalPrefixLength > 0) {
1236 0 : if((leftUnit >= 0 && data->isUnsafeBackward(leftUnit, numeric)) ||
1237 0 : (rightUnit >= 0 && data->isUnsafeBackward(rightUnit, numeric))) {
1238 : // Identical prefix: Back up to the start of a contraction or reordering sequence.
1239 0 : do {
1240 0 : --equalPrefixLength;
1241 0 : leftUnit = left.previous(&left);
1242 0 : right.previous(&right);
1243 0 : } while(equalPrefixLength > 0 && data->isUnsafeBackward(leftUnit, numeric));
1244 : }
1245 : // See the notes in the UTF-16 version.
1246 : }
1247 : }
1248 :
1249 : UCollationResult result;
1250 0 : if(settings->dontCheckFCD()) {
1251 0 : UIterCollationIterator leftIter(data, numeric, left);
1252 0 : UIterCollationIterator rightIter(data, numeric, right);
1253 0 : result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1254 : } else {
1255 0 : FCDUIterCollationIterator leftIter(data, numeric, left, equalPrefixLength);
1256 0 : FCDUIterCollationIterator rightIter(data, numeric, right, equalPrefixLength);
1257 0 : result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1258 : }
1259 0 : if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
1260 0 : return result;
1261 : }
1262 :
1263 : // Compare identical level.
1264 0 : left.move(&left, equalPrefixLength, UITER_ZERO);
1265 0 : right.move(&right, equalPrefixLength, UITER_ZERO);
1266 0 : const Normalizer2Impl &nfcImpl = data->nfcImpl;
1267 0 : if(settings->dontCheckFCD()) {
1268 0 : UIterNFDIterator leftIter(left);
1269 0 : UIterNFDIterator rightIter(right);
1270 0 : return compareNFDIter(nfcImpl, leftIter, rightIter);
1271 : } else {
1272 0 : FCDUIterNFDIterator leftIter(data, left, equalPrefixLength);
1273 0 : FCDUIterNFDIterator rightIter(data, right, equalPrefixLength);
1274 0 : return compareNFDIter(nfcImpl, leftIter, rightIter);
1275 : }
1276 : }
1277 :
1278 : CollationKey &
1279 0 : RuleBasedCollator::getCollationKey(const UnicodeString &s, CollationKey &key,
1280 : UErrorCode &errorCode) const {
1281 0 : return getCollationKey(s.getBuffer(), s.length(), key, errorCode);
1282 : }
1283 :
1284 : CollationKey &
1285 0 : RuleBasedCollator::getCollationKey(const UChar *s, int32_t length, CollationKey& key,
1286 : UErrorCode &errorCode) const {
1287 0 : if(U_FAILURE(errorCode)) {
1288 0 : return key.setToBogus();
1289 : }
1290 0 : if(s == NULL && length != 0) {
1291 0 : errorCode = U_ILLEGAL_ARGUMENT_ERROR;
1292 0 : return key.setToBogus();
1293 : }
1294 0 : key.reset(); // resets the "bogus" state
1295 0 : CollationKeyByteSink sink(key);
1296 0 : writeSortKey(s, length, sink, errorCode);
1297 0 : if(U_FAILURE(errorCode)) {
1298 0 : key.setToBogus();
1299 0 : } else if(key.isBogus()) {
1300 0 : errorCode = U_MEMORY_ALLOCATION_ERROR;
1301 : } else {
1302 0 : key.setLength(sink.NumberOfBytesAppended());
1303 : }
1304 0 : return key;
1305 : }
1306 :
1307 : int32_t
1308 0 : RuleBasedCollator::getSortKey(const UnicodeString &s,
1309 : uint8_t *dest, int32_t capacity) const {
1310 0 : return getSortKey(s.getBuffer(), s.length(), dest, capacity);
1311 : }
1312 :
1313 : int32_t
1314 0 : RuleBasedCollator::getSortKey(const UChar *s, int32_t length,
1315 : uint8_t *dest, int32_t capacity) const {
1316 0 : if((s == NULL && length != 0) || capacity < 0 || (dest == NULL && capacity > 0)) {
1317 0 : return 0;
1318 : }
1319 0 : uint8_t noDest[1] = { 0 };
1320 0 : if(dest == NULL) {
1321 : // Distinguish pure preflighting from an allocation error.
1322 0 : dest = noDest;
1323 0 : capacity = 0;
1324 : }
1325 0 : FixedSortKeyByteSink sink(reinterpret_cast<char *>(dest), capacity);
1326 0 : UErrorCode errorCode = U_ZERO_ERROR;
1327 0 : writeSortKey(s, length, sink, errorCode);
1328 0 : return U_SUCCESS(errorCode) ? sink.NumberOfBytesAppended() : 0;
1329 : }
1330 :
1331 : void
1332 0 : RuleBasedCollator::writeSortKey(const UChar *s, int32_t length,
1333 : SortKeyByteSink &sink, UErrorCode &errorCode) const {
1334 0 : if(U_FAILURE(errorCode)) { return; }
1335 0 : const UChar *limit = (length >= 0) ? s + length : NULL;
1336 0 : UBool numeric = settings->isNumeric();
1337 0 : CollationKeys::LevelCallback callback;
1338 0 : if(settings->dontCheckFCD()) {
1339 0 : UTF16CollationIterator iter(data, numeric, s, s, limit);
1340 0 : CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes, *settings,
1341 : sink, Collation::PRIMARY_LEVEL,
1342 0 : callback, TRUE, errorCode);
1343 : } else {
1344 0 : FCDUTF16CollationIterator iter(data, numeric, s, s, limit);
1345 0 : CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes, *settings,
1346 : sink, Collation::PRIMARY_LEVEL,
1347 0 : callback, TRUE, errorCode);
1348 : }
1349 0 : if(settings->getStrength() == UCOL_IDENTICAL) {
1350 0 : writeIdenticalLevel(s, limit, sink, errorCode);
1351 : }
1352 : static const char terminator = 0; // TERMINATOR_BYTE
1353 0 : sink.Append(&terminator, 1);
1354 : }
1355 :
1356 : void
1357 0 : RuleBasedCollator::writeIdenticalLevel(const UChar *s, const UChar *limit,
1358 : SortKeyByteSink &sink, UErrorCode &errorCode) const {
1359 : // NFD quick check
1360 0 : const UChar *nfdQCYesLimit = data->nfcImpl.decompose(s, limit, NULL, errorCode);
1361 0 : if(U_FAILURE(errorCode)) { return; }
1362 0 : sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
1363 0 : UChar32 prev = 0;
1364 0 : if(nfdQCYesLimit != s) {
1365 0 : prev = u_writeIdenticalLevelRun(prev, s, (int32_t)(nfdQCYesLimit - s), sink);
1366 : }
1367 : // Is there non-NFD text?
1368 : int32_t destLengthEstimate;
1369 0 : if(limit != NULL) {
1370 0 : if(nfdQCYesLimit == limit) { return; }
1371 0 : destLengthEstimate = (int32_t)(limit - nfdQCYesLimit);
1372 : } else {
1373 : // s is NUL-terminated
1374 0 : if(*nfdQCYesLimit == 0) { return; }
1375 0 : destLengthEstimate = -1;
1376 : }
1377 0 : UnicodeString nfd;
1378 0 : data->nfcImpl.decompose(nfdQCYesLimit, limit, nfd, destLengthEstimate, errorCode);
1379 0 : u_writeIdenticalLevelRun(prev, nfd.getBuffer(), nfd.length(), sink);
1380 : }
1381 :
1382 : namespace {
1383 :
1384 : /**
1385 : * internalNextSortKeyPart() calls CollationKeys::writeSortKeyUpToQuaternary()
1386 : * with an instance of this callback class.
1387 : * When another level is about to be written, the callback
1388 : * records the level and the number of bytes that will be written until
1389 : * the sink (which is actually a FixedSortKeyByteSink) fills up.
1390 : *
1391 : * When internalNextSortKeyPart() is called again, it restarts with the last level
1392 : * and ignores as many bytes as were written previously for that level.
1393 : */
1394 : class PartLevelCallback : public CollationKeys::LevelCallback {
1395 : public:
1396 0 : PartLevelCallback(const SortKeyByteSink &s)
1397 0 : : sink(s), level(Collation::PRIMARY_LEVEL) {
1398 0 : levelCapacity = sink.GetRemainingCapacity();
1399 0 : }
1400 0 : virtual ~PartLevelCallback() {}
1401 0 : virtual UBool needToWrite(Collation::Level l) {
1402 0 : if(!sink.Overflowed()) {
1403 : // Remember a level that will be at least partially written.
1404 0 : level = l;
1405 0 : levelCapacity = sink.GetRemainingCapacity();
1406 0 : return TRUE;
1407 : } else {
1408 0 : return FALSE;
1409 : }
1410 : }
1411 0 : Collation::Level getLevel() const { return level; }
1412 0 : int32_t getLevelCapacity() const { return levelCapacity; }
1413 :
1414 : private:
1415 : const SortKeyByteSink &sink;
1416 : Collation::Level level;
1417 : int32_t levelCapacity;
1418 : };
1419 :
1420 : } // namespace
1421 :
1422 : int32_t
1423 0 : RuleBasedCollator::internalNextSortKeyPart(UCharIterator *iter, uint32_t state[2],
1424 : uint8_t *dest, int32_t count, UErrorCode &errorCode) const {
1425 0 : if(U_FAILURE(errorCode)) { return 0; }
1426 0 : if(iter == NULL || state == NULL || count < 0 || (count > 0 && dest == NULL)) {
1427 0 : errorCode = U_ILLEGAL_ARGUMENT_ERROR;
1428 0 : return 0;
1429 : }
1430 0 : if(count == 0) { return 0; }
1431 :
1432 0 : FixedSortKeyByteSink sink(reinterpret_cast<char *>(dest), count);
1433 0 : sink.IgnoreBytes((int32_t)state[1]);
1434 0 : iter->move(iter, 0, UITER_START);
1435 :
1436 0 : Collation::Level level = (Collation::Level)state[0];
1437 0 : if(level <= Collation::QUATERNARY_LEVEL) {
1438 0 : UBool numeric = settings->isNumeric();
1439 0 : PartLevelCallback callback(sink);
1440 0 : if(settings->dontCheckFCD()) {
1441 0 : UIterCollationIterator ci(data, numeric, *iter);
1442 0 : CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleBytes, *settings,
1443 0 : sink, level, callback, FALSE, errorCode);
1444 : } else {
1445 0 : FCDUIterCollationIterator ci(data, numeric, *iter, 0);
1446 0 : CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleBytes, *settings,
1447 0 : sink, level, callback, FALSE, errorCode);
1448 : }
1449 0 : if(U_FAILURE(errorCode)) { return 0; }
1450 0 : if(sink.NumberOfBytesAppended() > count) {
1451 0 : state[0] = (uint32_t)callback.getLevel();
1452 0 : state[1] = (uint32_t)callback.getLevelCapacity();
1453 0 : return count;
1454 : }
1455 : // All of the normal levels are done.
1456 0 : if(settings->getStrength() == UCOL_IDENTICAL) {
1457 0 : level = Collation::IDENTICAL_LEVEL;
1458 0 : iter->move(iter, 0, UITER_START);
1459 : }
1460 : // else fall through to setting ZERO_LEVEL
1461 : }
1462 :
1463 0 : if(level == Collation::IDENTICAL_LEVEL) {
1464 0 : int32_t levelCapacity = sink.GetRemainingCapacity();
1465 0 : UnicodeString s;
1466 : for(;;) {
1467 0 : UChar32 c = iter->next(iter);
1468 0 : if(c < 0) { break; }
1469 0 : s.append((UChar)c);
1470 0 : }
1471 0 : const UChar *sArray = s.getBuffer();
1472 0 : writeIdenticalLevel(sArray, sArray + s.length(), sink, errorCode);
1473 0 : if(U_FAILURE(errorCode)) { return 0; }
1474 0 : if(sink.NumberOfBytesAppended() > count) {
1475 0 : state[0] = (uint32_t)level;
1476 0 : state[1] = (uint32_t)levelCapacity;
1477 0 : return count;
1478 : }
1479 : }
1480 :
1481 : // ZERO_LEVEL: Fill the remainder of dest with 00 bytes.
1482 0 : state[0] = (uint32_t)Collation::ZERO_LEVEL;
1483 0 : state[1] = 0;
1484 0 : int32_t length = sink.NumberOfBytesAppended();
1485 0 : int32_t i = length;
1486 0 : while(i < count) { dest[i++] = 0; }
1487 0 : return length;
1488 : }
1489 :
1490 : void
1491 0 : RuleBasedCollator::internalGetCEs(const UnicodeString &str, UVector64 &ces,
1492 : UErrorCode &errorCode) const {
1493 0 : if(U_FAILURE(errorCode)) { return; }
1494 0 : const UChar *s = str.getBuffer();
1495 0 : const UChar *limit = s + str.length();
1496 0 : UBool numeric = settings->isNumeric();
1497 0 : if(settings->dontCheckFCD()) {
1498 0 : UTF16CollationIterator iter(data, numeric, s, s, limit);
1499 : int64_t ce;
1500 0 : while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) {
1501 0 : ces.addElement(ce, errorCode);
1502 : }
1503 : } else {
1504 0 : FCDUTF16CollationIterator iter(data, numeric, s, s, limit);
1505 : int64_t ce;
1506 0 : while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) {
1507 0 : ces.addElement(ce, errorCode);
1508 : }
1509 : }
1510 : }
1511 :
1512 : namespace {
1513 :
1514 0 : void appendSubtag(CharString &s, char letter, const char *subtag, int32_t length,
1515 : UErrorCode &errorCode) {
1516 0 : if(U_FAILURE(errorCode) || length == 0) { return; }
1517 0 : if(!s.isEmpty()) {
1518 0 : s.append('_', errorCode);
1519 : }
1520 0 : s.append(letter, errorCode);
1521 0 : for(int32_t i = 0; i < length; ++i) {
1522 0 : s.append(uprv_toupper(subtag[i]), errorCode);
1523 : }
1524 : }
1525 :
1526 0 : void appendAttribute(CharString &s, char letter, UColAttributeValue value,
1527 : UErrorCode &errorCode) {
1528 0 : if(U_FAILURE(errorCode)) { return; }
1529 0 : if(!s.isEmpty()) {
1530 0 : s.append('_', errorCode);
1531 : }
1532 : static const char *valueChars = "1234...........IXO..SN..LU......";
1533 0 : s.append(letter, errorCode);
1534 0 : s.append(valueChars[value], errorCode);
1535 : }
1536 :
1537 : } // namespace
1538 :
1539 : int32_t
1540 0 : RuleBasedCollator::internalGetShortDefinitionString(const char *locale,
1541 : char *buffer, int32_t capacity,
1542 : UErrorCode &errorCode) const {
1543 0 : if(U_FAILURE(errorCode)) { return 0; }
1544 0 : if(buffer == NULL ? capacity != 0 : capacity < 0) {
1545 0 : errorCode = U_ILLEGAL_ARGUMENT_ERROR;
1546 0 : return 0;
1547 : }
1548 0 : if(locale == NULL) {
1549 0 : locale = internalGetLocaleID(ULOC_VALID_LOCALE, errorCode);
1550 : }
1551 :
1552 : char resultLocale[ULOC_FULLNAME_CAPACITY + 1];
1553 : int32_t length = ucol_getFunctionalEquivalent(resultLocale, ULOC_FULLNAME_CAPACITY,
1554 : "collation", locale,
1555 0 : NULL, &errorCode);
1556 0 : if(U_FAILURE(errorCode)) { return 0; }
1557 0 : if(length == 0) {
1558 0 : uprv_strcpy(resultLocale, "root");
1559 : } else {
1560 0 : resultLocale[length] = 0;
1561 : }
1562 :
1563 : // Append items in alphabetic order of their short definition letters.
1564 0 : CharString result;
1565 : char subtag[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1566 :
1567 0 : if(attributeHasBeenSetExplicitly(UCOL_ALTERNATE_HANDLING)) {
1568 0 : appendAttribute(result, 'A', getAttribute(UCOL_ALTERNATE_HANDLING, errorCode), errorCode);
1569 : }
1570 : // ATTR_VARIABLE_TOP not supported because 'B' was broken.
1571 : // See ICU tickets #10372 and #10386.
1572 0 : if(attributeHasBeenSetExplicitly(UCOL_CASE_FIRST)) {
1573 0 : appendAttribute(result, 'C', getAttribute(UCOL_CASE_FIRST, errorCode), errorCode);
1574 : }
1575 0 : if(attributeHasBeenSetExplicitly(UCOL_NUMERIC_COLLATION)) {
1576 0 : appendAttribute(result, 'D', getAttribute(UCOL_NUMERIC_COLLATION, errorCode), errorCode);
1577 : }
1578 0 : if(attributeHasBeenSetExplicitly(UCOL_CASE_LEVEL)) {
1579 0 : appendAttribute(result, 'E', getAttribute(UCOL_CASE_LEVEL, errorCode), errorCode);
1580 : }
1581 0 : if(attributeHasBeenSetExplicitly(UCOL_FRENCH_COLLATION)) {
1582 0 : appendAttribute(result, 'F', getAttribute(UCOL_FRENCH_COLLATION, errorCode), errorCode);
1583 : }
1584 : // Note: UCOL_HIRAGANA_QUATERNARY_MODE is deprecated and never changes away from default.
1585 0 : length = uloc_getKeywordValue(resultLocale, "collation", subtag, UPRV_LENGTHOF(subtag), &errorCode);
1586 0 : appendSubtag(result, 'K', subtag, length, errorCode);
1587 0 : length = uloc_getLanguage(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);
1588 0 : appendSubtag(result, 'L', subtag, length, errorCode);
1589 0 : if(attributeHasBeenSetExplicitly(UCOL_NORMALIZATION_MODE)) {
1590 0 : appendAttribute(result, 'N', getAttribute(UCOL_NORMALIZATION_MODE, errorCode), errorCode);
1591 : }
1592 0 : length = uloc_getCountry(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);
1593 0 : appendSubtag(result, 'R', subtag, length, errorCode);
1594 0 : if(attributeHasBeenSetExplicitly(UCOL_STRENGTH)) {
1595 0 : appendAttribute(result, 'S', getAttribute(UCOL_STRENGTH, errorCode), errorCode);
1596 : }
1597 0 : length = uloc_getVariant(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);
1598 0 : appendSubtag(result, 'V', subtag, length, errorCode);
1599 0 : length = uloc_getScript(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);
1600 0 : appendSubtag(result, 'Z', subtag, length, errorCode);
1601 :
1602 0 : if(U_FAILURE(errorCode)) { return 0; }
1603 0 : if(result.length() <= capacity) {
1604 0 : uprv_memcpy(buffer, result.data(), result.length());
1605 : }
1606 0 : return u_terminateChars(buffer, capacity, result.length(), &errorCode);
1607 : }
1608 :
1609 : UBool
1610 0 : RuleBasedCollator::isUnsafe(UChar32 c) const {
1611 0 : return data->isUnsafeBackward(c, settings->isNumeric());
1612 : }
1613 :
1614 : void U_CALLCONV
1615 0 : RuleBasedCollator::computeMaxExpansions(const CollationTailoring *t, UErrorCode &errorCode) {
1616 0 : t->maxExpansions = CollationElementIterator::computeMaxExpansions(t->data, errorCode);
1617 0 : }
1618 :
1619 : UBool
1620 0 : RuleBasedCollator::initMaxExpansions(UErrorCode &errorCode) const {
1621 0 : umtx_initOnce(tailoring->maxExpansionsInitOnce, computeMaxExpansions, tailoring, errorCode);
1622 0 : return U_SUCCESS(errorCode);
1623 : }
1624 :
1625 : CollationElementIterator *
1626 0 : RuleBasedCollator::createCollationElementIterator(const UnicodeString& source) const {
1627 0 : UErrorCode errorCode = U_ZERO_ERROR;
1628 0 : if(!initMaxExpansions(errorCode)) { return NULL; }
1629 0 : CollationElementIterator *cei = new CollationElementIterator(source, this, errorCode);
1630 0 : if(U_FAILURE(errorCode)) {
1631 0 : delete cei;
1632 0 : return NULL;
1633 : }
1634 0 : return cei;
1635 : }
1636 :
1637 : CollationElementIterator *
1638 0 : RuleBasedCollator::createCollationElementIterator(const CharacterIterator& source) const {
1639 0 : UErrorCode errorCode = U_ZERO_ERROR;
1640 0 : if(!initMaxExpansions(errorCode)) { return NULL; }
1641 0 : CollationElementIterator *cei = new CollationElementIterator(source, this, errorCode);
1642 0 : if(U_FAILURE(errorCode)) {
1643 0 : delete cei;
1644 0 : return NULL;
1645 : }
1646 0 : return cei;
1647 : }
1648 :
1649 : int32_t
1650 0 : RuleBasedCollator::getMaxExpansion(int32_t order) const {
1651 0 : UErrorCode errorCode = U_ZERO_ERROR;
1652 0 : (void)initMaxExpansions(errorCode);
1653 0 : return CollationElementIterator::getMaxExpansion(tailoring->maxExpansions, order);
1654 : }
1655 :
1656 : U_NAMESPACE_END
1657 :
1658 : #endif // !UCONFIG_NO_COLLATION
|