Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : *******************************************************************************
5 : * Copyright (C) 2013-2015, International Business Machines
6 : * Corporation and others. All Rights Reserved.
7 : *******************************************************************************
8 : * collationfastlatin.cpp
9 : *
10 : * created on: 2013aug18
11 : * created by: Markus W. Scherer
12 : */
13 :
14 : #include "unicode/utypes.h"
15 :
16 : #if !UCONFIG_NO_COLLATION
17 :
18 : #include "unicode/ucol.h"
19 : #include "collationdata.h"
20 : #include "collationfastlatin.h"
21 : #include "collationsettings.h"
22 : #include "uassert.h"
23 :
24 : U_NAMESPACE_BEGIN
25 :
26 : int32_t
27 0 : CollationFastLatin::getOptions(const CollationData *data, const CollationSettings &settings,
28 : uint16_t *primaries, int32_t capacity) {
29 0 : const uint16_t *table = data->fastLatinTable;
30 0 : if(table == NULL) { return -1; }
31 0 : U_ASSERT(capacity == LATIN_LIMIT);
32 0 : if(capacity != LATIN_LIMIT) { return -1; }
33 :
34 : uint32_t miniVarTop;
35 0 : if((settings.options & CollationSettings::ALTERNATE_MASK) == 0) {
36 : // No mini primaries are variable, set a variableTop just below the
37 : // lowest long mini primary.
38 0 : miniVarTop = MIN_LONG - 1;
39 : } else {
40 0 : int32_t headerLength = *table & 0xff;
41 0 : int32_t i = 1 + settings.getMaxVariable();
42 0 : if(i >= headerLength) {
43 0 : return -1; // variableTop >= digits, should not occur
44 : }
45 0 : miniVarTop = table[i];
46 : }
47 :
48 0 : UBool digitsAreReordered = FALSE;
49 0 : if(settings.hasReordering()) {
50 0 : uint32_t prevStart = 0;
51 0 : uint32_t beforeDigitStart = 0;
52 0 : uint32_t digitStart = 0;
53 0 : uint32_t afterDigitStart = 0;
54 0 : for(int32_t group = UCOL_REORDER_CODE_FIRST;
55 0 : group < UCOL_REORDER_CODE_FIRST + CollationData::MAX_NUM_SPECIAL_REORDER_CODES;
56 : ++group) {
57 0 : uint32_t start = data->getFirstPrimaryForGroup(group);
58 0 : start = settings.reorder(start);
59 0 : if(group == UCOL_REORDER_CODE_DIGIT) {
60 0 : beforeDigitStart = prevStart;
61 0 : digitStart = start;
62 0 : } else if(start != 0) {
63 0 : if(start < prevStart) {
64 : // The permutation affects the groups up to Latin.
65 0 : return -1;
66 : }
67 : // In the future, there might be a special group between digits & Latin.
68 0 : if(digitStart != 0 && afterDigitStart == 0 && prevStart == beforeDigitStart) {
69 0 : afterDigitStart = start;
70 : }
71 0 : prevStart = start;
72 : }
73 : }
74 0 : uint32_t latinStart = data->getFirstPrimaryForGroup(USCRIPT_LATIN);
75 0 : latinStart = settings.reorder(latinStart);
76 0 : if(latinStart < prevStart) {
77 0 : return -1;
78 : }
79 0 : if(afterDigitStart == 0) {
80 0 : afterDigitStart = latinStart;
81 : }
82 0 : if(!(beforeDigitStart < digitStart && digitStart < afterDigitStart)) {
83 0 : digitsAreReordered = TRUE;
84 : }
85 : }
86 :
87 0 : table += (table[0] & 0xff); // skip the header
88 0 : for(UChar32 c = 0; c < LATIN_LIMIT; ++c) {
89 0 : uint32_t p = table[c];
90 0 : if(p >= MIN_SHORT) {
91 0 : p &= SHORT_PRIMARY_MASK;
92 0 : } else if(p > miniVarTop) {
93 0 : p &= LONG_PRIMARY_MASK;
94 : } else {
95 0 : p = 0;
96 : }
97 0 : primaries[c] = (uint16_t)p;
98 : }
99 0 : if(digitsAreReordered || (settings.options & CollationSettings::NUMERIC) != 0) {
100 : // Bail out for digits.
101 0 : for(UChar32 c = 0x30; c <= 0x39; ++c) { primaries[c] = 0; }
102 : }
103 :
104 : // Shift the miniVarTop above other options.
105 0 : return ((int32_t)miniVarTop << 16) | settings.options;
106 : }
107 :
// Compares two UTF-16 strings level by level (primary, secondary, case,
// tertiary, quaternary) using the fast Latin mini-CE table.
//
// table:      fast Latin table including its header (the header is skipped below).
// primaries:  per-character primaries as filled in by getOptions(); indexed by
//             code point for c <= LATIN_MAX.
// options:    mini variableTop in the upper 16 bits, settings options in the
//             lower 16 bits (the value returned by getOptions()).
// left/right: the strings; leftLength/rightLength are passed by reference to
//             nextPair(), which replaces a negative length with the real one
//             when it meets a NUL character.
//
// Returns UCOL_LESS, UCOL_GREATER or UCOL_EQUAL, or BAIL_OUT_RESULT when a
// character or setting is not handled here.
// NOTE(review): presumably the caller then falls back to the regular
// comparison code -- confirm against the caller.
108 : int32_t
109 0 : CollationFastLatin::compareUTF16(const uint16_t *table, const uint16_t *primaries, int32_t options,
110 : const UChar *left, int32_t leftLength,
111 : const UChar *right, int32_t rightLength) {
112 : // This is a modified copy of CollationCompare::compareUpToQuaternary(),
113 : // optimized for common Latin text.
114 : // Keep them in sync!
115 : // Keep compareUTF16() and compareUTF8() in sync very closely!
116 :
// The high byte of the first table unit is the format version,
// the low byte is the header length.
117 0 : U_ASSERT((table[0] >> 8) == VERSION);
118 0 : table += (table[0] & 0xff); // skip the header
119 0 : uint32_t variableTop = (uint32_t)options >> 16; // see getOptions()
120 0 : options &= 0xffff; // needed for CollationSettings::getStrength() to work
121 :
122 : // Check for supported characters, fetch mini CEs, and compare primaries.
123 0 : int32_t leftIndex = 0, rightIndex = 0;
124 : /**
125 : * Single mini CE or a pair.
126 : * The current mini CE is in the lower 16 bits, the next one is in the upper 16 bits.
127 : * If there is only one, then it is in the lower bits, and the upper bits are 0.
128 : */
129 0 : uint32_t leftPair = 0, rightPair = 0;
// ---- Primary level ----
130 : for(;;) {
131 : // We fetch CEs until we get a non-ignorable primary or reach the end.
132 0 : while(leftPair == 0) {
133 0 : if(leftIndex == leftLength) {
134 0 : leftPair = EOS;
135 0 : break;
136 : }
137 0 : UChar32 c = left[leftIndex++];
138 0 : if(c <= LATIN_MAX) {
// Fast path: a non-zero entry in primaries[] is used as is.
139 0 : leftPair = primaries[c];
140 0 : if(leftPair != 0) { break; }
// getOptions() zeroes the digit primaries for numeric collation;
// digits cannot be compared here in that mode.
141 0 : if(c <= 0x39 && c >= 0x30 && (options & CollationSettings::NUMERIC) != 0) {
142 0 : return BAIL_OUT_RESULT;
143 : }
144 0 : leftPair = table[c];
145 0 : } else if(PUNCT_START <= c && c < PUNCT_LIMIT) {
// The punctuation range is stored right after the Latin range.
146 0 : leftPair = table[c - PUNCT_START + LATIN_LIMIT];
147 : } else {
148 0 : leftPair = lookup(table, c);
149 : }
150 0 : if(leftPair >= MIN_SHORT) {
151 0 : leftPair &= SHORT_PRIMARY_MASK;
152 0 : break;
153 0 : } else if(leftPair > variableTop) {
154 0 : leftPair &= LONG_PRIMARY_MASK;
155 0 : break;
156 : } else {
// Variable or special mini CE: let nextPair() resolve expansions and
// contractions, then reduce the pair to its primary weights.
157 0 : leftPair = nextPair(table, c, leftPair, left, NULL, leftIndex, leftLength);
158 0 : if(leftPair == BAIL_OUT) { return BAIL_OUT_RESULT; }
159 0 : leftPair = getPrimaries(variableTop, leftPair);
160 : }
161 : }
162 :
// Same fetch logic for the right string.
163 0 : while(rightPair == 0) {
164 0 : if(rightIndex == rightLength) {
165 0 : rightPair = EOS;
166 0 : break;
167 : }
168 0 : UChar32 c = right[rightIndex++];
169 0 : if(c <= LATIN_MAX) {
170 0 : rightPair = primaries[c];
171 0 : if(rightPair != 0) { break; }
172 0 : if(c <= 0x39 && c >= 0x30 && (options & CollationSettings::NUMERIC) != 0) {
173 0 : return BAIL_OUT_RESULT;
174 : }
175 0 : rightPair = table[c];
176 0 : } else if(PUNCT_START <= c && c < PUNCT_LIMIT) {
177 0 : rightPair = table[c - PUNCT_START + LATIN_LIMIT];
178 : } else {
179 0 : rightPair = lookup(table, c);
180 : }
181 0 : if(rightPair >= MIN_SHORT) {
182 0 : rightPair &= SHORT_PRIMARY_MASK;
183 0 : break;
184 0 : } else if(rightPair > variableTop) {
185 0 : rightPair &= LONG_PRIMARY_MASK;
186 0 : break;
187 : } else {
188 0 : rightPair = nextPair(table, c, rightPair, right, NULL, rightIndex, rightLength);
189 0 : if(rightPair == BAIL_OUT) { return BAIL_OUT_RESULT; }
190 0 : rightPair = getPrimaries(variableTop, rightPair);
191 : }
192 : }
193 :
// Identical pairs: either both strings ended, or fetch the next weights.
194 0 : if(leftPair == rightPair) {
195 0 : if(leftPair == EOS) { break; }
196 0 : leftPair = rightPair = 0;
197 0 : continue;
198 : }
// Compare the current (lower 16 bits) weights of the two pairs.
199 0 : uint32_t leftPrimary = leftPair & 0xffff;
200 0 : uint32_t rightPrimary = rightPair & 0xffff;
201 0 : if(leftPrimary != rightPrimary) {
202 : // Return the primary difference.
203 0 : return (leftPrimary < rightPrimary) ? UCOL_LESS : UCOL_GREATER;
204 : }
205 0 : if(leftPair == EOS) { break; }
// Move each pair's second weight into the current position.
206 0 : leftPair >>= 16;
207 0 : rightPair >>= 16;
208 0 : }
209 : // In the following, we need to re-fetch each character because we did not buffer the CEs,
210 : // but we know that the string is well-formed and
211 : // only contains supported characters and mappings.
212 :
213 : // We might skip the secondary level but continue with the case level
214 : // which is turned on separately.
// ---- Secondary level ----
215 0 : if(CollationSettings::getStrength(options) >= UCOL_SECONDARY) {
216 0 : leftIndex = rightIndex = 0;
217 0 : leftPair = rightPair = 0;
218 : for(;;) {
219 0 : while(leftPair == 0) {
220 0 : if(leftIndex == leftLength) {
221 0 : leftPair = EOS;
222 0 : break;
223 : }
224 0 : UChar32 c = left[leftIndex++];
225 0 : if(c <= LATIN_MAX) {
226 0 : leftPair = table[c];
227 0 : } else if(PUNCT_START <= c && c < PUNCT_LIMIT) {
228 0 : leftPair = table[c - PUNCT_START + LATIN_LIMIT];
229 : } else {
230 0 : leftPair = lookup(table, c);
231 : }
232 0 : if(leftPair >= MIN_SHORT) {
233 0 : leftPair = getSecondariesFromOneShortCE(leftPair);
234 0 : break;
235 0 : } else if(leftPair > variableTop) {
// A long mini CE above variableTop gets the common secondary weight.
236 0 : leftPair = COMMON_SEC_PLUS_OFFSET;
237 0 : break;
238 : } else {
// No BAIL_OUT check here: the primary pass has already returned
// for every input that bails out.
239 0 : leftPair = nextPair(table, c, leftPair, left, NULL, leftIndex, leftLength);
240 0 : leftPair = getSecondaries(variableTop, leftPair);
241 : }
242 : }
243 :
244 0 : while(rightPair == 0) {
245 0 : if(rightIndex == rightLength) {
246 0 : rightPair = EOS;
247 0 : break;
248 : }
249 0 : UChar32 c = right[rightIndex++];
250 0 : if(c <= LATIN_MAX) {
251 0 : rightPair = table[c];
252 0 : } else if(PUNCT_START <= c && c < PUNCT_LIMIT) {
253 0 : rightPair = table[c - PUNCT_START + LATIN_LIMIT];
254 : } else {
255 0 : rightPair = lookup(table, c);
256 : }
257 0 : if(rightPair >= MIN_SHORT) {
258 0 : rightPair = getSecondariesFromOneShortCE(rightPair);
259 0 : break;
260 0 : } else if(rightPair > variableTop) {
261 0 : rightPair = COMMON_SEC_PLUS_OFFSET;
262 0 : break;
263 : } else {
264 0 : rightPair = nextPair(table, c, rightPair, right, NULL, rightIndex, rightLength);
265 0 : rightPair = getSecondaries(variableTop, rightPair);
266 : }
267 : }
268 :
269 0 : if(leftPair == rightPair) {
270 0 : if(leftPair == EOS) { break; }
271 0 : leftPair = rightPair = 0;
272 0 : continue;
273 : }
274 0 : uint32_t leftSecondary = leftPair & 0xffff;
275 0 : uint32_t rightSecondary = rightPair & 0xffff;
276 0 : if(leftSecondary != rightSecondary) {
277 0 : if((options & CollationSettings::BACKWARD_SECONDARY) != 0) {
278 : // Full support for backwards secondary requires backwards contraction matching
279 : // and moving backwards between merge separators.
280 0 : return BAIL_OUT_RESULT;
281 : }
282 0 : return (leftSecondary < rightSecondary) ? UCOL_LESS : UCOL_GREATER;
283 : }
284 0 : if(leftPair == EOS) { break; }
285 0 : leftPair >>= 16;
286 0 : rightPair >>= 16;
287 0 : }
288 : }
289 :
// ---- Case level (only when the CASE_LEVEL option is set) ----
290 0 : if((options & CollationSettings::CASE_LEVEL) != 0) {
291 0 : UBool strengthIsPrimary = CollationSettings::getStrength(options) == UCOL_PRIMARY;
292 0 : leftIndex = rightIndex = 0;
293 0 : leftPair = rightPair = 0;
294 : for(;;) {
295 0 : while(leftPair == 0) {
296 0 : if(leftIndex == leftLength) {
297 0 : leftPair = EOS;
298 0 : break;
299 : }
300 0 : UChar32 c = left[leftIndex++];
// lookup() also covers the punctuation range, so no separate branch is needed.
301 0 : leftPair = (c <= LATIN_MAX) ? table[c] : lookup(table, c);
// Values below MIN_LONG are special mini CEs that nextPair() resolves.
302 0 : if(leftPair < MIN_LONG) {
303 0 : leftPair = nextPair(table, c, leftPair, left, NULL, leftIndex, leftLength);
304 : }
305 0 : leftPair = getCases(variableTop, strengthIsPrimary, leftPair);
306 : }
307 :
308 0 : while(rightPair == 0) {
309 0 : if(rightIndex == rightLength) {
310 0 : rightPair = EOS;
311 0 : break;
312 : }
313 0 : UChar32 c = right[rightIndex++];
314 0 : rightPair = (c <= LATIN_MAX) ? table[c] : lookup(table, c);
315 0 : if(rightPair < MIN_LONG) {
316 0 : rightPair = nextPair(table, c, rightPair, right, NULL, rightIndex, rightLength);
317 : }
318 0 : rightPair = getCases(variableTop, strengthIsPrimary, rightPair);
319 : }
320 :
321 0 : if(leftPair == rightPair) {
322 0 : if(leftPair == EOS) { break; }
323 0 : leftPair = rightPair = 0;
324 0 : continue;
325 : }
326 0 : uint32_t leftCase = leftPair & 0xffff;
327 0 : uint32_t rightCase = rightPair & 0xffff;
328 0 : if(leftCase != rightCase) {
// With UPPER_FIRST the order of the case weights is inverted.
329 0 : if((options & CollationSettings::UPPER_FIRST) == 0) {
330 0 : return (leftCase < rightCase) ? UCOL_LESS : UCOL_GREATER;
331 : } else {
332 0 : return (leftCase < rightCase) ? UCOL_GREATER : UCOL_LESS;
333 : }
334 : }
335 0 : if(leftPair == EOS) { break; }
336 0 : leftPair >>= 16;
337 0 : rightPair >>= 16;
338 0 : }
339 : }
340 0 : if(CollationSettings::getStrength(options) <= UCOL_SECONDARY) { return UCOL_EQUAL; }
341 :
342 : // Remove the case bits from the tertiary weight when caseLevel is on or caseFirst is off.
343 0 : UBool withCaseBits = CollationSettings::isTertiaryWithCaseBits(options);
344 :
// ---- Tertiary level ----
345 0 : leftIndex = rightIndex = 0;
346 0 : leftPair = rightPair = 0;
347 : for(;;) {
348 0 : while(leftPair == 0) {
349 0 : if(leftIndex == leftLength) {
350 0 : leftPair = EOS;
351 0 : break;
352 : }
353 0 : UChar32 c = left[leftIndex++];
354 0 : leftPair = (c <= LATIN_MAX) ? table[c] : lookup(table, c);
355 0 : if(leftPair < MIN_LONG) {
356 0 : leftPair = nextPair(table, c, leftPair, left, NULL, leftIndex, leftLength);
357 : }
358 0 : leftPair = getTertiaries(variableTop, withCaseBits, leftPair);
359 : }
360 :
361 0 : while(rightPair == 0) {
362 0 : if(rightIndex == rightLength) {
363 0 : rightPair = EOS;
364 0 : break;
365 : }
366 0 : UChar32 c = right[rightIndex++];
367 0 : rightPair = (c <= LATIN_MAX) ? table[c] : lookup(table, c);
368 0 : if(rightPair < MIN_LONG) {
369 0 : rightPair = nextPair(table, c, rightPair, right, NULL, rightIndex, rightLength);
370 : }
371 0 : rightPair = getTertiaries(variableTop, withCaseBits, rightPair);
372 : }
373 :
374 0 : if(leftPair == rightPair) {
375 0 : if(leftPair == EOS) { break; }
376 0 : leftPair = rightPair = 0;
377 0 : continue;
378 : }
379 0 : uint32_t leftTertiary = leftPair & 0xffff;
380 0 : uint32_t rightTertiary = rightPair & 0xffff;
381 0 : if(leftTertiary != rightTertiary) {
382 0 : if(CollationSettings::sortsTertiaryUpperCaseFirst(options)) {
383 : // Pass through EOS and MERGE_WEIGHT
384 : // and keep real tertiary weights larger than the MERGE_WEIGHT.
385 : // Tertiary CEs (secondary ignorables) are not supported in fast Latin.
386 0 : if(leftTertiary > MERGE_WEIGHT) {
387 0 : leftTertiary ^= CASE_MASK;
388 : }
389 0 : if(rightTertiary > MERGE_WEIGHT) {
390 0 : rightTertiary ^= CASE_MASK;
391 : }
392 : }
393 0 : return (leftTertiary < rightTertiary) ? UCOL_LESS : UCOL_GREATER;
394 : }
395 0 : if(leftPair == EOS) { break; }
396 0 : leftPair >>= 16;
397 0 : rightPair >>= 16;
398 0 : }
399 0 : if(CollationSettings::getStrength(options) <= UCOL_TERTIARY) { return UCOL_EQUAL; }
400 :
// ---- Quaternary level ----
401 0 : leftIndex = rightIndex = 0;
402 0 : leftPair = rightPair = 0;
403 : for(;;) {
404 0 : while(leftPair == 0) {
405 0 : if(leftIndex == leftLength) {
406 0 : leftPair = EOS;
407 0 : break;
408 : }
409 0 : UChar32 c = left[leftIndex++];
410 0 : leftPair = (c <= LATIN_MAX) ? table[c] : lookup(table, c);
411 0 : if(leftPair < MIN_LONG) {
412 0 : leftPair = nextPair(table, c, leftPair, left, NULL, leftIndex, leftLength);
413 : }
414 0 : leftPair = getQuaternaries(variableTop, leftPair);
415 : }
416 :
417 0 : while(rightPair == 0) {
418 0 : if(rightIndex == rightLength) {
419 0 : rightPair = EOS;
420 0 : break;
421 : }
422 0 : UChar32 c = right[rightIndex++];
423 0 : rightPair = (c <= LATIN_MAX) ? table[c] : lookup(table, c);
424 0 : if(rightPair < MIN_LONG) {
425 0 : rightPair = nextPair(table, c, rightPair, right, NULL, rightIndex, rightLength);
426 : }
427 0 : rightPair = getQuaternaries(variableTop, rightPair);
428 : }
429 :
430 0 : if(leftPair == rightPair) {
431 0 : if(leftPair == EOS) { break; }
432 0 : leftPair = rightPair = 0;
433 0 : continue;
434 : }
435 0 : uint32_t leftQuaternary = leftPair & 0xffff;
436 0 : uint32_t rightQuaternary = rightPair & 0xffff;
437 0 : if(leftQuaternary != rightQuaternary) {
438 0 : return (leftQuaternary < rightQuaternary) ? UCOL_LESS : UCOL_GREATER;
439 : }
440 0 : if(leftPair == EOS) { break; }
441 0 : leftPair >>= 16;
442 0 : rightPair >>= 16;
443 0 : }
// No difference found on any enabled level.
444 0 : return UCOL_EQUAL;
445 : }
446 :
// Compares two UTF-8 strings level by level (primary, secondary, case,
// tertiary, quaternary) using the fast Latin mini-CE table.
// This is the byte-oriented twin of compareUTF16().
//
// table:      fast Latin table including its header (the header is skipped below).
// primaries:  per-character primaries as filled in by getOptions().
// options:    mini variableTop in the upper 16 bits, settings options in the
//             lower 16 bits (the value returned by getOptions()).
// left/right: the strings; leftLength/rightLength are passed by reference to
//             nextPair(), which replaces a negative length with the real one
//             when it meets a NUL byte.
//
// Only the primary pass validates the byte sequences; the later passes use
// the unchecked lookupUTF8Unsafe() because an ill-formed or unsupported
// input has already caused a BAIL_OUT_RESULT return.
//
// Returns UCOL_LESS, UCOL_GREATER or UCOL_EQUAL, or BAIL_OUT_RESULT when a
// character or setting is not handled here.
447 : int32_t
448 0 : CollationFastLatin::compareUTF8(const uint16_t *table, const uint16_t *primaries, int32_t options,
449 : const uint8_t *left, int32_t leftLength,
450 : const uint8_t *right, int32_t rightLength) {
451 : // Keep compareUTF16() and compareUTF8() in sync very closely!
452 :
453 0 : U_ASSERT((table[0] >> 8) == VERSION);
454 0 : table += (table[0] & 0xff); // skip the header
455 0 : uint32_t variableTop = (uint32_t)options >> 16; // see getOptions()
456 0 : options &= 0xffff; // needed for CollationSettings::getStrength() to work
457 :
458 : // Check for supported characters, fetch mini CEs, and compare primaries.
459 0 : int32_t leftIndex = 0, rightIndex = 0;
460 : /**
461 : * Single mini CE or a pair.
462 : * The current mini CE is in the lower 16 bits, the next one is in the upper 16 bits.
463 : * If there is only one, then it is in the lower bits, and the upper bits are 0.
464 : */
465 0 : uint32_t leftPair = 0, rightPair = 0;
466 : // Note: There is no need to assemble the code point.
467 : // We only need to look up the table entry for the character,
468 : // and nextPair() looks for whether c==0.
// ---- Primary level ----
469 : for(;;) {
470 : // We fetch CEs until we get a non-ignorable primary or reach the end.
471 0 : while(leftPair == 0) {
472 0 : if(leftIndex == leftLength) {
473 0 : leftPair = EOS;
474 0 : break;
475 : }
476 0 : UChar32 c = left[leftIndex++];
477 : uint8_t t;
478 0 : if(c <= 0x7f) {
// ASCII: a non-zero entry in primaries[] is used as is.
479 0 : leftPair = primaries[c];
480 0 : if(leftPair != 0) { break; }
// getOptions() zeroes the digit primaries for numeric collation;
// digits cannot be compared here in that mode.
481 0 : if(c <= 0x39 && c >= 0x30 && (options & CollationSettings::NUMERIC) != 0) {
482 0 : return BAIL_OUT_RESULT;
483 : }
484 0 : leftPair = table[c];
485 0 : } else if(c <= LATIN_MAX_UTF8_LEAD && 0xc2 <= c && leftIndex != leftLength &&
486 0 : 0x80 <= (t = left[leftIndex]) && t <= 0xbf) {
// Two-byte sequence with a valid trail byte: turn lead and trail
// into the table index (lead 0xc2 maps trail 0x80..0xbf onto itself).
487 0 : ++leftIndex;
488 0 : c = ((c - 0xc2) << 6) + t;
489 0 : leftPair = primaries[c];
490 0 : if(leftPair != 0) { break; }
491 0 : leftPair = table[c];
492 : } else {
// Everything else: supported three-byte sequences or BAIL_OUT.
493 0 : leftPair = lookupUTF8(table, c, left, leftIndex, leftLength);
494 : }
495 0 : if(leftPair >= MIN_SHORT) {
496 0 : leftPair &= SHORT_PRIMARY_MASK;
497 0 : break;
498 0 : } else if(leftPair > variableTop) {
499 0 : leftPair &= LONG_PRIMARY_MASK;
500 0 : break;
501 : } else {
// Variable or special mini CE: let nextPair() resolve expansions and
// contractions, then reduce the pair to its primary weights.
502 0 : leftPair = nextPair(table, c, leftPair, NULL, left, leftIndex, leftLength);
503 0 : if(leftPair == BAIL_OUT) { return BAIL_OUT_RESULT; }
504 0 : leftPair = getPrimaries(variableTop, leftPair);
505 : }
506 : }
507 :
// Same fetch logic for the right string.
508 0 : while(rightPair == 0) {
509 0 : if(rightIndex == rightLength) {
510 0 : rightPair = EOS;
511 0 : break;
512 : }
513 0 : UChar32 c = right[rightIndex++];
514 : uint8_t t;
515 0 : if(c <= 0x7f) {
516 0 : rightPair = primaries[c];
517 0 : if(rightPair != 0) { break; }
518 0 : if(c <= 0x39 && c >= 0x30 && (options & CollationSettings::NUMERIC) != 0) {
519 0 : return BAIL_OUT_RESULT;
520 : }
521 0 : rightPair = table[c];
522 0 : } else if(c <= LATIN_MAX_UTF8_LEAD && 0xc2 <= c && rightIndex != rightLength &&
523 0 : 0x80 <= (t = right[rightIndex]) && t <= 0xbf) {
524 0 : ++rightIndex;
525 0 : c = ((c - 0xc2) << 6) + t;
526 0 : rightPair = primaries[c];
527 0 : if(rightPair != 0) { break; }
528 0 : rightPair = table[c];
529 : } else {
530 0 : rightPair = lookupUTF8(table, c, right, rightIndex, rightLength);
531 : }
532 0 : if(rightPair >= MIN_SHORT) {
533 0 : rightPair &= SHORT_PRIMARY_MASK;
534 0 : break;
535 0 : } else if(rightPair > variableTop) {
536 0 : rightPair &= LONG_PRIMARY_MASK;
537 0 : break;
538 : } else {
539 0 : rightPair = nextPair(table, c, rightPair, NULL, right, rightIndex, rightLength);
540 0 : if(rightPair == BAIL_OUT) { return BAIL_OUT_RESULT; }
541 0 : rightPair = getPrimaries(variableTop, rightPair);
542 : }
543 : }
544 :
// Identical pairs: either both strings ended, or fetch the next weights.
545 0 : if(leftPair == rightPair) {
546 0 : if(leftPair == EOS) { break; }
547 0 : leftPair = rightPair = 0;
548 0 : continue;
549 : }
// Compare the current (lower 16 bits) weights of the two pairs.
550 0 : uint32_t leftPrimary = leftPair & 0xffff;
551 0 : uint32_t rightPrimary = rightPair & 0xffff;
552 0 : if(leftPrimary != rightPrimary) {
553 : // Return the primary difference.
554 0 : return (leftPrimary < rightPrimary) ? UCOL_LESS : UCOL_GREATER;
555 : }
556 0 : if(leftPair == EOS) { break; }
// Move each pair's second weight into the current position.
557 0 : leftPair >>= 16;
558 0 : rightPair >>= 16;
559 0 : }
560 : // In the following, we need to re-fetch each character because we did not buffer the CEs,
561 : // but we know that the string is well-formed and
562 : // only contains supported characters and mappings.
563 :
564 : // We might skip the secondary level but continue with the case level
565 : // which is turned on separately.
// ---- Secondary level ----
566 0 : if(CollationSettings::getStrength(options) >= UCOL_SECONDARY) {
567 0 : leftIndex = rightIndex = 0;
568 0 : leftPair = rightPair = 0;
569 : for(;;) {
570 0 : while(leftPair == 0) {
571 0 : if(leftIndex == leftLength) {
572 0 : leftPair = EOS;
573 0 : break;
574 : }
575 0 : UChar32 c = left[leftIndex++];
576 0 : if(c <= 0x7f) {
577 0 : leftPair = table[c];
578 0 : } else if(c <= LATIN_MAX_UTF8_LEAD) {
// Unchecked two-byte decode: the trail byte is consumed without validation.
579 0 : leftPair = table[((c - 0xc2) << 6) + left[leftIndex++]];
580 : } else {
581 0 : leftPair = lookupUTF8Unsafe(table, c, left, leftIndex);
582 : }
583 0 : if(leftPair >= MIN_SHORT) {
584 0 : leftPair = getSecondariesFromOneShortCE(leftPair);
585 0 : break;
586 0 : } else if(leftPair > variableTop) {
// A long mini CE above variableTop gets the common secondary weight.
587 0 : leftPair = COMMON_SEC_PLUS_OFFSET;
588 0 : break;
589 : } else {
590 0 : leftPair = nextPair(table, c, leftPair, NULL, left, leftIndex, leftLength);
591 0 : leftPair = getSecondaries(variableTop, leftPair);
592 : }
593 : }
594 :
595 0 : while(rightPair == 0) {
596 0 : if(rightIndex == rightLength) {
597 0 : rightPair = EOS;
598 0 : break;
599 : }
600 0 : UChar32 c = right[rightIndex++];
601 0 : if(c <= 0x7f) {
602 0 : rightPair = table[c];
603 0 : } else if(c <= LATIN_MAX_UTF8_LEAD) {
604 0 : rightPair = table[((c - 0xc2) << 6) + right[rightIndex++]];
605 : } else {
606 0 : rightPair = lookupUTF8Unsafe(table, c, right, rightIndex);
607 : }
608 0 : if(rightPair >= MIN_SHORT) {
609 0 : rightPair = getSecondariesFromOneShortCE(rightPair);
610 0 : break;
611 0 : } else if(rightPair > variableTop) {
612 0 : rightPair = COMMON_SEC_PLUS_OFFSET;
613 0 : break;
614 : } else {
615 0 : rightPair = nextPair(table, c, rightPair, NULL, right, rightIndex, rightLength);
616 0 : rightPair = getSecondaries(variableTop, rightPair);
617 : }
618 : }
619 :
620 0 : if(leftPair == rightPair) {
621 0 : if(leftPair == EOS) { break; }
622 0 : leftPair = rightPair = 0;
623 0 : continue;
624 : }
625 0 : uint32_t leftSecondary = leftPair & 0xffff;
626 0 : uint32_t rightSecondary = rightPair & 0xffff;
627 0 : if(leftSecondary != rightSecondary) {
628 0 : if((options & CollationSettings::BACKWARD_SECONDARY) != 0) {
629 : // Full support for backwards secondary requires backwards contraction matching
630 : // and moving backwards between merge separators.
631 0 : return BAIL_OUT_RESULT;
632 : }
633 0 : return (leftSecondary < rightSecondary) ? UCOL_LESS : UCOL_GREATER;
634 : }
635 0 : if(leftPair == EOS) { break; }
636 0 : leftPair >>= 16;
637 0 : rightPair >>= 16;
638 0 : }
639 : }
640 :
// ---- Case level (only when the CASE_LEVEL option is set) ----
641 0 : if((options & CollationSettings::CASE_LEVEL) != 0) {
642 0 : UBool strengthIsPrimary = CollationSettings::getStrength(options) == UCOL_PRIMARY;
643 0 : leftIndex = rightIndex = 0;
644 0 : leftPair = rightPair = 0;
645 : for(;;) {
646 0 : while(leftPair == 0) {
647 0 : if(leftIndex == leftLength) {
648 0 : leftPair = EOS;
649 0 : break;
650 : }
651 0 : UChar32 c = left[leftIndex++];
// lookupUTF8Unsafe() handles both the two-byte and three-byte cases.
652 0 : leftPair = (c <= 0x7f) ? table[c] : lookupUTF8Unsafe(table, c, left, leftIndex);
// Values below MIN_LONG are special mini CEs that nextPair() resolves.
653 0 : if(leftPair < MIN_LONG) {
654 0 : leftPair = nextPair(table, c, leftPair, NULL, left, leftIndex, leftLength);
655 : }
656 0 : leftPair = getCases(variableTop, strengthIsPrimary, leftPair);
657 : }
658 :
659 0 : while(rightPair == 0) {
660 0 : if(rightIndex == rightLength) {
661 0 : rightPair = EOS;
662 0 : break;
663 : }
664 0 : UChar32 c = right[rightIndex++];
665 0 : rightPair = (c <= 0x7f) ? table[c] : lookupUTF8Unsafe(table, c, right, rightIndex);
666 0 : if(rightPair < MIN_LONG) {
667 0 : rightPair = nextPair(table, c, rightPair, NULL, right, rightIndex, rightLength);
668 : }
669 0 : rightPair = getCases(variableTop, strengthIsPrimary, rightPair);
670 : }
671 :
672 0 : if(leftPair == rightPair) {
673 0 : if(leftPair == EOS) { break; }
674 0 : leftPair = rightPair = 0;
675 0 : continue;
676 : }
677 0 : uint32_t leftCase = leftPair & 0xffff;
678 0 : uint32_t rightCase = rightPair & 0xffff;
679 0 : if(leftCase != rightCase) {
// With UPPER_FIRST the order of the case weights is inverted.
680 0 : if((options & CollationSettings::UPPER_FIRST) == 0) {
681 0 : return (leftCase < rightCase) ? UCOL_LESS : UCOL_GREATER;
682 : } else {
683 0 : return (leftCase < rightCase) ? UCOL_GREATER : UCOL_LESS;
684 : }
685 : }
686 0 : if(leftPair == EOS) { break; }
687 0 : leftPair >>= 16;
688 0 : rightPair >>= 16;
689 0 : }
690 : }
691 0 : if(CollationSettings::getStrength(options) <= UCOL_SECONDARY) { return UCOL_EQUAL; }
692 :
693 : // Remove the case bits from the tertiary weight when caseLevel is on or caseFirst is off.
694 0 : UBool withCaseBits = CollationSettings::isTertiaryWithCaseBits(options);
695 :
// ---- Tertiary level ----
696 0 : leftIndex = rightIndex = 0;
697 0 : leftPair = rightPair = 0;
698 : for(;;) {
699 0 : while(leftPair == 0) {
700 0 : if(leftIndex == leftLength) {
701 0 : leftPair = EOS;
702 0 : break;
703 : }
704 0 : UChar32 c = left[leftIndex++];
705 0 : leftPair = (c <= 0x7f) ? table[c] : lookupUTF8Unsafe(table, c, left, leftIndex);
706 0 : if(leftPair < MIN_LONG) {
707 0 : leftPair = nextPair(table, c, leftPair, NULL, left, leftIndex, leftLength);
708 : }
709 0 : leftPair = getTertiaries(variableTop, withCaseBits, leftPair);
710 : }
711 :
712 0 : while(rightPair == 0) {
713 0 : if(rightIndex == rightLength) {
714 0 : rightPair = EOS;
715 0 : break;
716 : }
717 0 : UChar32 c = right[rightIndex++];
718 0 : rightPair = (c <= 0x7f) ? table[c] : lookupUTF8Unsafe(table, c, right, rightIndex);
719 0 : if(rightPair < MIN_LONG) {
720 0 : rightPair = nextPair(table, c, rightPair, NULL, right, rightIndex, rightLength);
721 : }
722 0 : rightPair = getTertiaries(variableTop, withCaseBits, rightPair);
723 : }
724 :
725 0 : if(leftPair == rightPair) {
726 0 : if(leftPair == EOS) { break; }
727 0 : leftPair = rightPair = 0;
728 0 : continue;
729 : }
730 0 : uint32_t leftTertiary = leftPair & 0xffff;
731 0 : uint32_t rightTertiary = rightPair & 0xffff;
732 0 : if(leftTertiary != rightTertiary) {
733 0 : if(CollationSettings::sortsTertiaryUpperCaseFirst(options)) {
734 : // Pass through EOS and MERGE_WEIGHT
735 : // and keep real tertiary weights larger than the MERGE_WEIGHT.
736 : // Tertiary CEs (secondary ignorables) are not supported in fast Latin.
737 0 : if(leftTertiary > MERGE_WEIGHT) {
738 0 : leftTertiary ^= CASE_MASK;
739 : }
740 0 : if(rightTertiary > MERGE_WEIGHT) {
741 0 : rightTertiary ^= CASE_MASK;
742 : }
743 : }
744 0 : return (leftTertiary < rightTertiary) ? UCOL_LESS : UCOL_GREATER;
745 : }
746 0 : if(leftPair == EOS) { break; }
747 0 : leftPair >>= 16;
748 0 : rightPair >>= 16;
749 0 : }
750 0 : if(CollationSettings::getStrength(options) <= UCOL_TERTIARY) { return UCOL_EQUAL; }
751 :
// ---- Quaternary level ----
752 0 : leftIndex = rightIndex = 0;
753 0 : leftPair = rightPair = 0;
754 : for(;;) {
755 0 : while(leftPair == 0) {
756 0 : if(leftIndex == leftLength) {
757 0 : leftPair = EOS;
758 0 : break;
759 : }
760 0 : UChar32 c = left[leftIndex++];
761 0 : leftPair = (c <= 0x7f) ? table[c] : lookupUTF8Unsafe(table, c, left, leftIndex);
762 0 : if(leftPair < MIN_LONG) {
763 0 : leftPair = nextPair(table, c, leftPair, NULL, left, leftIndex, leftLength);
764 : }
765 0 : leftPair = getQuaternaries(variableTop, leftPair);
766 : }
767 :
768 0 : while(rightPair == 0) {
769 0 : if(rightIndex == rightLength) {
770 0 : rightPair = EOS;
771 0 : break;
772 : }
773 0 : UChar32 c = right[rightIndex++];
774 0 : rightPair = (c <= 0x7f) ? table[c] : lookupUTF8Unsafe(table, c, right, rightIndex);
775 0 : if(rightPair < MIN_LONG) {
776 0 : rightPair = nextPair(table, c, rightPair, NULL, right, rightIndex, rightLength);
777 : }
778 0 : rightPair = getQuaternaries(variableTop, rightPair);
779 : }
780 :
781 0 : if(leftPair == rightPair) {
782 0 : if(leftPair == EOS) { break; }
783 0 : leftPair = rightPair = 0;
784 0 : continue;
785 : }
786 0 : uint32_t leftQuaternary = leftPair & 0xffff;
787 0 : uint32_t rightQuaternary = rightPair & 0xffff;
788 0 : if(leftQuaternary != rightQuaternary) {
789 0 : return (leftQuaternary < rightQuaternary) ? UCOL_LESS : UCOL_GREATER;
790 : }
791 0 : if(leftPair == EOS) { break; }
792 0 : leftPair >>= 16;
793 0 : rightPair >>= 16;
794 0 : }
// No difference found on any enabled level.
795 0 : return UCOL_EQUAL;
796 : }
797 :
798 : uint32_t
799 0 : CollationFastLatin::lookup(const uint16_t *table, UChar32 c) {
800 0 : U_ASSERT(c > LATIN_MAX);
801 0 : if(PUNCT_START <= c && c < PUNCT_LIMIT) {
802 0 : return table[c - PUNCT_START + LATIN_LIMIT];
803 0 : } else if(c == 0xfffe) {
804 0 : return MERGE_WEIGHT;
805 0 : } else if(c == 0xffff) {
806 0 : return MAX_SHORT | COMMON_SEC | LOWER_CASE | COMMON_TER;
807 : } else {
808 0 : return BAIL_OUT;
809 : }
810 : }
811 :
812 : uint32_t
813 0 : CollationFastLatin::lookupUTF8(const uint16_t *table, UChar32 c,
814 : const uint8_t *s8, int32_t &sIndex, int32_t sLength) {
815 : // The caller handled ASCII and valid/supported Latin.
816 0 : U_ASSERT(c > 0x7f);
817 0 : int32_t i2 = sIndex + 1;
818 0 : if(i2 < sLength || sLength < 0) {
819 0 : uint8_t t1 = s8[sIndex];
820 0 : uint8_t t2 = s8[i2];
821 0 : sIndex += 2;
822 0 : if(c == 0xe2 && t1 == 0x80 && 0x80 <= t2 && t2 <= 0xbf) {
823 0 : return table[(LATIN_LIMIT - 0x80) + t2]; // 2000..203F -> 0180..01BF
824 0 : } else if(c == 0xef && t1 == 0xbf) {
825 0 : if(t2 == 0xbe) {
826 0 : return MERGE_WEIGHT; // U+FFFE
827 0 : } else if(t2 == 0xbf) {
828 0 : return MAX_SHORT | COMMON_SEC | LOWER_CASE | COMMON_TER; // U+FFFF
829 : }
830 : }
831 : }
832 0 : return BAIL_OUT;
833 : }
834 :
835 : uint32_t
836 0 : CollationFastLatin::lookupUTF8Unsafe(const uint16_t *table, UChar32 c,
837 : const uint8_t *s8, int32_t &sIndex) {
838 : // The caller handled ASCII.
839 : // The string is well-formed and contains only supported characters.
840 0 : U_ASSERT(c > 0x7f);
841 0 : if(c <= LATIN_MAX_UTF8_LEAD) {
842 0 : return table[((c - 0xc2) << 6) + s8[sIndex++]]; // 0080..017F
843 : }
844 0 : uint8_t t2 = s8[sIndex + 1];
845 0 : sIndex += 2;
846 0 : if(c == 0xe2) {
847 0 : return table[(LATIN_LIMIT - 0x80) + t2]; // 2000..203F -> 0180..01BF
848 0 : } else if(t2 == 0xbe) {
849 0 : return MERGE_WEIGHT; // U+FFFE
850 : } else {
851 0 : return MAX_SHORT | COMMON_SEC | LOWER_CASE | COMMON_TER; // U+FFFF
852 : }
853 : }
854 :
// Turns the mini CE `ce` that was looked up for character `c` into the value handed back
// to the caller: either one mini CE, or two mini CEs packed into one 32-bit value
// (first CE in the low 16 bits, second CE in the high 16 bits).
//
// - ce >= MIN_LONG or ce < CONTRACTION: returned unchanged.
// - ce >= EXPANSION: the two table entries at NUM_FAST_CHARS + (ce & INDEX_MASK) are packed.
// - otherwise (contraction): the next text unit is read from s16 when it is non-NULL,
//   else decoded from the bytes in s8, and looked up in the contraction suffix list.
//   sIndex is advanced past that character only if a suffix matches.
//
// sLength < 0 is treated as "length not known yet"; when a NUL is seen in that state,
// sLength is set and the NUL is not treated as text (presumably a NUL-terminated string —
// confirm with callers).
//
// Returns EOS for a NUL `c` with unknown length, and BAIL_OUT when the next character
// cannot be mapped here or the selected list entry has length 1.
855 : uint32_t
856 0 : CollationFastLatin::nextPair(const uint16_t *table, UChar32 c, uint32_t ce,
857 : const UChar *s16, const uint8_t *s8, int32_t &sIndex, int32_t &sLength) {
858 0 : if(ce >= MIN_LONG || ce < CONTRACTION) {
859 0 : return ce; // simple or special mini CE
860 0 : } else if(ce >= EXPANSION) {
861 0 : int32_t index = NUM_FAST_CHARS + (ce & INDEX_MASK);
862 0 : return ((uint32_t)table[index + 1] << 16) | table[index]; // table[index] low half, table[index + 1] high half
863 : } else /* ce >= CONTRACTION */ {
864 0 : if(c == 0 && sLength < 0) {
865 0 : sLength = sIndex - 1; // record the length; presumably sIndex is already past the NUL — confirm with callers
866 0 : return EOS;
867 : }
868 : // Contraction list: Default mapping followed by
869 : // 0 or more single-character contraction suffix mappings.
870 0 : int32_t index = NUM_FAST_CHARS + (ce & INDEX_MASK);
871 0 : if(sIndex != sLength) { // when nothing is left to read, index stays on the default mapping
872 : // Read the next character.
873 : int32_t c2;
874 0 : int32_t nextIndex = sIndex; // tentative position; sIndex itself is only updated on a suffix match
875 0 : if(s16 != NULL) {
876 0 : c2 = s16[nextIndex++];
877 0 : if(c2 > LATIN_MAX) {
878 0 : if(PUNCT_START <= c2 && c2 < PUNCT_LIMIT) {
879 0 : c2 = c2 - PUNCT_START + LATIN_LIMIT; // 2000..203F -> 0180..01BF
880 0 : } else if(c2 == 0xfffe || c2 == 0xffff) {
881 0 : c2 = -1; // U+FFFE & U+FFFF cannot occur in contractions.
882 : } else {
883 0 : return BAIL_OUT;
884 : }
885 : }
886 : } else {
887 0 : c2 = s8[nextIndex++];
888 0 : if(c2 > 0x7f) { // not a single-byte value: decode a multi-byte sequence
889 : uint8_t t;
890 0 : if(c2 <= 0xc5 && 0xc2 <= c2 && nextIndex != sLength &&
891 0 : 0x80 <= (t = s8[nextIndex]) && t <= 0xbf) { // lead byte c2..c5 followed by a trail byte 80..bf
892 0 : c2 = ((c2 - 0xc2) << 6) + t; // 0080..017F
893 0 : ++nextIndex;
894 : } else {
895 0 : int32_t i2 = nextIndex + 1; // index of the third byte of a three-byte sequence
896 0 : if(i2 < sLength || sLength < 0) { // third byte is in bounds, or the length is unknown
897 0 : if(c2 == 0xe2 && s8[nextIndex] == 0x80 &&
898 0 : 0x80 <= (t = s8[i2]) && t <= 0xbf) {
899 0 : c2 = (LATIN_LIMIT - 0x80) + t; // 2000..203F -> 0180..01BF
900 0 : } else if(c2 == 0xef && s8[nextIndex] == 0xbf &&
901 0 : ((t = s8[i2]) == 0xbe || t == 0xbf)) {
902 0 : c2 = -1; // U+FFFE & U+FFFF cannot occur in contractions.
903 : } else {
904 0 : return BAIL_OUT;
905 : }
906 : } else {
907 0 : return BAIL_OUT;
908 : }
909 0 : nextIndex += 2; // step over the second and third bytes
910 : }
911 : }
912 : }
913 0 : if(c2 == 0 && sLength < 0) { // NUL read while the length is still unknown
914 0 : sLength = sIndex; // the text ends at sIndex, where the NUL was read
915 0 : c2 = -1; // x below is a masked 16-bit table value and never negative, so -1 matches no suffix
916 : }
917 : // Look for the next character in the contraction suffix list,
918 : // which is in ascending order of single suffix characters.
919 0 : int32_t i = index;
920 0 : int32_t head = table[i]; // first skip the default mapping
921 : int32_t x;
922 0 : do {
923 0 : i += head >> CONTR_LENGTH_SHIFT; // advance by the length stored in the current entry's head
924 0 : head = table[i];
925 0 : x = head & CONTR_CHAR_MASK; // suffix character of this entry
926 0 : } while(x < c2); // NOTE(review): no bounds check; relies on the list ending with an entry >= any c2 — confirm against the table builder
927 0 : if(x == c2) {
928 0 : index = i; // use the matching suffix mapping instead of the default
929 0 : sIndex = nextIndex; // consume the suffix character
930 : }
931 : }
932 : // Return the CE or CEs for the default or contraction mapping.
933 0 : int32_t length = table[index] >> CONTR_LENGTH_SHIFT; // entry length, counting the head unit
934 0 : if(length == 1) { // head only, no mini CE stored for this mapping
935 0 : return BAIL_OUT;
936 : }
937 0 : ce = table[index + 1];
938 0 : if(length == 2) { // exactly one mini CE
939 0 : return ce;
940 : } else {
941 0 : return ((uint32_t)table[index + 2] << 16) | ce; // two mini CEs: second one goes in the high 16 bits
942 : }
943 : }
944 : }
945 :
// Reduces `pair` (one mini CE if <= 0xffff, otherwise two mini CEs with the first in the
// low 16 bits) to the value used for the secondary level — presumably the secondary
// weights, judging by the function and constant names.
//
// The (first) mini CE is classified by range:
//   >= MIN_SHORT        : weights are extracted from the CE(s)
//   >  variableTop      : replaced by the common-secondary constant
//   >= MIN_LONG         : variable, result is 0
//   otherwise           : special mini CE, returned unchanged (single-CE case only)
946 : uint32_t
947 0 : CollationFastLatin::getSecondaries(uint32_t variableTop, uint32_t pair) {
948 0 : if(pair <= 0xffff) {
949 : // one mini CE
950 0 : if(pair >= MIN_SHORT) {
951 0 : pair = getSecondariesFromOneShortCE(pair);
952 0 : } else if(pair > variableTop) {
953 0 : pair = COMMON_SEC_PLUS_OFFSET;
954 0 : } else if(pair >= MIN_LONG) {
955 0 : pair = 0; // variable
956 : }
957 : // else special mini CE
958 : } else {
959 0 : uint32_t ce = pair & 0xffff; // classify by the first mini CE (low 16 bits)
960 0 : if(ce >= MIN_SHORT) {
961 0 : pair = (pair & TWO_SECONDARIES_MASK) + TWO_SEC_OFFSETS; // keep the masked bits of both halves, then add the offsets
962 0 : } else if(ce > variableTop) {
963 0 : pair = TWO_COMMON_SEC_PLUS_OFFSET;
964 : } else {
965 0 : U_ASSERT(ce >= MIN_LONG); // a special mini CE is not expected inside a two-CE pair
966 0 : pair = 0; // variable
967 : }
968 : }
969 0 : return pair;
970 : }
971 :
// Reduces `pair` (one mini CE if <= 0xffff, otherwise two mini CEs with the first in the
// low 16 bits) to its case-level bits.
//
// strengthIsPrimary selects the primary+caseLevel behavior described in the comment
// below: case bits belonging to a CE without a primary weight are then left out.
// Variable CEs (>= MIN_LONG and <= variableTop) yield 0; a single special mini CE
// (< MIN_LONG) is returned unchanged.
972 : uint32_t
973 0 : CollationFastLatin::getCases(uint32_t variableTop, UBool strengthIsPrimary, uint32_t pair) {
974 : // Primary+caseLevel: Ignore case level weights of primary ignorables.
975 : // Otherwise: Ignore case level weights of secondary ignorables.
976 : // For details see the comments in the CollationCompare class.
977 : // Tertiary CEs (secondary ignorables) are not supported in fast Latin.
978 0 : if(pair <= 0xffff) {
979 : // one mini CE
980 0 : if(pair >= MIN_SHORT) {
981 : // A high secondary weight means we really have two CEs,
982 : // a primary CE and a secondary CE.
983 0 : uint32_t ce = pair; // keep the unmasked CE for the secondary-weight test below
984 0 : pair &= CASE_MASK; // explicit weight of primary CE
985 0 : if(!strengthIsPrimary && (ce & SECONDARY_MASK) >= MIN_SEC_HIGH) {
986 0 : pair |= LOWER_CASE << 16; // implied weight of secondary CE
987 : }
988 0 : } else if(pair > variableTop) {
989 0 : pair = LOWER_CASE;
990 0 : } else if(pair >= MIN_LONG) {
991 0 : pair = 0; // variable
992 : }
993 : // else special mini CE
994 : } else {
995 : // two mini CEs, same primary groups, neither expands like above
996 0 : uint32_t ce = pair & 0xffff; // classify by the first mini CE (low 16 bits)
997 0 : if(ce >= MIN_SHORT) {
998 0 : if(strengthIsPrimary && (pair & (SHORT_PRIMARY_MASK << 16)) == 0) { // second CE has no short-primary bits set
999 0 : pair &= CASE_MASK; // keep only the first CE's case bits
1000 : } else {
1001 0 : pair &= TWO_CASES_MASK; // keep the case bits of both CEs
1002 : }
1003 0 : } else if(ce > variableTop) {
1004 0 : pair = TWO_LOWER_CASES;
1005 : } else {
1006 0 : U_ASSERT(ce >= MIN_LONG); // a special mini CE is not expected inside a two-CE pair
1007 0 : pair = 0; // variable
1008 : }
1009 : }
1010 0 : return pair;
1011 : }
1012 :
// Reduces `pair` (one mini CE if <= 0xffff, otherwise two mini CEs with the first in the
// low 16 bits) to its tertiary bits plus an offset, and also keeps or adds case bits
// when withCaseBits is true.
//
// Variable CEs (>= MIN_LONG and <= variableTop) yield 0; a single special mini CE
// (< MIN_LONG) is returned unchanged.
1013 : uint32_t
1014 0 : CollationFastLatin::getTertiaries(uint32_t variableTop, UBool withCaseBits, uint32_t pair) {
1015 0 : if(pair <= 0xffff) {
1016 : // one mini CE
1017 0 : if(pair >= MIN_SHORT) {
1018 : // A high secondary weight means we really have two CEs,
1019 : // a primary CE and a secondary CE.
1020 0 : uint32_t ce = pair; // keep the unmasked CE for the secondary-weight tests below
1021 0 : if(withCaseBits) {
1022 0 : pair = (pair & CASE_AND_TERTIARY_MASK) + TER_OFFSET;
1023 0 : if((ce & SECONDARY_MASK) >= MIN_SEC_HIGH) {
1024 0 : pair |= (LOWER_CASE | COMMON_TER_PLUS_OFFSET) << 16; // value for the implied second CE, in the high half
1025 : }
1026 : } else {
1027 0 : pair = (pair & TERTIARY_MASK) + TER_OFFSET;
1028 0 : if((ce & SECONDARY_MASK) >= MIN_SEC_HIGH) {
1029 0 : pair |= COMMON_TER_PLUS_OFFSET << 16; // value for the implied second CE, in the high half
1030 : }
1031 : }
1032 0 : } else if(pair > variableTop) {
1033 0 : pair = (pair & TERTIARY_MASK) + TER_OFFSET;
1034 0 : if(withCaseBits) {
1035 0 : pair |= LOWER_CASE; // this branch does not copy case bits from the CE; LOWER_CASE is set instead
1036 : }
1037 0 : } else if(pair >= MIN_LONG) {
1038 0 : pair = 0; // variable
1039 : }
1040 : // else special mini CE
1041 : } else {
1042 : // two mini CEs, same primary groups, neither expands like above
1043 0 : uint32_t ce = pair & 0xffff; // classify by the first mini CE (low 16 bits)
1044 0 : if(ce >= MIN_SHORT) {
1045 0 : if(withCaseBits) {
1046 0 : pair &= TWO_CASES_MASK | TWO_TERTIARIES_MASK;
1047 : } else {
1048 0 : pair &= TWO_TERTIARIES_MASK;
1049 : }
1050 0 : pair += TWO_TER_OFFSETS; // offset is added to both halves in either case
1051 0 : } else if(ce > variableTop) {
1052 0 : pair = (pair & TWO_TERTIARIES_MASK) + TWO_TER_OFFSETS;
1053 0 : if(withCaseBits) {
1054 0 : pair |= TWO_LOWER_CASES;
1055 : }
1056 : } else {
1057 0 : U_ASSERT(ce >= MIN_LONG); // a special mini CE is not expected inside a two-CE pair
1058 0 : pair = 0; // variable
1059 : }
1060 : }
1061 0 : return pair;
1062 : }
1063 :
// Reduces `pair` (one mini CE if <= 0xffff, otherwise two mini CEs with the first in the
// low 16 bits) to the value used for the quaternary level, as described in the comment
// at the top of the body. A single special mini CE (< MIN_LONG) is returned unchanged.
1064 : uint32_t
1065 0 : CollationFastLatin::getQuaternaries(uint32_t variableTop, uint32_t pair) {
1066 : // Return the primary weight of a variable CE,
1067 : // or the maximum primary weight for a non-variable, not-completely-ignorable CE.
1068 0 : if(pair <= 0xffff) {
1069 : // one mini CE
1070 0 : if(pair >= MIN_SHORT) {
1071 : // A high secondary weight means we really have two CEs,
1072 : // a primary CE and a secondary CE.
1073 0 : if((pair & SECONDARY_MASK) >= MIN_SEC_HIGH) {
1074 0 : pair = TWO_SHORT_PRIMARIES_MASK; // maximum value in both halves
1075 : } else {
1076 0 : pair = SHORT_PRIMARY_MASK; // maximum value for one CE
1077 : }
1078 0 : } else if(pair > variableTop) {
1079 0 : pair = SHORT_PRIMARY_MASK;
1080 0 : } else if(pair >= MIN_LONG) {
1081 0 : pair &= LONG_PRIMARY_MASK; // variable
1082 : }
1083 : // else special mini CE
1084 : } else {
1085 : // two mini CEs, same primary groups, neither expands like above
1086 0 : uint32_t ce = pair & 0xffff; // classify by the first mini CE (low 16 bits)
1087 0 : if(ce > variableTop) { // covers both the short and the non-variable long case
1088 0 : pair = TWO_SHORT_PRIMARIES_MASK;
1089 : } else {
1090 0 : U_ASSERT(ce >= MIN_LONG); // a special mini CE is not expected inside a two-CE pair
1091 0 : pair &= TWO_LONG_PRIMARIES_MASK; // variable
1092 : }
1093 : }
1094 0 : return pair;
1095 : }
1096 :
1097 : U_NAMESPACE_END
1098 :
1099 : #endif // !UCONFIG_NO_COLLATION
|