Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : *******************************************************************************
5 : * Copyright (C) 2013-2015, International Business Machines
6 : * Corporation and others. All Rights Reserved.
7 : *******************************************************************************
8 : * collationsettings.cpp
9 : *
10 : * created on: 2013feb07
11 : * created by: Markus W. Scherer
12 : */
13 :
14 : #include "unicode/utypes.h"
15 :
16 : #if !UCONFIG_NO_COLLATION
17 :
18 : #include "unicode/ucol.h"
19 : #include "cmemory.h"
20 : #include "collation.h"
21 : #include "collationdata.h"
22 : #include "collationsettings.h"
23 : #include "sharedobject.h"
24 : #include "uassert.h"
25 : #include "umutex.h"
26 : #include "uvectr32.h"
27 :
28 : U_NAMESPACE_BEGIN
29 :
30 0 : CollationSettings::CollationSettings(const CollationSettings &other)
31 : : SharedObject(other),
32 0 : options(other.options), variableTop(other.variableTop),
33 : reorderTable(NULL),
34 0 : minHighNoReorder(other.minHighNoReorder),
35 : reorderRanges(NULL), reorderRangesLength(0),
36 : reorderCodes(NULL), reorderCodesLength(0), reorderCodesCapacity(0),
37 0 : fastLatinOptions(other.fastLatinOptions) {
38 0 : UErrorCode errorCode = U_ZERO_ERROR;
39 0 : copyReorderingFrom(other, errorCode);
40 0 : if(fastLatinOptions >= 0) {
41 0 : uprv_memcpy(fastLatinPrimaries, other.fastLatinPrimaries, sizeof(fastLatinPrimaries));
42 : }
43 0 : }
44 :
45 0 : CollationSettings::~CollationSettings() {
46 0 : if(reorderCodesCapacity != 0) {
47 0 : uprv_free(const_cast<int32_t *>(reorderCodes));
48 : }
49 0 : }
50 :
51 : UBool
52 0 : CollationSettings::operator==(const CollationSettings &other) const {
53 0 : if(options != other.options) { return FALSE; }
54 0 : if((options & ALTERNATE_MASK) != 0 && variableTop != other.variableTop) { return FALSE; }
55 0 : if(reorderCodesLength != other.reorderCodesLength) { return FALSE; }
56 0 : for(int32_t i = 0; i < reorderCodesLength; ++i) {
57 0 : if(reorderCodes[i] != other.reorderCodes[i]) { return FALSE; }
58 : }
59 0 : return TRUE;
60 : }
61 :
62 : int32_t
63 0 : CollationSettings::hashCode() const {
64 0 : int32_t h = options << 8;
65 0 : if((options & ALTERNATE_MASK) != 0) { h ^= variableTop; }
66 0 : h ^= reorderCodesLength;
67 0 : for(int32_t i = 0; i < reorderCodesLength; ++i) {
68 0 : h ^= (reorderCodes[i] << i);
69 : }
70 0 : return h;
71 : }
72 :
73 : void
74 0 : CollationSettings::resetReordering() {
75 : // When we turn off reordering, we want to set a NULL permutation
76 : // rather than a no-op permutation.
77 : // Keep the memory via reorderCodes and its capacity.
78 0 : reorderTable = NULL;
79 0 : minHighNoReorder = 0;
80 0 : reorderRangesLength = 0;
81 0 : reorderCodesLength = 0;
82 0 : }
83 :
84 : void
85 0 : CollationSettings::aliasReordering(const CollationData &data, const int32_t *codes, int32_t length,
86 : const uint32_t *ranges, int32_t rangesLength,
87 : const uint8_t *table, UErrorCode &errorCode) {
88 0 : if(U_FAILURE(errorCode)) { return; }
89 0 : if(table != NULL &&
90 0 : (rangesLength == 0 ?
91 0 : !reorderTableHasSplitBytes(table) :
92 0 : rangesLength >= 2 &&
93 : // The first offset must be 0. The last offset must not be 0.
94 0 : (ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0)) {
95 : // We need to release the memory before setting the alias pointer.
96 0 : if(reorderCodesCapacity != 0) {
97 0 : uprv_free(const_cast<int32_t *>(reorderCodes));
98 0 : reorderCodesCapacity = 0;
99 : }
100 0 : reorderTable = table;
101 0 : reorderCodes = codes;
102 0 : reorderCodesLength = length;
103 : // Drop ranges before the first split byte. They are reordered by the table.
104 : // This then speeds up reordering of the remaining ranges.
105 0 : int32_t firstSplitByteRangeIndex = 0;
106 0 : while(firstSplitByteRangeIndex < rangesLength &&
107 0 : (ranges[firstSplitByteRangeIndex] & 0xff0000) == 0) {
108 : // The second byte of the primary limit is 0.
109 0 : ++firstSplitByteRangeIndex;
110 : }
111 0 : if(firstSplitByteRangeIndex == rangesLength) {
112 0 : U_ASSERT(!reorderTableHasSplitBytes(table));
113 0 : minHighNoReorder = 0;
114 0 : reorderRanges = NULL;
115 0 : reorderRangesLength = 0;
116 : } else {
117 0 : U_ASSERT(table[ranges[firstSplitByteRangeIndex] >> 24] == 0);
118 0 : minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000;
119 0 : reorderRanges = ranges + firstSplitByteRangeIndex;
120 0 : reorderRangesLength = rangesLength - firstSplitByteRangeIndex;
121 : }
122 0 : return;
123 : }
124 : // Regenerate missing data.
125 0 : setReordering(data, codes, length, errorCode);
126 : }
127 :
128 : void
129 0 : CollationSettings::setReordering(const CollationData &data,
130 : const int32_t *codes, int32_t codesLength,
131 : UErrorCode &errorCode) {
132 0 : if(U_FAILURE(errorCode)) { return; }
133 0 : if(codesLength == 0 || (codesLength == 1 && codes[0] == UCOL_REORDER_CODE_NONE)) {
134 0 : resetReordering();
135 0 : return;
136 : }
137 0 : UVector32 rangesList(errorCode);
138 0 : data.makeReorderRanges(codes, codesLength, rangesList, errorCode);
139 0 : if(U_FAILURE(errorCode)) { return; }
140 0 : int32_t rangesLength = rangesList.size();
141 0 : if(rangesLength == 0) {
142 0 : resetReordering();
143 0 : return;
144 : }
145 0 : const uint32_t *ranges = reinterpret_cast<uint32_t *>(rangesList.getBuffer());
146 : // ranges[] contains at least two (limit, offset) pairs.
147 : // The first offset must be 0. The last offset must not be 0.
148 : // Separators (at the low end) and trailing weights (at the high end)
149 : // are never reordered.
150 0 : U_ASSERT(rangesLength >= 2);
151 0 : U_ASSERT((ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0);
152 0 : minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000;
153 :
154 : // Write the lead byte permutation table.
155 : // Set a 0 for each lead byte that has a range boundary in the middle.
156 : uint8_t table[256];
157 0 : int32_t b = 0;
158 0 : int32_t firstSplitByteRangeIndex = -1;
159 0 : for(int32_t i = 0; i < rangesLength; ++i) {
160 0 : uint32_t pair = ranges[i];
161 0 : int32_t limit1 = (int32_t)(pair >> 24);
162 0 : while(b < limit1) {
163 0 : table[b] = (uint8_t)(b + pair);
164 0 : ++b;
165 : }
166 : // Check the second byte of the limit.
167 0 : if((pair & 0xff0000) != 0) {
168 0 : table[limit1] = 0;
169 0 : b = limit1 + 1;
170 0 : if(firstSplitByteRangeIndex < 0) {
171 0 : firstSplitByteRangeIndex = i;
172 : }
173 : }
174 : }
175 0 : while(b <= 0xff) {
176 0 : table[b] = (uint8_t)b;
177 0 : ++b;
178 : }
179 0 : if(firstSplitByteRangeIndex < 0) {
180 : // The lead byte permutation table alone suffices for reordering.
181 0 : rangesLength = 0;
182 : } else {
183 : // Remove the ranges below the first split byte.
184 0 : ranges += firstSplitByteRangeIndex;
185 0 : rangesLength -= firstSplitByteRangeIndex;
186 : }
187 0 : setReorderArrays(codes, codesLength, ranges, rangesLength, table, errorCode);
188 : }
189 :
190 : void
191 0 : CollationSettings::setReorderArrays(const int32_t *codes, int32_t codesLength,
192 : const uint32_t *ranges, int32_t rangesLength,
193 : const uint8_t *table, UErrorCode &errorCode) {
194 0 : if(U_FAILURE(errorCode)) { return; }
195 : int32_t *ownedCodes;
196 0 : int32_t totalLength = codesLength + rangesLength;
197 0 : U_ASSERT(totalLength > 0);
198 0 : if(totalLength <= reorderCodesCapacity) {
199 0 : ownedCodes = const_cast<int32_t *>(reorderCodes);
200 : } else {
201 : // Allocate one memory block for the codes, the ranges, and the 16-aligned table.
202 0 : int32_t capacity = (totalLength + 3) & ~3; // round up to a multiple of 4 ints
203 0 : ownedCodes = (int32_t *)uprv_malloc(capacity * 4 + 256);
204 0 : if(ownedCodes == NULL) {
205 0 : resetReordering();
206 0 : errorCode = U_MEMORY_ALLOCATION_ERROR;
207 0 : return;
208 : }
209 0 : if(reorderCodesCapacity != 0) {
210 0 : uprv_free(const_cast<int32_t *>(reorderCodes));
211 : }
212 0 : reorderCodes = ownedCodes;
213 0 : reorderCodesCapacity = capacity;
214 : }
215 0 : uprv_memcpy(ownedCodes + reorderCodesCapacity, table, 256);
216 0 : uprv_memcpy(ownedCodes, codes, codesLength * 4);
217 0 : uprv_memcpy(ownedCodes + codesLength, ranges, rangesLength * 4);
218 0 : reorderTable = reinterpret_cast<const uint8_t *>(reorderCodes + reorderCodesCapacity);
219 0 : reorderCodesLength = codesLength;
220 0 : reorderRanges = reinterpret_cast<uint32_t *>(ownedCodes) + codesLength;
221 0 : reorderRangesLength = rangesLength;
222 : }
223 :
224 : void
225 0 : CollationSettings::copyReorderingFrom(const CollationSettings &other, UErrorCode &errorCode) {
226 0 : if(U_FAILURE(errorCode)) { return; }
227 0 : if(!other.hasReordering()) {
228 0 : resetReordering();
229 0 : return;
230 : }
231 0 : minHighNoReorder = other.minHighNoReorder;
232 0 : if(other.reorderCodesCapacity == 0) {
233 : // The reorder arrays are aliased to memory-mapped data.
234 0 : reorderTable = other.reorderTable;
235 0 : reorderRanges = other.reorderRanges;
236 0 : reorderRangesLength = other.reorderRangesLength;
237 0 : reorderCodes = other.reorderCodes;
238 0 : reorderCodesLength = other.reorderCodesLength;
239 : } else {
240 0 : setReorderArrays(other.reorderCodes, other.reorderCodesLength,
241 0 : other.reorderRanges, other.reorderRangesLength,
242 0 : other.reorderTable, errorCode);
243 : }
244 : }
245 :
246 : UBool
247 0 : CollationSettings::reorderTableHasSplitBytes(const uint8_t table[256]) {
248 0 : U_ASSERT(table[0] == 0);
249 0 : for(int32_t i = 1; i < 256; ++i) {
250 0 : if(table[i] == 0) {
251 0 : return TRUE;
252 : }
253 : }
254 0 : return FALSE;
255 : }
256 :
257 : uint32_t
258 0 : CollationSettings::reorderEx(uint32_t p) const {
259 0 : if(p >= minHighNoReorder) { return p; }
260 : // Round up p so that its lower 16 bits are >= any offset bits.
261 : // Then compare q directly with (limit, offset) pairs.
262 0 : uint32_t q = p | 0xffff;
263 : uint32_t r;
264 0 : const uint32_t *ranges = reorderRanges;
265 0 : while(q >= (r = *ranges)) { ++ranges; }
266 0 : return p + (r << 24);
267 : }
268 :
269 : void
270 0 : CollationSettings::setStrength(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) {
271 0 : if(U_FAILURE(errorCode)) { return; }
272 0 : int32_t noStrength = options & ~STRENGTH_MASK;
273 0 : switch(value) {
274 : case UCOL_PRIMARY:
275 : case UCOL_SECONDARY:
276 : case UCOL_TERTIARY:
277 : case UCOL_QUATERNARY:
278 : case UCOL_IDENTICAL:
279 0 : options = noStrength | (value << STRENGTH_SHIFT);
280 0 : break;
281 : case UCOL_DEFAULT:
282 0 : options = noStrength | (defaultOptions & STRENGTH_MASK);
283 0 : break;
284 : default:
285 0 : errorCode = U_ILLEGAL_ARGUMENT_ERROR;
286 0 : break;
287 : }
288 : }
289 :
290 : void
291 0 : CollationSettings::setFlag(int32_t bit, UColAttributeValue value,
292 : int32_t defaultOptions, UErrorCode &errorCode) {
293 0 : if(U_FAILURE(errorCode)) { return; }
294 0 : switch(value) {
295 : case UCOL_ON:
296 0 : options |= bit;
297 0 : break;
298 : case UCOL_OFF:
299 0 : options &= ~bit;
300 0 : break;
301 : case UCOL_DEFAULT:
302 0 : options = (options & ~bit) | (defaultOptions & bit);
303 0 : break;
304 : default:
305 0 : errorCode = U_ILLEGAL_ARGUMENT_ERROR;
306 0 : break;
307 : }
308 : }
309 :
310 : void
311 0 : CollationSettings::setCaseFirst(UColAttributeValue value,
312 : int32_t defaultOptions, UErrorCode &errorCode) {
313 0 : if(U_FAILURE(errorCode)) { return; }
314 0 : int32_t noCaseFirst = options & ~CASE_FIRST_AND_UPPER_MASK;
315 0 : switch(value) {
316 : case UCOL_OFF:
317 0 : options = noCaseFirst;
318 0 : break;
319 : case UCOL_LOWER_FIRST:
320 0 : options = noCaseFirst | CASE_FIRST;
321 0 : break;
322 : case UCOL_UPPER_FIRST:
323 0 : options = noCaseFirst | CASE_FIRST_AND_UPPER_MASK;
324 0 : break;
325 : case UCOL_DEFAULT:
326 0 : options = noCaseFirst | (defaultOptions & CASE_FIRST_AND_UPPER_MASK);
327 0 : break;
328 : default:
329 0 : errorCode = U_ILLEGAL_ARGUMENT_ERROR;
330 0 : break;
331 : }
332 : }
333 :
334 : void
335 0 : CollationSettings::setAlternateHandling(UColAttributeValue value,
336 : int32_t defaultOptions, UErrorCode &errorCode) {
337 0 : if(U_FAILURE(errorCode)) { return; }
338 0 : int32_t noAlternate = options & ~ALTERNATE_MASK;
339 0 : switch(value) {
340 : case UCOL_NON_IGNORABLE:
341 0 : options = noAlternate;
342 0 : break;
343 : case UCOL_SHIFTED:
344 0 : options = noAlternate | SHIFTED;
345 0 : break;
346 : case UCOL_DEFAULT:
347 0 : options = noAlternate | (defaultOptions & ALTERNATE_MASK);
348 0 : break;
349 : default:
350 0 : errorCode = U_ILLEGAL_ARGUMENT_ERROR;
351 0 : break;
352 : }
353 : }
354 :
355 : void
356 0 : CollationSettings::setMaxVariable(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) {
357 0 : if(U_FAILURE(errorCode)) { return; }
358 0 : int32_t noMax = options & ~MAX_VARIABLE_MASK;
359 0 : switch(value) {
360 : case MAX_VAR_SPACE:
361 : case MAX_VAR_PUNCT:
362 : case MAX_VAR_SYMBOL:
363 : case MAX_VAR_CURRENCY:
364 0 : options = noMax | (value << MAX_VARIABLE_SHIFT);
365 0 : break;
366 : case UCOL_DEFAULT:
367 0 : options = noMax | (defaultOptions & MAX_VARIABLE_MASK);
368 0 : break;
369 : default:
370 0 : errorCode = U_ILLEGAL_ARGUMENT_ERROR;
371 0 : break;
372 : }
373 : }
374 :
375 : U_NAMESPACE_END
376 :
377 : #endif // !UCONFIG_NO_COLLATION
|