Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : *******************************************************************************
5 : * Copyright (C) 2013-2014, International Business Machines
6 : * Corporation and others. All Rights Reserved.
7 : *******************************************************************************
8 : * collationrootelements.cpp
9 : *
10 : * created on: 2013mar05
11 : * created by: Markus W. Scherer
12 : */
13 :
14 : #include "unicode/utypes.h"
15 :
16 : #if !UCONFIG_NO_COLLATION
17 :
18 : #include "collation.h"
19 : #include "collationrootelements.h"
20 : #include "uassert.h"
21 :
22 : U_NAMESPACE_BEGIN
23 :
24 : int64_t
25 0 : CollationRootElements::lastCEWithPrimaryBefore(uint32_t p) const {
26 0 : if(p == 0) { return 0; }
27 0 : U_ASSERT(p > elements[elements[IX_FIRST_PRIMARY_INDEX]]);
28 0 : int32_t index = findP(p);
29 0 : uint32_t q = elements[index];
30 : uint32_t secTer;
31 0 : if(p == (q & 0xffffff00)) {
32 : // p == elements[index] is a root primary. Find the CE before it.
33 : // We must not be in a primary range.
34 0 : U_ASSERT((q & PRIMARY_STEP_MASK) == 0);
35 0 : secTer = elements[index - 1];
36 0 : if((secTer & SEC_TER_DELTA_FLAG) == 0) {
37 : // Primary CE just before p.
38 0 : p = secTer & 0xffffff00;
39 0 : secTer = Collation::COMMON_SEC_AND_TER_CE;
40 : } else {
41 : // secTer = last secondary & tertiary for the previous primary
42 0 : index -= 2;
43 : for(;;) {
44 0 : p = elements[index];
45 0 : if((p & SEC_TER_DELTA_FLAG) == 0) {
46 0 : p &= 0xffffff00;
47 0 : break;
48 : }
49 0 : --index;
50 : }
51 : }
52 : } else {
53 : // p > elements[index] which is the previous primary.
54 : // Find the last secondary & tertiary weights for it.
55 0 : p = q & 0xffffff00;
56 0 : secTer = Collation::COMMON_SEC_AND_TER_CE;
57 : for(;;) {
58 0 : q = elements[++index];
59 0 : if((q & SEC_TER_DELTA_FLAG) == 0) {
60 : // We must not be in a primary range.
61 0 : U_ASSERT((q & PRIMARY_STEP_MASK) == 0);
62 0 : break;
63 : }
64 0 : secTer = q;
65 : }
66 : }
67 0 : return ((int64_t)p << 32) | (secTer & ~SEC_TER_DELTA_FLAG);
68 : }
69 :
70 : int64_t
71 0 : CollationRootElements::firstCEWithPrimaryAtLeast(uint32_t p) const {
72 0 : if(p == 0) { return 0; }
73 0 : int32_t index = findP(p);
74 0 : if(p != (elements[index] & 0xffffff00)) {
75 : for(;;) {
76 0 : p = elements[++index];
77 0 : if((p & SEC_TER_DELTA_FLAG) == 0) {
78 : // First primary after p. We must not be in a primary range.
79 0 : U_ASSERT((p & PRIMARY_STEP_MASK) == 0);
80 0 : break;
81 : }
82 : }
83 : }
84 : // The code above guarantees that p has at most 3 bytes: (p & 0xff) == 0.
85 0 : return ((int64_t)p << 32) | Collation::COMMON_SEC_AND_TER_CE;
86 : }
87 :
88 : uint32_t
89 0 : CollationRootElements::getPrimaryBefore(uint32_t p, UBool isCompressible) const {
90 0 : int32_t index = findPrimary(p);
91 : int32_t step;
92 0 : uint32_t q = elements[index];
93 0 : if(p == (q & 0xffffff00)) {
94 : // Found p itself. Return the previous primary.
95 : // See if p is at the end of a previous range.
96 0 : step = (int32_t)q & PRIMARY_STEP_MASK;
97 0 : if(step == 0) {
98 : // p is not at the end of a range. Look for the previous primary.
99 0 : do {
100 0 : p = elements[--index];
101 0 : } while((p & SEC_TER_DELTA_FLAG) != 0);
102 0 : return p & 0xffffff00;
103 : }
104 : } else {
105 : // p is in a range, and not at the start.
106 0 : uint32_t nextElement = elements[index + 1];
107 0 : U_ASSERT(isEndOfPrimaryRange(nextElement));
108 0 : step = (int32_t)nextElement & PRIMARY_STEP_MASK;
109 : }
110 : // Return the previous range primary.
111 0 : if((p & 0xffff) == 0) {
112 0 : return Collation::decTwoBytePrimaryByOneStep(p, isCompressible, step);
113 : } else {
114 0 : return Collation::decThreeBytePrimaryByOneStep(p, isCompressible, step);
115 : }
116 : }
117 :
118 : uint32_t
119 0 : CollationRootElements::getSecondaryBefore(uint32_t p, uint32_t s) const {
120 : int32_t index;
121 : uint32_t previousSec, sec;
122 0 : if(p == 0) {
123 0 : index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX];
124 : // Gap at the beginning of the secondary CE range.
125 0 : previousSec = 0;
126 0 : sec = elements[index] >> 16;
127 : } else {
128 0 : index = findPrimary(p) + 1;
129 0 : previousSec = Collation::BEFORE_WEIGHT16;
130 0 : sec = getFirstSecTerForPrimary(index) >> 16;
131 : }
132 0 : U_ASSERT(s >= sec);
133 0 : while(s > sec) {
134 0 : previousSec = sec;
135 0 : U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0);
136 0 : sec = elements[index++] >> 16;
137 : }
138 0 : U_ASSERT(sec == s);
139 0 : return previousSec;
140 : }
141 :
142 : uint32_t
143 0 : CollationRootElements::getTertiaryBefore(uint32_t p, uint32_t s, uint32_t t) const {
144 0 : U_ASSERT((t & ~Collation::ONLY_TERTIARY_MASK) == 0);
145 : int32_t index;
146 : uint32_t previousTer, secTer;
147 0 : if(p == 0) {
148 0 : if(s == 0) {
149 0 : index = (int32_t)elements[IX_FIRST_TERTIARY_INDEX];
150 : // Gap at the beginning of the tertiary CE range.
151 0 : previousTer = 0;
152 : } else {
153 0 : index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX];
154 0 : previousTer = Collation::BEFORE_WEIGHT16;
155 : }
156 0 : secTer = elements[index] & ~SEC_TER_DELTA_FLAG;
157 : } else {
158 0 : index = findPrimary(p) + 1;
159 0 : previousTer = Collation::BEFORE_WEIGHT16;
160 0 : secTer = getFirstSecTerForPrimary(index);
161 : }
162 0 : uint32_t st = (s << 16) | t;
163 0 : while(st > secTer) {
164 0 : if((secTer >> 16) == s) { previousTer = secTer; }
165 0 : U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0);
166 0 : secTer = elements[index++] & ~SEC_TER_DELTA_FLAG;
167 : }
168 0 : U_ASSERT(secTer == st);
169 0 : return previousTer & 0xffff;
170 : }
171 :
172 : uint32_t
173 0 : CollationRootElements::getPrimaryAfter(uint32_t p, int32_t index, UBool isCompressible) const {
174 0 : U_ASSERT(p == (elements[index] & 0xffffff00) || isEndOfPrimaryRange(elements[index + 1]));
175 0 : uint32_t q = elements[++index];
176 : int32_t step;
177 0 : if((q & SEC_TER_DELTA_FLAG) == 0 && (step = (int32_t)q & PRIMARY_STEP_MASK) != 0) {
178 : // Return the next primary in this range.
179 0 : if((p & 0xffff) == 0) {
180 0 : return Collation::incTwoBytePrimaryByOffset(p, isCompressible, step);
181 : } else {
182 0 : return Collation::incThreeBytePrimaryByOffset(p, isCompressible, step);
183 : }
184 : } else {
185 : // Return the next primary in the list.
186 0 : while((q & SEC_TER_DELTA_FLAG) != 0) {
187 0 : q = elements[++index];
188 : }
189 0 : U_ASSERT((q & PRIMARY_STEP_MASK) == 0);
190 0 : return q;
191 : }
192 : }
193 :
194 : uint32_t
195 0 : CollationRootElements::getSecondaryAfter(int32_t index, uint32_t s) const {
196 : uint32_t secTer;
197 : uint32_t secLimit;
198 0 : if(index == 0) {
199 : // primary = 0
200 0 : U_ASSERT(s != 0);
201 0 : index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX];
202 0 : secTer = elements[index];
203 : // Gap at the end of the secondary CE range.
204 0 : secLimit = 0x10000;
205 : } else {
206 0 : U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]);
207 0 : secTer = getFirstSecTerForPrimary(index + 1);
208 : // If this is an explicit sec/ter unit, then it will be read once more.
209 : // Gap for secondaries of primary CEs.
210 0 : secLimit = getSecondaryBoundary();
211 : }
212 : for(;;) {
213 0 : uint32_t sec = secTer >> 16;
214 0 : if(sec > s) { return sec; }
215 0 : secTer = elements[++index];
216 0 : if((secTer & SEC_TER_DELTA_FLAG) == 0) { return secLimit; }
217 0 : }
218 : }
219 :
220 : uint32_t
221 0 : CollationRootElements::getTertiaryAfter(int32_t index, uint32_t s, uint32_t t) const {
222 : uint32_t secTer;
223 : uint32_t terLimit;
224 0 : if(index == 0) {
225 : // primary = 0
226 0 : if(s == 0) {
227 0 : U_ASSERT(t != 0);
228 0 : index = (int32_t)elements[IX_FIRST_TERTIARY_INDEX];
229 : // Gap at the end of the tertiary CE range.
230 0 : terLimit = 0x4000;
231 : } else {
232 0 : index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX];
233 : // Gap for tertiaries of primary/secondary CEs.
234 0 : terLimit = getTertiaryBoundary();
235 : }
236 0 : secTer = elements[index] & ~SEC_TER_DELTA_FLAG;
237 : } else {
238 0 : U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]);
239 0 : secTer = getFirstSecTerForPrimary(index + 1);
240 : // If this is an explicit sec/ter unit, then it will be read once more.
241 0 : terLimit = getTertiaryBoundary();
242 : }
243 0 : uint32_t st = (s << 16) | t;
244 : for(;;) {
245 0 : if(secTer > st) {
246 0 : U_ASSERT((secTer >> 16) == s);
247 0 : return secTer & 0xffff;
248 : }
249 0 : secTer = elements[++index];
250 : // No tertiary greater than t for this primary+secondary.
251 0 : if((secTer & SEC_TER_DELTA_FLAG) == 0 || (secTer >> 16) > s) { return terLimit; }
252 0 : secTer &= ~SEC_TER_DELTA_FLAG;
253 : }
254 : }
255 :
256 : uint32_t
257 0 : CollationRootElements::getFirstSecTerForPrimary(int32_t index) const {
258 0 : uint32_t secTer = elements[index];
259 0 : if((secTer & SEC_TER_DELTA_FLAG) == 0) {
260 : // No sec/ter delta.
261 0 : return Collation::COMMON_SEC_AND_TER_CE;
262 : }
263 0 : secTer &= ~SEC_TER_DELTA_FLAG;
264 0 : if(secTer > Collation::COMMON_SEC_AND_TER_CE) {
265 : // Implied sec/ter.
266 0 : return Collation::COMMON_SEC_AND_TER_CE;
267 : }
268 : // Explicit sec/ter below common/common.
269 0 : return secTer;
270 : }
271 :
272 : int32_t
273 0 : CollationRootElements::findPrimary(uint32_t p) const {
274 : // Requirement: p must occur as a root primary.
275 0 : U_ASSERT((p & 0xff) == 0); // at most a 3-byte primary
276 0 : int32_t index = findP(p);
277 : // If p is in a range, then we just assume that p is an actual primary in this range.
278 : // (Too cumbersome/expensive to check.)
279 : // Otherwise, it must be an exact match.
280 0 : U_ASSERT(isEndOfPrimaryRange(elements[index + 1]) || p == (elements[index] & 0xffffff00));
281 0 : return index;
282 : }
283 :
284 : int32_t
285 0 : CollationRootElements::findP(uint32_t p) const {
286 : // p need not occur as a root primary.
287 : // For example, it might be a reordering group boundary.
288 0 : U_ASSERT((p >> 24) != Collation::UNASSIGNED_IMPLICIT_BYTE);
289 : // modified binary search
290 0 : int32_t start = (int32_t)elements[IX_FIRST_PRIMARY_INDEX];
291 0 : U_ASSERT(p >= elements[start]);
292 0 : int32_t limit = length - 1;
293 0 : U_ASSERT(elements[limit] >= PRIMARY_SENTINEL);
294 0 : U_ASSERT(p < elements[limit]);
295 0 : while((start + 1) < limit) {
296 : // Invariant: elements[start] and elements[limit] are primaries,
297 : // and elements[start]<=p<=elements[limit].
298 0 : int32_t i = (start + limit) / 2;
299 0 : uint32_t q = elements[i];
300 0 : if((q & SEC_TER_DELTA_FLAG) != 0) {
301 : // Find the next primary.
302 0 : int32_t j = i + 1;
303 : for(;;) {
304 0 : if(j == limit) { break; }
305 0 : q = elements[j];
306 0 : if((q & SEC_TER_DELTA_FLAG) == 0) {
307 0 : i = j;
308 0 : break;
309 : }
310 0 : ++j;
311 : }
312 0 : if((q & SEC_TER_DELTA_FLAG) != 0) {
313 : // Find the preceding primary.
314 0 : j = i - 1;
315 : for(;;) {
316 0 : if(j == start) { break; }
317 0 : q = elements[j];
318 0 : if((q & SEC_TER_DELTA_FLAG) == 0) {
319 0 : i = j;
320 0 : break;
321 : }
322 0 : --j;
323 : }
324 0 : if((q & SEC_TER_DELTA_FLAG) != 0) {
325 : // No primary between start and limit.
326 0 : break;
327 : }
328 : }
329 : }
330 0 : if(p < (q & 0xffffff00)) { // Reset the "step" bits of a range end primary.
331 0 : limit = i;
332 : } else {
333 0 : start = i;
334 : }
335 : }
336 0 : return start;
337 : }
338 :
339 : U_NAMESPACE_END
340 :
341 : #endif // !UCONFIG_NO_COLLATION
|