Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : *******************************************************************************
5 : * Copyright (C) 2010-2014, International Business Machines
6 : * Corporation and others. All Rights Reserved.
7 : *******************************************************************************
8 : * collation.cpp
9 : *
10 : * created on: 2010oct27
11 : * created by: Markus W. Scherer
12 : */
13 :
14 : #include "unicode/utypes.h"
15 :
16 : #if !UCONFIG_NO_COLLATION
17 :
18 : #include "collation.h"
19 : #include "uassert.h"
20 :
21 : U_NAMESPACE_BEGIN
22 :
23 : // Out-of-class definitions of Collation's static constants (no initializers appear here; presumably the values are given where the members are declared).
24 : // MS Visual C++ does not like these definitions, but gcc requires them. clang does not care.
25 : #ifndef _MSC_VER
26 : const uint8_t Collation::LEVEL_SEPARATOR_BYTE;
27 : const uint8_t Collation::MERGE_SEPARATOR_BYTE;
28 : const uint32_t Collation::ONLY_TERTIARY_MASK;
29 : const uint32_t Collation::CASE_AND_TERTIARY_MASK;
30 : #endif
31 :
32 : uint32_t
33 0 : Collation::incTwoBytePrimaryByOffset(uint32_t basePrimary, UBool isCompressible, int32_t offset) {
34 : // Extract the second byte, minus the minimum byte value,
35 : // plus the offset, modulo the number of usable byte values, plus the minimum.
36 : // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
37 : uint32_t primary;
38 0 : if(isCompressible) {
39 0 : offset += ((int32_t)(basePrimary >> 16) & 0xff) - 4;
40 0 : primary = (uint32_t)((offset % 251) + 4) << 16;
41 0 : offset /= 251;
42 : } else {
43 0 : offset += ((int32_t)(basePrimary >> 16) & 0xff) - 2;
44 0 : primary = (uint32_t)((offset % 254) + 2) << 16;
45 0 : offset /= 254;
46 : }
47 : // First byte, assume no further overflow.
48 0 : return primary | ((basePrimary & 0xff000000) + (uint32_t)(offset << 24));
49 : }
50 :
51 : uint32_t
52 0 : Collation::incThreeBytePrimaryByOffset(uint32_t basePrimary, UBool isCompressible, int32_t offset) {
53 : // Extract the third byte, minus the minimum byte value,
54 : // plus the offset, modulo the number of usable byte values, plus the minimum.
55 0 : offset += ((int32_t)(basePrimary >> 8) & 0xff) - 2;
56 0 : uint32_t primary = (uint32_t)((offset % 254) + 2) << 8;
57 0 : offset /= 254;
58 : // Same with the second byte,
59 : // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
60 0 : if(isCompressible) {
61 0 : offset += ((int32_t)(basePrimary >> 16) & 0xff) - 4;
62 0 : primary |= (uint32_t)((offset % 251) + 4) << 16;
63 0 : offset /= 251;
64 : } else {
65 0 : offset += ((int32_t)(basePrimary >> 16) & 0xff) - 2;
66 0 : primary |= (uint32_t)((offset % 254) + 2) << 16;
67 0 : offset /= 254;
68 : }
69 : // First byte, assume no further overflow.
70 0 : return primary | ((basePrimary & 0xff000000) + (uint32_t)(offset << 24));
71 : }
72 :
73 : uint32_t
74 0 : Collation::decTwoBytePrimaryByOneStep(uint32_t basePrimary, UBool isCompressible, int32_t step) {
75 : // Extract the second byte, minus the minimum byte value,
76 : // minus the step, modulo the number of usable byte values, plus the minimum.
77 : // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
78 : // Assume no further underflow for the first byte.
79 0 : U_ASSERT(0 < step && step <= 0x7f);
80 0 : int32_t byte2 = ((int32_t)(basePrimary >> 16) & 0xff) - step;
81 0 : if(isCompressible) {
82 0 : if(byte2 < 4) {
83 0 : byte2 += 251;
84 0 : basePrimary -= 0x1000000;
85 : }
86 : } else {
87 0 : if(byte2 < 2) {
88 0 : byte2 += 254;
89 0 : basePrimary -= 0x1000000;
90 : }
91 : }
92 0 : return (basePrimary & 0xff000000) | ((uint32_t)byte2 << 16);
93 : }
94 :
95 : uint32_t
96 0 : Collation::decThreeBytePrimaryByOneStep(uint32_t basePrimary, UBool isCompressible, int32_t step) {
97 : // Extract the third byte, minus the minimum byte value,
98 : // minus the step, modulo the number of usable byte values, plus the minimum.
99 0 : U_ASSERT(0 < step && step <= 0x7f);
100 0 : int32_t byte3 = ((int32_t)(basePrimary >> 8) & 0xff) - step;
101 0 : if(byte3 >= 2) {
102 0 : return (basePrimary & 0xffff0000) | ((uint32_t)byte3 << 8);
103 : }
104 0 : byte3 += 254;
105 : // Same with the second byte,
106 : // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
107 0 : int32_t byte2 = ((int32_t)(basePrimary >> 16) & 0xff) - 1;
108 0 : if(isCompressible) {
109 0 : if(byte2 < 4) {
110 0 : byte2 = 0xfe;
111 0 : basePrimary -= 0x1000000;
112 : }
113 : } else {
114 0 : if(byte2 < 2) {
115 0 : byte2 = 0xff;
116 0 : basePrimary -= 0x1000000;
117 : }
118 : }
119 : // First byte, assume no further underflow.
120 0 : return (basePrimary & 0xff000000) | ((uint32_t)byte2 << 16) | ((uint32_t)byte3 << 8);
121 : }
122 :
123 : uint32_t
124 0 : Collation::getThreeBytePrimaryForOffsetData(UChar32 c, int64_t dataCE) {
125 0 : uint32_t p = (uint32_t)(dataCE >> 32); // three-byte primary pppppp00
126 0 : int32_t lower32 = (int32_t)dataCE; // base code point b & step s: bbbbbbss (bit 7: isCompressible)
127 0 : int32_t offset = (c - (lower32 >> 8)) * (lower32 & 0x7f); // delta * increment
128 0 : UBool isCompressible = (lower32 & 0x80) != 0;
129 0 : return Collation::incThreeBytePrimaryByOffset(p, isCompressible, offset);
130 : }
131 :
132 : uint32_t
133 0 : Collation::unassignedPrimaryFromCodePoint(UChar32 c) {
134 : // Create a gap before U+0000. Use c=-1 for [first unassigned].
135 0 : ++c;
136 : // Fourth byte: 18 values, every 14th byte value (gap of 13).
137 0 : uint32_t primary = 2 + (c % 18) * 14;
138 0 : c /= 18;
139 : // Third byte: 254 values.
140 0 : primary |= (2 + (c % 254)) << 8;
141 0 : c /= 254;
142 : // Second byte: 251 values 04..FE excluding the primary compression bytes.
143 0 : primary |= (4 + (c % 251)) << 16;
144 : // One lead byte covers all code points (c < 0x1182B4 = 1*251*254*18).
145 0 : return primary | (UNASSIGNED_IMPLICIT_BYTE << 24);
146 : }
147 :
148 : U_NAMESPACE_END
149 :
150 : #endif // !UCONFIG_NO_COLLATION
|