Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : *******************************************************************************
5 : * Copyright (C) 1996-2012, International Business Machines Corporation and
6 : * others. All Rights Reserved.
7 : *******************************************************************************
8 : */
9 : //===============================================================================
10 : //
11 : // File sortkey.cpp
12 : //
13 : //
14 : //
15 : // Created by: Helena Shih
16 : //
17 : // Modification History:
18 : //
19 : // Date Name Description
20 : //
21 : // 6/20/97 helena Java class name change.
22 : // 6/23/97 helena Added comments to make code more readable.
23 : // 6/26/98 erm Changed to use byte arrays instead of UnicodeString
24 : // 7/31/98 erm hashCode: minimum inc should be 2 not 1,
25 : // Cleaned up operator=
26 : // 07/12/99 helena HPUX 11 CC port.
27 : // 03/06/01 synwee Modified compareTo, to handle the result of
28 : // 2 string similar in contents, but one is longer
29 : // than the other
30 : //===============================================================================
31 :
32 : #include "unicode/utypes.h"
33 :
34 : #if !UCONFIG_NO_COLLATION
35 :
36 : #include "unicode/sortkey.h"
37 : #include "cmemory.h"
38 : #include "uelement.h"
39 : #include "ustr_imp.h"
40 :
41 : U_NAMESPACE_BEGIN
42 :
43 : // A hash code of kInvalidHashCode indicates that the hash code needs
44 : // to be computed. A hash code of kEmptyHashCode is used for empty keys
45 : // and for any key whose computed hash code is kInvalidHashCode.
46 : static const int32_t kInvalidHashCode = 0;
47 : static const int32_t kEmptyHashCode = 1;
48 : // The "bogus hash code" replaces a separate fBogus flag.
49 : static const int32_t kBogusHashCode = 2;
50 :
51 0 : UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey)
52 :
53 0 : CollationKey::CollationKey()
54 : : UObject(), fFlagAndLength(0),
55 0 : fHashCode(kEmptyHashCode)
56 : {
57 0 : }
58 :
59 : // Create a collation key from a bit array.
60 0 : CollationKey::CollationKey(const uint8_t* newValues, int32_t count)
61 : : UObject(), fFlagAndLength(count),
62 0 : fHashCode(kInvalidHashCode)
63 : {
64 0 : if (count < 0 || (newValues == NULL && count != 0) ||
65 0 : (count > getCapacity() && reallocate(count, 0) == NULL)) {
66 0 : setToBogus();
67 0 : return;
68 : }
69 :
70 0 : if (count > 0) {
71 0 : uprv_memcpy(getBytes(), newValues, count);
72 : }
73 : }
74 :
75 0 : CollationKey::CollationKey(const CollationKey& other)
76 0 : : UObject(other), fFlagAndLength(other.getLength()),
77 0 : fHashCode(other.fHashCode)
78 : {
79 0 : if (other.isBogus())
80 : {
81 0 : setToBogus();
82 0 : return;
83 : }
84 :
85 0 : int32_t length = fFlagAndLength;
86 0 : if (length > getCapacity() && reallocate(length, 0) == NULL) {
87 0 : setToBogus();
88 0 : return;
89 : }
90 :
91 0 : if (length > 0) {
92 0 : uprv_memcpy(getBytes(), other.getBytes(), length);
93 : }
94 : }
95 :
96 0 : CollationKey::~CollationKey()
97 : {
98 0 : if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); }
99 0 : }
100 :
101 0 : uint8_t *CollationKey::reallocate(int32_t newCapacity, int32_t length) {
102 0 : uint8_t *newBytes = static_cast<uint8_t *>(uprv_malloc(newCapacity));
103 0 : if(newBytes == NULL) { return NULL; }
104 0 : if(length > 0) {
105 0 : uprv_memcpy(newBytes, getBytes(), length);
106 : }
107 0 : if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); }
108 0 : fUnion.fFields.fBytes = newBytes;
109 0 : fUnion.fFields.fCapacity = newCapacity;
110 0 : fFlagAndLength |= 0x80000000;
111 0 : return newBytes;
112 : }
113 :
114 0 : void CollationKey::setLength(int32_t newLength) {
115 : // U_ASSERT(newLength >= 0 && newLength <= getCapacity());
116 0 : fFlagAndLength = (fFlagAndLength & 0x80000000) | newLength;
117 0 : fHashCode = kInvalidHashCode;
118 0 : }
119 :
120 : // set the key to an empty state
121 : CollationKey&
122 0 : CollationKey::reset()
123 : {
124 0 : fFlagAndLength &= 0x80000000;
125 0 : fHashCode = kEmptyHashCode;
126 :
127 0 : return *this;
128 : }
129 :
130 : // set the key to a "bogus" or invalid state
131 : CollationKey&
132 0 : CollationKey::setToBogus()
133 : {
134 0 : fFlagAndLength &= 0x80000000;
135 0 : fHashCode = kBogusHashCode;
136 :
137 0 : return *this;
138 : }
139 :
140 : UBool
141 0 : CollationKey::operator==(const CollationKey& source) const
142 : {
143 0 : return getLength() == source.getLength() &&
144 0 : (this == &source ||
145 0 : uprv_memcmp(getBytes(), source.getBytes(), getLength()) == 0);
146 : }
147 :
148 : const CollationKey&
149 0 : CollationKey::operator=(const CollationKey& other)
150 : {
151 0 : if (this != &other)
152 : {
153 0 : if (other.isBogus())
154 : {
155 0 : return setToBogus();
156 : }
157 :
158 0 : int32_t length = other.getLength();
159 0 : if (length > getCapacity() && reallocate(length, 0) == NULL) {
160 0 : return setToBogus();
161 : }
162 0 : if (length > 0) {
163 0 : uprv_memcpy(getBytes(), other.getBytes(), length);
164 : }
165 0 : fFlagAndLength = (fFlagAndLength & 0x80000000) | length;
166 0 : fHashCode = other.fHashCode;
167 : }
168 :
169 0 : return *this;
170 : }
171 :
172 : // Bitwise comparison for the collation keys.
173 : Collator::EComparisonResult
174 0 : CollationKey::compareTo(const CollationKey& target) const
175 : {
176 0 : UErrorCode errorCode = U_ZERO_ERROR;
177 0 : return static_cast<Collator::EComparisonResult>(compareTo(target, errorCode));
178 : }
179 :
180 : // Bitwise comparison for the collation keys.
181 : UCollationResult
182 0 : CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const
183 : {
184 0 : if(U_SUCCESS(status)) {
185 0 : const uint8_t *src = getBytes();
186 0 : const uint8_t *tgt = target.getBytes();
187 :
188 : // are we comparing the same string
189 0 : if (src == tgt)
190 0 : return UCOL_EQUAL;
191 :
192 : UCollationResult result;
193 :
194 : // are we comparing different lengths?
195 0 : int32_t minLength = getLength();
196 0 : int32_t targetLength = target.getLength();
197 0 : if (minLength < targetLength) {
198 0 : result = UCOL_LESS;
199 0 : } else if (minLength == targetLength) {
200 0 : result = UCOL_EQUAL;
201 : } else {
202 0 : minLength = targetLength;
203 0 : result = UCOL_GREATER;
204 : }
205 :
206 0 : if (minLength > 0) {
207 0 : int diff = uprv_memcmp(src, tgt, minLength);
208 0 : if (diff > 0) {
209 0 : return UCOL_GREATER;
210 : }
211 : else
212 0 : if (diff < 0) {
213 0 : return UCOL_LESS;
214 : }
215 : }
216 :
217 0 : return result;
218 : } else {
219 0 : return UCOL_EQUAL;
220 : }
221 : }
222 :
#ifdef U_USE_COLLATION_KEY_DEPRECATES
// Create a copy of the byte array.
//
// Allocates a buffer with uprv_malloc() and copies the key bytes into it.
// count: receives the number of bytes copied, or 0 if the allocation failed.
// Returns the new buffer, or NULL if the allocation failed.
// NOTE(review): presumably the caller owns the buffer and must release it
// with uprv_free() - confirm against the header documentation.
//
// The key's state is read through getLength()/getBytes(), like everywhere
// else in this file, instead of through separate fCount/fBytes members.
uint8_t*
CollationKey::toByteArray(int32_t& count) const
{
    const int32_t length = getLength();
    uint8_t *result = static_cast<uint8_t *>(uprv_malloc(sizeof(uint8_t) * length));

    if (result == NULL)
    {
        count = 0;
    }
    else
    {
        count = length;
        if (count > 0) {
            uprv_memcpy(result, getBytes(), length);
        }
    }

    return result;
}
#endif
245 :
246 : static int32_t
247 0 : computeHashCode(const uint8_t *key, int32_t length) {
248 0 : const char *s = reinterpret_cast<const char *>(key);
249 : int32_t hash;
250 0 : if (s == NULL || length == 0) {
251 0 : hash = kEmptyHashCode;
252 : } else {
253 0 : hash = ustr_hashCharsN(s, length);
254 0 : if (hash == kInvalidHashCode || hash == kBogusHashCode) {
255 0 : hash = kEmptyHashCode;
256 : }
257 : }
258 0 : return hash;
259 : }
260 :
261 : int32_t
262 0 : CollationKey::hashCode() const
263 : {
264 : // (Cribbed from UnicodeString)
265 : // We cache the hashCode; when it becomes invalid, due to any change to the
266 : // string, we note this by setting it to kInvalidHashCode. [LIU]
267 :
268 : // Note: This method is semantically const, but physically non-const.
269 :
270 0 : if (fHashCode == kInvalidHashCode)
271 : {
272 0 : fHashCode = computeHashCode(getBytes(), getLength());
273 : }
274 :
275 0 : return fHashCode;
276 : }
277 :
278 : U_NAMESPACE_END
279 :
280 : U_CAPI int32_t U_EXPORT2
281 0 : ucol_keyHashCode(const uint8_t *key,
282 : int32_t length)
283 : {
284 0 : return icu::computeHashCode(key, length);
285 : }
286 :
287 : #endif /* #if !UCONFIG_NO_COLLATION */
|