Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : *******************************************************************************
5 : *
6 : * Copyright (C) 1999-2014, International Business Machines
7 : * Corporation and others. All Rights Reserved.
8 : *
9 : *******************************************************************************
10 : * file name: unistr_case.cpp
11 : * encoding: UTF-8
12 : * tab size: 8 (not used)
13 : * indentation:2
14 : *
15 : * created on: 2004aug19
16 : * created by: Markus W. Scherer
17 : *
18 : * Case-mapping functions moved here from unistr.cpp
19 : */
20 :
21 : #include "unicode/utypes.h"
22 : #include "unicode/casemap.h"
23 : #include "unicode/edits.h"
24 : #include "unicode/putil.h"
25 : #include "cstring.h"
26 : #include "cmemory.h"
27 : #include "unicode/ustring.h"
28 : #include "unicode/unistr.h"
29 : #include "unicode/uchar.h"
30 : #include "uassert.h"
31 : #include "ucasemap_imp.h"
32 : #include "uelement.h"
33 :
34 : U_NAMESPACE_BEGIN
35 :
36 : //========================================
37 : // Read-only implementation
38 : //========================================
39 :
40 : int8_t
41 0 : UnicodeString::doCaseCompare(int32_t start,
42 : int32_t length,
43 : const UChar *srcChars,
44 : int32_t srcStart,
45 : int32_t srcLength,
46 : uint32_t options) const
47 : {
48 : // compare illegal string values
49 : // treat const UChar *srcChars==NULL as an empty string
50 0 : if(isBogus()) {
51 0 : return -1;
52 : }
53 :
54 : // pin indices to legal values
55 0 : pinIndices(start, length);
56 :
57 0 : if(srcChars == NULL) {
58 0 : srcStart = srcLength = 0;
59 : }
60 :
61 : // get the correct pointer
62 0 : const UChar *chars = getArrayStart();
63 :
64 0 : chars += start;
65 0 : if(srcStart!=0) {
66 0 : srcChars += srcStart;
67 : }
68 :
69 0 : if(chars != srcChars) {
70 0 : UErrorCode errorCode=U_ZERO_ERROR;
71 0 : int32_t result=u_strcmpFold(chars, length, srcChars, srcLength,
72 0 : options|U_COMPARE_IGNORE_CASE, &errorCode);
73 0 : if(result!=0) {
74 0 : return (int8_t)(result >> 24 | 1);
75 : }
76 : } else {
77 : // get the srcLength if necessary
78 0 : if(srcLength < 0) {
79 0 : srcLength = u_strlen(srcChars + srcStart);
80 : }
81 0 : if(length != srcLength) {
82 0 : return (int8_t)((length - srcLength) >> 24 | 1);
83 : }
84 : }
85 0 : return 0;
86 : }
87 :
88 : //========================================
89 : // Write implementation
90 : //========================================
91 :
92 : UnicodeString &
93 0 : UnicodeString::caseMap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
94 : UStringCaseMapper *stringCaseMapper) {
95 0 : if(isEmpty() || !isWritable()) {
96 : // nothing to do
97 0 : return *this;
98 : }
99 :
100 : UChar oldBuffer[2 * US_STACKBUF_SIZE];
101 : UChar *oldArray;
102 0 : int32_t oldLength = length();
103 : int32_t newLength;
104 0 : UBool writable = isBufferWritable();
105 0 : UErrorCode errorCode = U_ZERO_ERROR;
106 :
107 : // Try to avoid heap-allocating a new character array for this string.
108 0 : if (writable ? oldLength <= UPRV_LENGTHOF(oldBuffer) : oldLength < US_STACKBUF_SIZE) {
109 : // Short string: Copy the contents into a temporary buffer and
110 : // case-map back into the current array, or into the stack buffer.
111 0 : UChar *buffer = getArrayStart();
112 : int32_t capacity;
113 0 : oldArray = oldBuffer;
114 0 : u_memcpy(oldBuffer, buffer, oldLength);
115 0 : if (writable) {
116 0 : capacity = getCapacity();
117 : } else {
118 : // Switch from the read-only alias or shared heap buffer to the stack buffer.
119 0 : if (!cloneArrayIfNeeded(US_STACKBUF_SIZE, US_STACKBUF_SIZE, /* doCopyArray= */ FALSE)) {
120 0 : return *this;
121 : }
122 0 : U_ASSERT(fUnion.fFields.fLengthAndFlags & kUsingStackBuffer);
123 0 : buffer = fUnion.fStackFields.fBuffer;
124 0 : capacity = US_STACKBUF_SIZE;
125 : }
126 : newLength = stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
127 : buffer, capacity,
128 0 : oldArray, oldLength, NULL, errorCode);
129 0 : if (U_SUCCESS(errorCode)) {
130 0 : setLength(newLength);
131 0 : return *this;
132 0 : } else if (errorCode == U_BUFFER_OVERFLOW_ERROR) {
133 : // common overflow handling below
134 : } else {
135 0 : setToBogus();
136 0 : return *this;
137 : }
138 : } else {
139 : // Longer string or read-only buffer:
140 : // Collect only changes and then apply them to this string.
141 : // Case mapping often changes only small parts of a string,
142 : // and often does not change its length.
143 0 : oldArray = getArrayStart();
144 0 : Edits edits;
145 : UChar replacementChars[200];
146 0 : stringCaseMapper(caseLocale, options | UCASEMAP_OMIT_UNCHANGED_TEXT, UCASEMAP_BREAK_ITERATOR
147 : replacementChars, UPRV_LENGTHOF(replacementChars),
148 0 : oldArray, oldLength, &edits, errorCode);
149 0 : if (U_SUCCESS(errorCode)) {
150 : // Grow the buffer at most once, not for multiple doReplace() calls.
151 0 : newLength = oldLength + edits.lengthDelta();
152 0 : if (newLength > oldLength && !cloneArrayIfNeeded(newLength, newLength)) {
153 0 : return *this;
154 : }
155 0 : for (Edits::Iterator ei = edits.getCoarseChangesIterator(); ei.next(errorCode);) {
156 : doReplace(ei.destinationIndex(), ei.oldLength(),
157 0 : replacementChars, ei.replacementIndex(), ei.newLength());
158 : }
159 0 : if (U_FAILURE(errorCode)) {
160 0 : setToBogus();
161 : }
162 0 : return *this;
163 0 : } else if (errorCode == U_BUFFER_OVERFLOW_ERROR) {
164 : // common overflow handling below
165 0 : newLength = oldLength + edits.lengthDelta();
166 : } else {
167 0 : setToBogus();
168 0 : return *this;
169 : }
170 : }
171 :
172 : // Handle buffer overflow, newLength is known.
173 : // We need to allocate a new buffer for the internal string case mapping function.
174 : // This is very similar to how doReplace() keeps the old array pointer
175 : // and deletes the old array itself after it is done.
176 : // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array.
177 0 : int32_t *bufferToDelete = 0;
178 0 : if (!cloneArrayIfNeeded(newLength, newLength, FALSE, &bufferToDelete, TRUE)) {
179 0 : return *this;
180 : }
181 0 : errorCode = U_ZERO_ERROR;
182 0 : newLength = stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
183 : getArrayStart(), getCapacity(),
184 0 : oldArray, oldLength, NULL, errorCode);
185 0 : if (bufferToDelete) {
186 0 : uprv_free(bufferToDelete);
187 : }
188 0 : if (U_SUCCESS(errorCode)) {
189 0 : setLength(newLength);
190 : } else {
191 0 : setToBogus();
192 : }
193 0 : return *this;
194 : }
195 :
196 : UnicodeString &
197 0 : UnicodeString::foldCase(uint32_t options) {
198 0 : return caseMap(UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalFold);
199 : }
200 :
201 : U_NAMESPACE_END
202 :
203 : // Defined here to reduce dependencies on break iterator
204 : U_CAPI int32_t U_EXPORT2
205 0 : uhash_hashCaselessUnicodeString(const UElement key) {
206 : U_NAMESPACE_USE
207 0 : const UnicodeString *str = (const UnicodeString*) key.pointer;
208 0 : if (str == NULL) {
209 0 : return 0;
210 : }
211 : // Inefficient; a better way would be to have a hash function in
212 : // UnicodeString that does case folding on the fly.
213 0 : UnicodeString copy(*str);
214 0 : return copy.foldCase().hashCode();
215 : }
216 :
217 : // Defined here to reduce dependencies on break iterator
218 : U_CAPI UBool U_EXPORT2
219 0 : uhash_compareCaselessUnicodeString(const UElement key1, const UElement key2) {
220 : U_NAMESPACE_USE
221 0 : const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
222 0 : const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
223 0 : if (str1 == str2) {
224 0 : return TRUE;
225 : }
226 0 : if (str1 == NULL || str2 == NULL) {
227 0 : return FALSE;
228 : }
229 0 : return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0;
230 : }
|