Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : ******************************************************************************
5 : *
6 : * Copyright (C) 1997-2011, International Business Machines
7 : * Corporation and others. All Rights Reserved.
8 : *
9 : ******************************************************************************
10 : *
11 : * File CSTRING.C
12 : *
13 : * @author Helena Shih
14 : *
15 : * Modification History:
16 : *
17 : * Date Name Description
18 : * 6/18/98 hshih Created
19 : * 09/08/98 stephen Added include for ctype, for Mac Port
20 : * 11/15/99 helena Integrated S/390 IEEE changes.
21 : ******************************************************************************
22 : */
23 :
24 :
25 :
26 : #include <stdlib.h>
27 : #include <stdio.h>
28 : #include "unicode/utypes.h"
29 : #include "cmemory.h"
30 : #include "cstring.h"
31 : #include "uassert.h"
32 :
33 : /*
34 : * We hardcode case conversion for invariant characters to match our expectation
35 : * and the compiler execution charset.
36 : * This prevents problems on systems
37 : * - with non-default casing behavior, like Turkish system locales where
38 : * tolower('I') maps to dotless i and toupper('i') maps to dotted I
39 : * - where there are no lowercase Latin characters at all, or using different
40 : * codes (some old EBCDIC codepages)
41 : *
42 : * This works because the compiler usually runs on a platform where the execution
43 : * charset includes all of the invariant characters at their expected
44 : * code positions, so that the char * string literals in ICU code match
45 : * the char literals here.
46 : *
47 : * Note that the set of lowercase Latin letters is discontiguous in EBCDIC
48 : * and the set of uppercase Latin letters is discontiguous as well.
49 : */
50 :
51 : U_CAPI UBool U_EXPORT2
52 210 : uprv_isASCIILetter(char c) {
53 : #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
54 : return
55 : ('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z') ||
56 : ('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z');
57 : #else
58 210 : return ('a'<=c && c<='z') || ('A'<=c && c<='Z');
59 : #endif
60 : }
61 :
62 : U_CAPI char U_EXPORT2
63 192 : uprv_toupper(char c) {
64 : #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
65 : if(('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z')) {
66 : c=(char)(c+('A'-'a'));
67 : }
68 : #else
69 192 : if('a'<=c && c<='z') {
70 0 : c=(char)(c+('A'-'a'));
71 : }
72 : #endif
73 192 : return c;
74 : }
75 :
76 :
#if 0
/*
 * Disabled: cstring.h defines uprv_tolower() to be the same as either
 * uprv_asciitolower() or uprv_ebcdictolower(), which reduces the amount
 * of code that has to be covered with tests.
 *
 * This generic definition is kept for reference. It is likely to work for
 * most charset families, not just ASCII and EBCDIC, because its #else
 * branch relies only on the compiler's character literals.
 */
U_CAPI char U_EXPORT2
uprv_tolower(char c) {
#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    const int isUpper = (c>='A' && c<='I') || (c>='J' && c<='R') || (c>='S' && c<='Z');
#else
    const int isUpper = (c>='A' && c<='Z');
#endif
    return isUpper ? (char)(c+('a'-'A')) : c;
}
#endif
101 :
102 : U_CAPI char U_EXPORT2
103 280 : uprv_asciitolower(char c) {
104 280 : if(0x41<=c && c<=0x5a) {
105 4 : c=(char)(c+0x20);
106 : }
107 280 : return c;
108 : }
109 :
110 : U_CAPI char U_EXPORT2
111 0 : uprv_ebcdictolower(char c) {
112 0 : if( (0xc1<=(uint8_t)c && (uint8_t)c<=0xc9) ||
113 0 : (0xd1<=(uint8_t)c && (uint8_t)c<=0xd9) ||
114 0 : (0xe2<=(uint8_t)c && (uint8_t)c<=0xe9)
115 : ) {
116 0 : c=(char)(c-0x40);
117 : }
118 0 : return c;
119 : }
120 :
121 :
122 : U_CAPI char* U_EXPORT2
123 0 : T_CString_toLowerCase(char* str)
124 : {
125 0 : char* origPtr = str;
126 :
127 0 : if (str) {
128 0 : do
129 0 : *str = (char)uprv_tolower(*str);
130 0 : while (*(str++));
131 : }
132 :
133 0 : return origPtr;
134 : }
135 :
136 : U_CAPI char* U_EXPORT2
137 0 : T_CString_toUpperCase(char* str)
138 : {
139 0 : char* origPtr = str;
140 :
141 0 : if (str) {
142 0 : do
143 0 : *str = (char)uprv_toupper(*str);
144 0 : while (*(str++));
145 : }
146 :
147 0 : return origPtr;
148 : }
149 :
150 : /*
151 : * Takes a int32_t and fills in a char* string with that number "radix"-based.
152 : * Does not handle negative values (makes an empty string for them).
153 : * Writes at most 12 chars ("-2147483647" plus NUL).
154 : * Returns the length of the string (not including the NUL).
155 : */
156 : U_CAPI int32_t U_EXPORT2
157 0 : T_CString_integerToString(char* buffer, int32_t v, int32_t radix)
158 : {
159 : char tbuf[30];
160 0 : int32_t tbx = sizeof(tbuf);
161 : uint8_t digit;
162 0 : int32_t length = 0;
163 : uint32_t uval;
164 :
165 0 : U_ASSERT(radix>=2 && radix<=16);
166 0 : uval = (uint32_t) v;
167 0 : if(v<0 && radix == 10) {
168 : /* Only in base 10 do we conside numbers to be signed. */
169 0 : uval = (uint32_t)(-v);
170 0 : buffer[length++] = '-';
171 : }
172 :
173 0 : tbx = sizeof(tbuf)-1;
174 0 : tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */
175 0 : do {
176 0 : digit = (uint8_t)(uval % radix);
177 0 : tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
178 0 : uval = uval / radix;
179 0 : } while (uval != 0);
180 :
181 : /* copy converted number into user buffer */
182 0 : uprv_strcpy(buffer+length, tbuf+tbx);
183 0 : length += sizeof(tbuf) - tbx -1;
184 0 : return length;
185 : }
186 :
187 :
188 :
189 : /*
190 : * Takes a int64_t and fills in a char* string with that number "radix"-based.
191 : * Writes at most 21: chars ("-9223372036854775807" plus NUL).
192 : * Returns the length of the string, not including the terminating NULL.
193 : */
194 : U_CAPI int32_t U_EXPORT2
195 0 : T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix)
196 : {
197 : char tbuf[30];
198 0 : int32_t tbx = sizeof(tbuf);
199 : uint8_t digit;
200 0 : int32_t length = 0;
201 : uint64_t uval;
202 :
203 0 : U_ASSERT(radix>=2 && radix<=16);
204 0 : uval = (uint64_t) v;
205 0 : if(v<0 && radix == 10) {
206 : /* Only in base 10 do we conside numbers to be signed. */
207 0 : uval = (uint64_t)(-v);
208 0 : buffer[length++] = '-';
209 : }
210 :
211 0 : tbx = sizeof(tbuf)-1;
212 0 : tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */
213 0 : do {
214 0 : digit = (uint8_t)(uval % radix);
215 0 : tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
216 0 : uval = uval / radix;
217 0 : } while (uval != 0);
218 :
219 : /* copy converted number into user buffer */
220 0 : uprv_strcpy(buffer+length, tbuf+tbx);
221 0 : length += sizeof(tbuf) - tbx -1;
222 0 : return length;
223 : }
224 :
225 :
226 : U_CAPI int32_t U_EXPORT2
227 0 : T_CString_stringToInteger(const char *integerString, int32_t radix)
228 : {
229 : char *end;
230 0 : return uprv_strtoul(integerString, &end, radix);
231 :
232 : }
233 :
234 : U_CAPI int U_EXPORT2
235 0 : uprv_stricmp(const char *str1, const char *str2) {
236 0 : if(str1==NULL) {
237 0 : if(str2==NULL) {
238 0 : return 0;
239 : } else {
240 0 : return -1;
241 : }
242 0 : } else if(str2==NULL) {
243 0 : return 1;
244 : } else {
245 : /* compare non-NULL strings lexically with lowercase */
246 : int rc;
247 : unsigned char c1, c2;
248 :
249 : for(;;) {
250 0 : c1=(unsigned char)*str1;
251 0 : c2=(unsigned char)*str2;
252 0 : if(c1==0) {
253 0 : if(c2==0) {
254 0 : return 0;
255 : } else {
256 0 : return -1;
257 : }
258 0 : } else if(c2==0) {
259 0 : return 1;
260 : } else {
261 : /* compare non-zero characters with lowercase */
262 0 : rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
263 0 : if(rc!=0) {
264 0 : return rc;
265 : }
266 : }
267 0 : ++str1;
268 0 : ++str2;
269 : }
270 : }
271 : }
272 :
273 : U_CAPI int U_EXPORT2
274 2 : uprv_strnicmp(const char *str1, const char *str2, uint32_t n) {
275 2 : if(str1==NULL) {
276 0 : if(str2==NULL) {
277 0 : return 0;
278 : } else {
279 0 : return -1;
280 : }
281 2 : } else if(str2==NULL) {
282 0 : return 1;
283 : } else {
284 : /* compare non-NULL strings lexically with lowercase */
285 : int rc;
286 : unsigned char c1, c2;
287 :
288 2 : for(; n--;) {
289 2 : c1=(unsigned char)*str1;
290 2 : c2=(unsigned char)*str2;
291 2 : if(c1==0) {
292 0 : if(c2==0) {
293 0 : return 0;
294 : } else {
295 0 : return -1;
296 : }
297 2 : } else if(c2==0) {
298 0 : return 1;
299 : } else {
300 : /* compare non-zero characters with lowercase */
301 2 : rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
302 2 : if(rc!=0) {
303 2 : return rc;
304 : }
305 : }
306 0 : ++str1;
307 0 : ++str2;
308 : }
309 : }
310 :
311 0 : return 0;
312 : }
313 :
314 : U_CAPI char* U_EXPORT2
315 0 : uprv_strdup(const char *src) {
316 0 : size_t len = uprv_strlen(src) + 1;
317 0 : char *dup = (char *) uprv_malloc(len);
318 :
319 0 : if (dup) {
320 0 : uprv_memcpy(dup, src, len);
321 : }
322 :
323 0 : return dup;
324 : }
325 :
326 : U_CAPI char* U_EXPORT2
327 0 : uprv_strndup(const char *src, int32_t n) {
328 : char *dup;
329 :
330 0 : if(n < 0) {
331 0 : dup = uprv_strdup(src);
332 : } else {
333 0 : dup = (char*)uprv_malloc(n+1);
334 0 : if (dup) {
335 0 : uprv_memcpy(dup, src, n);
336 0 : dup[n] = 0;
337 : }
338 : }
339 :
340 0 : return dup;
341 : }
|