Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : *******************************************************************************
5 : *
6 : * Copyright (C) 2001-2012, International Business Machines
7 : * Corporation and others. All Rights Reserved.
8 : *
9 : *******************************************************************************
10 : * file name: ustr_wcs.cpp
11 : * encoding: UTF-8
12 : * tab size: 8 (not used)
13 : * indentation:4
14 : *
15 : * created on: 2004sep07
16 : * created by: Markus W. Scherer
17 : *
18 : * u_strToWCS() and u_strFromWCS() functions
19 : * moved here from ustrtrns.c for better modularization.
20 : */
21 :
22 : #include "unicode/utypes.h"
23 : #include "unicode/ustring.h"
24 : #include "cstring.h"
25 : #include "cwchar.h"
26 : #include "cmemory.h"
27 : #include "ustr_imp.h"
28 : #include "ustr_cnv.h"
29 :
30 : #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
31 :
32 : #define _STACK_BUFFER_CAPACITY 1000
33 : #define _BUFFER_CAPACITY_MULTIPLIER 2
34 :
35 : #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
36 : // TODO: We should use CharString for char buffers and UnicodeString for UChar buffers.
37 : // Then we could change this to work only with wchar_t buffers.
38 : static inline UBool
39 : u_growAnyBufferFromStatic(void *context,
40 : void **pBuffer, int32_t *pCapacity, int32_t reqCapacity,
41 : int32_t length, int32_t size) {
42 : // Use char* not void* to avoid the compiler's strict-aliasing assumptions
43 : // and related warnings.
44 : char *newBuffer=(char *)uprv_malloc(reqCapacity*size);
45 : if(newBuffer!=NULL) {
46 : if(length>0) {
47 : uprv_memcpy(newBuffer, *pBuffer, (size_t)length*size);
48 : }
49 : *pCapacity=reqCapacity;
50 : } else {
51 : *pCapacity=0;
52 : }
53 :
54 : /* release the old pBuffer if it was not statically allocated */
55 : if(*pBuffer!=(char *)context) {
56 : uprv_free(*pBuffer);
57 : }
58 :
59 : *pBuffer=newBuffer;
60 : return (UBool)(newBuffer!=NULL);
61 : }
62 :
63 : /* helper function */
64 : static wchar_t*
65 : _strToWCS(wchar_t *dest,
66 : int32_t destCapacity,
67 : int32_t *pDestLength,
68 : const UChar *src,
69 : int32_t srcLength,
70 : UErrorCode *pErrorCode){
71 :
72 : char stackBuffer [_STACK_BUFFER_CAPACITY];
73 : char* tempBuf = stackBuffer;
74 : int32_t tempBufCapacity = _STACK_BUFFER_CAPACITY;
75 : char* tempBufLimit = stackBuffer + tempBufCapacity;
76 : UConverter* conv = NULL;
77 : char* saveBuf = tempBuf;
78 : wchar_t* intTarget=NULL;
79 : int32_t intTargetCapacity=0;
80 : int count=0,retVal=0;
81 :
82 : const UChar *pSrcLimit =NULL;
83 : const UChar *pSrc = src;
84 :
85 : conv = u_getDefaultConverter(pErrorCode);
86 :
87 : if(U_FAILURE(*pErrorCode)){
88 : return NULL;
89 : }
90 :
91 : if(srcLength == -1){
92 : srcLength = u_strlen(pSrc);
93 : }
94 :
95 : pSrcLimit = pSrc + srcLength;
96 :
97 : for(;;) {
98 : /* reset the error state */
99 : *pErrorCode = U_ZERO_ERROR;
100 :
101 : /* convert to chars using default converter */
102 : ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,NULL,(UBool)(pSrc==pSrcLimit),pErrorCode);
103 : count =(tempBuf - saveBuf);
104 :
105 : /* This should rarely occur */
106 : if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
107 : tempBuf = saveBuf;
108 :
109 : /* we dont have enough room on the stack grow the buffer */
110 : int32_t newCapacity = 2 * srcLength;
111 : if(newCapacity <= tempBufCapacity) {
112 : newCapacity = _BUFFER_CAPACITY_MULTIPLIER * tempBufCapacity;
113 : }
114 : if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity,
115 : newCapacity, count, 1)) {
116 : goto cleanup;
117 : }
118 :
119 : saveBuf = tempBuf;
120 : tempBufLimit = tempBuf + tempBufCapacity;
121 : tempBuf = tempBuf + count;
122 :
123 : } else {
124 : break;
125 : }
126 : }
127 :
128 : if(U_FAILURE(*pErrorCode)){
129 : goto cleanup;
130 : }
131 :
132 : /* done with conversion null terminate the char buffer */
133 : if(count>=tempBufCapacity){
134 : tempBuf = saveBuf;
135 : /* we dont have enough room on the stack grow the buffer */
136 : if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity,
137 : count+1, count, 1)) {
138 : goto cleanup;
139 : }
140 : saveBuf = tempBuf;
141 : }
142 :
143 : saveBuf[count]=0;
144 :
145 :
146 : /* allocate more space than required
147 : * here we assume that every char requires
148 : * no more than 2 wchar_ts
149 : */
150 : intTargetCapacity = (count * _BUFFER_CAPACITY_MULTIPLIER + 1) /*for null termination */;
151 : intTarget = (wchar_t*)uprv_malloc( intTargetCapacity * sizeof(wchar_t) );
152 :
153 : if(intTarget){
154 :
155 : int32_t nulLen = 0;
156 : int32_t remaining = intTargetCapacity;
157 : wchar_t* pIntTarget=intTarget;
158 : tempBuf = saveBuf;
159 :
160 : /* now convert the mbs to wcs */
161 : for(;;){
162 :
163 : /* we can call the system API since we are sure that
164 : * there is atleast 1 null in the input
165 : */
166 : retVal = uprv_mbstowcs(pIntTarget,(tempBuf+nulLen),remaining);
167 :
168 : if(retVal==-1){
169 : *pErrorCode = U_INVALID_CHAR_FOUND;
170 : break;
171 : }else if(retVal== remaining){/* should never occur */
172 : int numWritten = (pIntTarget-intTarget);
173 : u_growAnyBufferFromStatic(NULL,(void**) &intTarget,
174 : &intTargetCapacity,
175 : intTargetCapacity * _BUFFER_CAPACITY_MULTIPLIER,
176 : numWritten,
177 : sizeof(wchar_t));
178 : pIntTarget = intTarget;
179 : remaining=intTargetCapacity;
180 :
181 : if(nulLen!=count){ /*there are embedded nulls*/
182 : pIntTarget+=numWritten;
183 : remaining-=numWritten;
184 : }
185 :
186 : }else{
187 : int32_t nulVal;
188 : /*scan for nulls */
189 : /* we donot check for limit since tempBuf is null terminated */
190 : while(tempBuf[nulLen++] != 0){
191 : }
192 : nulVal = (nulLen < srcLength) ? 1 : 0;
193 : pIntTarget = pIntTarget + retVal+nulVal;
194 : remaining -=(retVal+nulVal);
195 :
196 : /* check if we have reached the source limit*/
197 : if(nulLen>=(count)){
198 : break;
199 : }
200 : }
201 : }
202 : count = (int32_t)(pIntTarget-intTarget);
203 :
204 : if(0 < count && count <= destCapacity){
205 : uprv_memcpy(dest, intTarget, (size_t)count*sizeof(wchar_t));
206 : }
207 :
208 : if(pDestLength){
209 : *pDestLength = count;
210 : }
211 :
212 : /* free the allocated memory */
213 : uprv_free(intTarget);
214 :
215 : }else{
216 : *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
217 : }
218 : cleanup:
219 : /* are we still using stack buffer */
220 : if(stackBuffer != saveBuf){
221 : uprv_free(saveBuf);
222 : }
223 : u_terminateWChars(dest,destCapacity,count,pErrorCode);
224 :
225 : u_releaseDefaultConverter(conv);
226 :
227 : return dest;
228 : }
229 : #endif
230 :
231 : U_CAPI wchar_t* U_EXPORT2
232 0 : u_strToWCS(wchar_t *dest,
233 : int32_t destCapacity,
234 : int32_t *pDestLength,
235 : const UChar *src,
236 : int32_t srcLength,
237 : UErrorCode *pErrorCode){
238 :
239 : /* args check */
240 0 : if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
241 0 : return NULL;
242 : }
243 :
244 0 : if( (src==NULL && srcLength!=0) || srcLength < -1 ||
245 0 : (destCapacity<0) || (dest == NULL && destCapacity > 0)
246 : ) {
247 0 : *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
248 0 : return NULL;
249 : }
250 :
251 : #ifdef U_WCHAR_IS_UTF16
252 : /* wchar_t is UTF-16 just do a memcpy */
253 : if(srcLength == -1){
254 : srcLength = u_strlen(src);
255 : }
256 : if(0 < srcLength && srcLength <= destCapacity){
257 : u_memcpy((UChar *)dest, src, srcLength);
258 : }
259 : if(pDestLength){
260 : *pDestLength = srcLength;
261 : }
262 :
263 : u_terminateUChars((UChar *)dest,destCapacity,srcLength,pErrorCode);
264 :
265 : return dest;
266 :
267 : #elif defined U_WCHAR_IS_UTF32
268 :
269 : return (wchar_t*)u_strToUTF32((UChar32*)dest, destCapacity, pDestLength,
270 0 : src, srcLength, pErrorCode);
271 :
272 : #else
273 :
274 : return _strToWCS(dest,destCapacity,pDestLength,src,srcLength, pErrorCode);
275 :
276 : #endif
277 :
278 : }
279 :
280 : #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
281 : /* helper function */
282 : static UChar*
283 : _strFromWCS( UChar *dest,
284 : int32_t destCapacity,
285 : int32_t *pDestLength,
286 : const wchar_t *src,
287 : int32_t srcLength,
288 : UErrorCode *pErrorCode)
289 : {
290 : int32_t retVal =0, count =0 ;
291 : UConverter* conv = NULL;
292 : UChar* pTarget = NULL;
293 : UChar* pTargetLimit = NULL;
294 : UChar* target = NULL;
295 :
296 : UChar uStack [_STACK_BUFFER_CAPACITY];
297 :
298 : wchar_t wStack[_STACK_BUFFER_CAPACITY];
299 : wchar_t* pWStack = wStack;
300 :
301 :
302 : char cStack[_STACK_BUFFER_CAPACITY];
303 : int32_t cStackCap = _STACK_BUFFER_CAPACITY;
304 : char* pCSrc=cStack;
305 : char* pCSave=pCSrc;
306 : char* pCSrcLimit=NULL;
307 :
308 : const wchar_t* pSrc = src;
309 : const wchar_t* pSrcLimit = NULL;
310 :
311 : if(srcLength ==-1){
312 : /* if the wchar_t source is null terminated we can safely
313 : * assume that there are no embedded nulls, this is a fast
314 : * path for null terminated strings.
315 : */
316 : for(;;){
317 : /* convert wchars to chars */
318 : retVal = uprv_wcstombs(pCSrc,src, cStackCap);
319 :
320 : if(retVal == -1){
321 : *pErrorCode = U_ILLEGAL_CHAR_FOUND;
322 : goto cleanup;
323 : }else if(retVal >= (cStackCap-1)){
324 : /* Should rarely occur */
325 : u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
326 : cStackCap * _BUFFER_CAPACITY_MULTIPLIER, 0, sizeof(char));
327 : pCSave = pCSrc;
328 : }else{
329 : /* converted every thing */
330 : pCSrc = pCSrc+retVal;
331 : break;
332 : }
333 : }
334 :
335 : }else{
336 : /* here the source is not null terminated
337 : * so it may have nulls embeded and we need to
338 : * do some extra processing
339 : */
340 : int32_t remaining =cStackCap;
341 :
342 : pSrcLimit = src + srcLength;
343 :
344 : for(;;){
345 : register int32_t nulLen = 0;
346 :
347 : /* find nulls in the string */
348 : while(nulLen<srcLength && pSrc[nulLen++]!=0){
349 : }
350 :
351 : if((pSrc+nulLen) < pSrcLimit){
352 : /* check if we have enough room in pCSrc */
353 : if(remaining < (nulLen * MB_CUR_MAX)){
354 : /* should rarely occur */
355 : int32_t len = (pCSrc-pCSave);
356 : pCSrc = pCSave;
357 : /* we do not have enough room so grow the buffer*/
358 : u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
359 : _BUFFER_CAPACITY_MULTIPLIER*cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char));
360 :
361 : pCSave = pCSrc;
362 : pCSrc = pCSave+len;
363 : remaining = cStackCap-(pCSrc - pCSave);
364 : }
365 :
366 : /* we have found a null so convert the
367 : * chunk from begining of non-null char to null
368 : */
369 : retVal = uprv_wcstombs(pCSrc,pSrc,remaining);
370 :
371 : if(retVal==-1){
372 : /* an error occurred bail out */
373 : *pErrorCode = U_ILLEGAL_CHAR_FOUND;
374 : goto cleanup;
375 : }
376 :
377 : pCSrc += retVal+1 /* already null terminated */;
378 :
379 : pSrc += nulLen; /* skip past the null */
380 : srcLength-=nulLen; /* decrement the srcLength */
381 : remaining -= (pCSrc-pCSave);
382 :
383 :
384 : }else{
385 : /* the source is not null terminated and we are
386 : * end of source so we copy the source to a temp buffer
387 : * null terminate it and convert wchar_ts to chars
388 : */
389 : if(nulLen >= _STACK_BUFFER_CAPACITY){
390 : /* Should rarely occcur */
391 : /* allocate new buffer buffer */
392 : pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen + 1));
393 : if(pWStack==NULL){
394 : *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
395 : goto cleanup;
396 : }
397 : }
398 : if(nulLen>0){
399 : /* copy the contents to tempStack */
400 : uprv_memcpy(pWStack, pSrc, (size_t)nulLen*sizeof(wchar_t));
401 : }
402 :
403 : /* null terminate the tempBuffer */
404 : pWStack[nulLen] =0 ;
405 :
406 : if(remaining < (nulLen * MB_CUR_MAX)){
407 : /* Should rarely occur */
408 : int32_t len = (pCSrc-pCSave);
409 : pCSrc = pCSave;
410 : /* we do not have enough room so grow the buffer*/
411 : u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
412 : cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char));
413 :
414 : pCSave = pCSrc;
415 : pCSrc = pCSave+len;
416 : remaining = cStackCap-(pCSrc - pCSave);
417 : }
418 : /* convert to chars */
419 : retVal = uprv_wcstombs(pCSrc,pWStack,remaining);
420 :
421 : pCSrc += retVal;
422 : pSrc += nulLen;
423 : srcLength-=nulLen; /* decrement the srcLength */
424 : break;
425 : }
426 : }
427 : }
428 :
429 : /* OK..now we have converted from wchar_ts to chars now
430 : * convert chars to UChars
431 : */
432 : pCSrcLimit = pCSrc;
433 : pCSrc = pCSave;
434 : pTarget = target= dest;
435 : pTargetLimit = dest + destCapacity;
436 :
437 : conv= u_getDefaultConverter(pErrorCode);
438 :
439 : if(U_FAILURE(*pErrorCode)|| conv==NULL){
440 : goto cleanup;
441 : }
442 :
443 : for(;;) {
444 :
445 : *pErrorCode = U_ZERO_ERROR;
446 :
447 : /* convert to stack buffer*/
448 : ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,NULL,(UBool)(pCSrc==pCSrcLimit),pErrorCode);
449 :
450 : /* increment count to number written to stack */
451 : count+= pTarget - target;
452 :
453 : if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
454 : target = uStack;
455 : pTarget = uStack;
456 : pTargetLimit = uStack + _STACK_BUFFER_CAPACITY;
457 : } else {
458 : break;
459 : }
460 :
461 : }
462 :
463 : if(pDestLength){
464 : *pDestLength =count;
465 : }
466 :
467 : u_terminateUChars(dest,destCapacity,count,pErrorCode);
468 :
469 : cleanup:
470 :
471 : if(cStack != pCSave){
472 : uprv_free(pCSave);
473 : }
474 :
475 : if(wStack != pWStack){
476 : uprv_free(pWStack);
477 : }
478 :
479 : u_releaseDefaultConverter(conv);
480 :
481 : return dest;
482 : }
483 : #endif
484 :
485 : U_CAPI UChar* U_EXPORT2
486 0 : u_strFromWCS(UChar *dest,
487 : int32_t destCapacity,
488 : int32_t *pDestLength,
489 : const wchar_t *src,
490 : int32_t srcLength,
491 : UErrorCode *pErrorCode)
492 : {
493 :
494 : /* args check */
495 0 : if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
496 0 : return NULL;
497 : }
498 :
499 0 : if( (src==NULL && srcLength!=0) || srcLength < -1 ||
500 0 : (destCapacity<0) || (dest == NULL && destCapacity > 0)
501 : ) {
502 0 : *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
503 0 : return NULL;
504 : }
505 :
506 : #ifdef U_WCHAR_IS_UTF16
507 : /* wchar_t is UTF-16 just do a memcpy */
508 : if(srcLength == -1){
509 : srcLength = u_strlen((const UChar *)src);
510 : }
511 : if(0 < srcLength && srcLength <= destCapacity){
512 : u_memcpy(dest, (const UChar *)src, srcLength);
513 : }
514 : if(pDestLength){
515 : *pDestLength = srcLength;
516 : }
517 :
518 : u_terminateUChars(dest,destCapacity,srcLength,pErrorCode);
519 :
520 : return dest;
521 :
522 : #elif defined U_WCHAR_IS_UTF32
523 :
524 : return u_strFromUTF32(dest, destCapacity, pDestLength,
525 0 : (UChar32*)src, srcLength, pErrorCode);
526 :
527 : #else
528 :
529 : return _strFromWCS(dest,destCapacity,pDestLength,src,srcLength,pErrorCode);
530 :
531 : #endif
532 :
533 : }
534 :
535 : #endif /* #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */
|