Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : ******************************************************************************
5 : *
6 : * Copyright (C) 1998-2016, International Business Machines
7 : * Corporation and others. All Rights Reserved.
8 : *
9 : ******************************************************************************
10 : *
11 : * ucnv.c:
12 : * Implements APIs for the ICU's codeset conversion library;
13 : * mostly calls through internal functions;
14 : * created by Bertrand A. Damiba
15 : *
16 : * Modification History:
17 : *
18 : * Date Name Description
19 : * 04/04/99 helena Fixed internal header inclusion.
20 : * 05/09/00 helena Added implementation to handle fallback mappings.
21 : * 06/20/2000 helena OS/400 port changes; mostly typecast.
22 : */
23 :
24 : #include "unicode/utypes.h"
25 :
26 : #if !UCONFIG_NO_CONVERSION
27 :
28 : #include "unicode/ustring.h"
29 : #include "unicode/ucnv.h"
30 : #include "unicode/ucnv_err.h"
31 : #include "unicode/uset.h"
32 : #include "unicode/utf.h"
33 : #include "unicode/utf16.h"
34 : #include "putilimp.h"
35 : #include "cmemory.h"
36 : #include "cstring.h"
37 : #include "uassert.h"
38 : #include "utracimp.h"
39 : #include "ustr_imp.h"
40 : #include "ucnv_imp.h"
41 : #include "ucnv_cnv.h"
42 : #include "ucnv_bld.h"
43 :
44 : /* size of intermediate and preflighting buffers in ucnv_convert() */
45 : #define CHUNK_SIZE 1024
46 :
47 : typedef struct UAmbiguousConverter {
48 : const char *name;
49 : const UChar variant5c;
50 : } UAmbiguousConverter;
51 :
52 : static const UAmbiguousConverter ambiguousConverters[]={
53 : { "ibm-897_P100-1995", 0xa5 },
54 : { "ibm-942_P120-1999", 0xa5 },
55 : { "ibm-943_P130-1999", 0xa5 },
56 : { "ibm-946_P100-1995", 0xa5 },
57 : { "ibm-33722_P120-1999", 0xa5 },
58 : { "ibm-1041_P100-1995", 0xa5 },
59 : /*{ "ibm-54191_P100-2006", 0xa5 },*/
60 : /*{ "ibm-62383_P100-2007", 0xa5 },*/
61 : /*{ "ibm-891_P100-1995", 0x20a9 },*/
62 : { "ibm-944_P100-1995", 0x20a9 },
63 : { "ibm-949_P110-1999", 0x20a9 },
64 : { "ibm-1363_P110-1997", 0x20a9 },
65 : { "ISO_2022,locale=ko,version=0", 0x20a9 },
66 : { "ibm-1088_P100-1995", 0x20a9 }
67 : };
68 :
69 : /*Calls through createConverter */
70 : U_CAPI UConverter* U_EXPORT2
71 0 : ucnv_open (const char *name,
72 : UErrorCode * err)
73 : {
74 : UConverter *r;
75 :
76 0 : if (err == NULL || U_FAILURE (*err)) {
77 0 : return NULL;
78 : }
79 :
80 0 : r = ucnv_createConverter(NULL, name, err);
81 0 : return r;
82 : }
83 :
84 : U_CAPI UConverter* U_EXPORT2
85 0 : ucnv_openPackage (const char *packageName, const char *converterName, UErrorCode * err)
86 : {
87 0 : return ucnv_createConverterFromPackage(packageName, converterName, err);
88 : }
89 :
90 : /*Extracts the UChar* to a char* and calls through createConverter */
91 : U_CAPI UConverter* U_EXPORT2
92 0 : ucnv_openU (const UChar * name,
93 : UErrorCode * err)
94 : {
95 : char asciiName[UCNV_MAX_CONVERTER_NAME_LENGTH];
96 :
97 0 : if (err == NULL || U_FAILURE(*err))
98 0 : return NULL;
99 0 : if (name == NULL)
100 0 : return ucnv_open (NULL, err);
101 0 : if (u_strlen(name) >= UCNV_MAX_CONVERTER_NAME_LENGTH)
102 : {
103 0 : *err = U_ILLEGAL_ARGUMENT_ERROR;
104 0 : return NULL;
105 : }
106 0 : return ucnv_open(u_austrcpy(asciiName, name), err);
107 : }
108 :
109 : /* Copy the string that is represented by the UConverterPlatform enum
110 : * @param platformString An output buffer
111 : * @param platform An enum representing a platform
112 : * @return the length of the copied string.
113 : */
114 : static int32_t
115 0 : ucnv_copyPlatformString(char *platformString, UConverterPlatform pltfrm)
116 : {
117 0 : switch (pltfrm)
118 : {
119 : case UCNV_IBM:
120 0 : uprv_strcpy(platformString, "ibm-");
121 0 : return 4;
122 : case UCNV_UNKNOWN:
123 0 : break;
124 : }
125 :
126 : /* default to empty string */
127 0 : *platformString = 0;
128 0 : return 0;
129 : }
130 :
131 : /*Assumes a $platform-#codepage.$CONVERTER_FILE_EXTENSION scheme and calls
132 : *through createConverter*/
133 : U_CAPI UConverter* U_EXPORT2
134 0 : ucnv_openCCSID (int32_t codepage,
135 : UConverterPlatform platform,
136 : UErrorCode * err)
137 : {
138 : char myName[UCNV_MAX_CONVERTER_NAME_LENGTH];
139 : int32_t myNameLen;
140 :
141 0 : if (err == NULL || U_FAILURE (*err))
142 0 : return NULL;
143 :
144 : /* ucnv_copyPlatformString could return "ibm-" or "cp" */
145 0 : myNameLen = ucnv_copyPlatformString(myName, platform);
146 0 : T_CString_integerToString(myName + myNameLen, codepage, 10);
147 :
148 0 : return ucnv_createConverter(NULL, myName, err);
149 : }
150 :
151 : /* Creating a temporary stack-based object that can be used in one thread,
152 : and created from a converter that is shared across threads.
153 : */
154 :
155 : U_CAPI UConverter* U_EXPORT2
156 0 : ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status)
157 : {
158 : UConverter *localConverter, *allocatedConverter;
159 : int32_t stackBufferSize;
160 : int32_t bufferSizeNeeded;
161 0 : char *stackBufferChars = (char *)stackBuffer;
162 : UErrorCode cbErr;
163 : UConverterToUnicodeArgs toUArgs = {
164 : sizeof(UConverterToUnicodeArgs),
165 : TRUE,
166 : NULL,
167 : NULL,
168 : NULL,
169 : NULL,
170 : NULL,
171 : NULL
172 0 : };
173 : UConverterFromUnicodeArgs fromUArgs = {
174 : sizeof(UConverterFromUnicodeArgs),
175 : TRUE,
176 : NULL,
177 : NULL,
178 : NULL,
179 : NULL,
180 : NULL,
181 : NULL
182 0 : };
183 :
184 : UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE);
185 :
186 0 : if (status == NULL || U_FAILURE(*status)){
187 : UTRACE_EXIT_STATUS(status? *status: U_ILLEGAL_ARGUMENT_ERROR);
188 0 : return NULL;
189 : }
190 :
191 0 : if (cnv == NULL) {
192 0 : *status = U_ILLEGAL_ARGUMENT_ERROR;
193 : UTRACE_EXIT_STATUS(*status);
194 0 : return NULL;
195 : }
196 :
197 : UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p",
198 : ucnv_getName(cnv, status), cnv, stackBuffer);
199 :
200 0 : if (cnv->sharedData->impl->safeClone != NULL) {
201 : /* call the custom safeClone function for sizing */
202 0 : bufferSizeNeeded = 0;
203 0 : cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status);
204 0 : if (U_FAILURE(*status)) {
205 : UTRACE_EXIT_STATUS(*status);
206 0 : return NULL;
207 : }
208 : }
209 : else
210 : {
211 : /* inherent sizing */
212 0 : bufferSizeNeeded = sizeof(UConverter);
213 : }
214 :
215 0 : if (pBufferSize == NULL) {
216 0 : stackBufferSize = 1;
217 0 : pBufferSize = &stackBufferSize;
218 : } else {
219 0 : stackBufferSize = *pBufferSize;
220 0 : if (stackBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */
221 0 : *pBufferSize = bufferSizeNeeded;
222 : UTRACE_EXIT_VALUE(bufferSizeNeeded);
223 0 : return NULL;
224 : }
225 : }
226 :
227 :
228 : /* Pointers on 64-bit platforms need to be aligned
229 : * on a 64-bit boundary in memory.
230 : */
231 0 : if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {
232 0 : int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars);
233 0 : if(stackBufferSize > offsetUp) {
234 0 : stackBufferSize -= offsetUp;
235 0 : stackBufferChars += offsetUp;
236 : } else {
237 : /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */
238 0 : stackBufferSize = 1;
239 : }
240 : }
241 :
242 0 : stackBuffer = (void *)stackBufferChars;
243 :
244 : /* Now, see if we must allocate any memory */
245 0 : if (stackBufferSize < bufferSizeNeeded || stackBuffer == NULL)
246 : {
247 : /* allocate one here...*/
248 0 : localConverter = allocatedConverter = (UConverter *) uprv_malloc (bufferSizeNeeded);
249 :
250 0 : if(localConverter == NULL) {
251 0 : *status = U_MEMORY_ALLOCATION_ERROR;
252 : UTRACE_EXIT_STATUS(*status);
253 0 : return NULL;
254 : }
255 0 : *status = U_SAFECLONE_ALLOCATED_WARNING;
256 :
257 : /* record the fact that memory was allocated */
258 0 : *pBufferSize = bufferSizeNeeded;
259 : } else {
260 : /* just use the stack buffer */
261 0 : localConverter = (UConverter*) stackBuffer;
262 0 : allocatedConverter = NULL;
263 : }
264 :
265 0 : uprv_memset(localConverter, 0, bufferSizeNeeded);
266 :
267 : /* Copy initial state */
268 0 : uprv_memcpy(localConverter, cnv, sizeof(UConverter));
269 0 : localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE;
270 :
271 : /* copy the substitution string */
272 0 : if (cnv->subChars == (uint8_t *)cnv->subUChars) {
273 0 : localConverter->subChars = (uint8_t *)localConverter->subUChars;
274 : } else {
275 0 : localConverter->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
276 0 : if (localConverter->subChars == NULL) {
277 0 : uprv_free(allocatedConverter);
278 : UTRACE_EXIT_STATUS(*status);
279 0 : return NULL;
280 : }
281 0 : uprv_memcpy(localConverter->subChars, cnv->subChars, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
282 : }
283 :
284 : /* now either call the safeclone fcn or not */
285 0 : if (cnv->sharedData->impl->safeClone != NULL) {
286 : /* call the custom safeClone function */
287 0 : localConverter = cnv->sharedData->impl->safeClone(cnv, localConverter, pBufferSize, status);
288 : }
289 :
290 0 : if(localConverter==NULL || U_FAILURE(*status)) {
291 0 : if (allocatedConverter != NULL && allocatedConverter->subChars != (uint8_t *)allocatedConverter->subUChars) {
292 0 : uprv_free(allocatedConverter->subChars);
293 : }
294 0 : uprv_free(allocatedConverter);
295 : UTRACE_EXIT_STATUS(*status);
296 0 : return NULL;
297 : }
298 :
299 : /* increment refcount of shared data if needed */
300 0 : if (cnv->sharedData->isReferenceCounted) {
301 0 : ucnv_incrementRefCount(cnv->sharedData);
302 : }
303 :
304 0 : if(localConverter == (UConverter*)stackBuffer) {
305 : /* we're using user provided data - set to not destroy */
306 0 : localConverter->isCopyLocal = TRUE;
307 : }
308 :
309 : /* allow callback functions to handle any memory allocation */
310 0 : toUArgs.converter = fromUArgs.converter = localConverter;
311 0 : cbErr = U_ZERO_ERROR;
312 0 : cnv->fromCharErrorBehaviour(cnv->toUContext, &toUArgs, NULL, 0, UCNV_CLONE, &cbErr);
313 0 : cbErr = U_ZERO_ERROR;
314 0 : cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLONE, &cbErr);
315 :
316 : UTRACE_EXIT_PTR_STATUS(localConverter, *status);
317 0 : return localConverter;
318 : }
319 :
320 :
321 :
322 : /*Decreases the reference counter in the shared immutable section of the object
323 : *and frees the mutable part*/
324 :
325 : U_CAPI void U_EXPORT2
326 0 : ucnv_close (UConverter * converter)
327 : {
328 0 : UErrorCode errorCode = U_ZERO_ERROR;
329 :
330 : UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE);
331 :
332 0 : if (converter == NULL)
333 : {
334 : UTRACE_EXIT();
335 0 : return;
336 : }
337 :
338 : UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b",
339 : ucnv_getName(converter, &errorCode), converter, converter->isCopyLocal);
340 :
341 : /* In order to speed up the close, only call the callbacks when they have been changed.
342 : This performance check will only work when the callbacks are set within a shared library
343 : or from user code that statically links this code. */
344 : /* first, notify the callback functions that the converter is closed */
345 0 : if (converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) {
346 : UConverterToUnicodeArgs toUArgs = {
347 : sizeof(UConverterToUnicodeArgs),
348 : TRUE,
349 : NULL,
350 : NULL,
351 : NULL,
352 : NULL,
353 : NULL,
354 : NULL
355 0 : };
356 :
357 0 : toUArgs.converter = converter;
358 0 : errorCode = U_ZERO_ERROR;
359 0 : converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode);
360 : }
361 0 : if (converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) {
362 : UConverterFromUnicodeArgs fromUArgs = {
363 : sizeof(UConverterFromUnicodeArgs),
364 : TRUE,
365 : NULL,
366 : NULL,
367 : NULL,
368 : NULL,
369 : NULL,
370 : NULL
371 0 : };
372 0 : fromUArgs.converter = converter;
373 0 : errorCode = U_ZERO_ERROR;
374 0 : converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode);
375 : }
376 :
377 0 : if (converter->sharedData->impl->close != NULL) {
378 0 : converter->sharedData->impl->close(converter);
379 : }
380 :
381 0 : if (converter->subChars != (uint8_t *)converter->subUChars) {
382 0 : uprv_free(converter->subChars);
383 : }
384 :
385 0 : if (converter->sharedData->isReferenceCounted) {
386 0 : ucnv_unloadSharedDataIfReady(converter->sharedData);
387 : }
388 :
389 0 : if(!converter->isCopyLocal){
390 0 : uprv_free(converter);
391 : }
392 :
393 : UTRACE_EXIT();
394 : }
395 :
396 : /*returns a single Name from the list, will return NULL if out of bounds
397 : */
398 : U_CAPI const char* U_EXPORT2
399 0 : ucnv_getAvailableName (int32_t n)
400 : {
401 0 : if (0 <= n && n <= 0xffff) {
402 0 : UErrorCode err = U_ZERO_ERROR;
403 0 : const char *name = ucnv_bld_getAvailableConverter((uint16_t)n, &err);
404 0 : if (U_SUCCESS(err)) {
405 0 : return name;
406 : }
407 : }
408 0 : return NULL;
409 : }
410 :
411 : U_CAPI int32_t U_EXPORT2
412 0 : ucnv_countAvailable ()
413 : {
414 0 : UErrorCode err = U_ZERO_ERROR;
415 0 : return ucnv_bld_countAvailableConverters(&err);
416 : }
417 :
418 : U_CAPI void U_EXPORT2
419 0 : ucnv_getSubstChars (const UConverter * converter,
420 : char *mySubChar,
421 : int8_t * len,
422 : UErrorCode * err)
423 : {
424 0 : if (U_FAILURE (*err))
425 0 : return;
426 :
427 0 : if (converter->subCharLen <= 0) {
428 : /* Unicode string or empty string from ucnv_setSubstString(). */
429 0 : *len = 0;
430 0 : return;
431 : }
432 :
433 0 : if (*len < converter->subCharLen) /*not enough space in subChars */
434 : {
435 0 : *err = U_INDEX_OUTOFBOUNDS_ERROR;
436 0 : return;
437 : }
438 :
439 0 : uprv_memcpy (mySubChar, converter->subChars, converter->subCharLen); /*fills in the subchars */
440 0 : *len = converter->subCharLen; /*store # of bytes copied to buffer */
441 : }
442 :
443 : U_CAPI void U_EXPORT2
444 0 : ucnv_setSubstChars (UConverter * converter,
445 : const char *mySubChar,
446 : int8_t len,
447 : UErrorCode * err)
448 : {
449 0 : if (U_FAILURE (*err))
450 0 : return;
451 :
452 : /*Makes sure that the subChar is within the codepages char length boundaries */
453 0 : if ((len > converter->sharedData->staticData->maxBytesPerChar)
454 0 : || (len < converter->sharedData->staticData->minBytesPerChar))
455 : {
456 0 : *err = U_ILLEGAL_ARGUMENT_ERROR;
457 0 : return;
458 : }
459 :
460 0 : uprv_memcpy (converter->subChars, mySubChar, len); /*copies the subchars */
461 0 : converter->subCharLen = len; /*sets the new len */
462 :
463 : /*
464 : * There is currently (2001Feb) no separate API to set/get subChar1.
465 : * In order to always have subChar written after it is explicitly set,
466 : * we set subChar1 to 0.
467 : */
468 0 : converter->subChar1 = 0;
469 :
470 0 : return;
471 : }
472 :
473 : U_CAPI void U_EXPORT2
474 0 : ucnv_setSubstString(UConverter *cnv,
475 : const UChar *s,
476 : int32_t length,
477 : UErrorCode *err) {
478 : UAlignedMemory cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE / sizeof(UAlignedMemory) + 1];
479 : char chars[UCNV_ERROR_BUFFER_LENGTH];
480 :
481 : UConverter *clone;
482 : uint8_t *subChars;
483 : int32_t cloneSize, length8;
484 :
485 : /* Let the following functions check all arguments. */
486 0 : cloneSize = sizeof(cloneBuffer);
487 0 : clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err);
488 0 : ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, err);
489 0 : length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err);
490 0 : ucnv_close(clone);
491 0 : if (U_FAILURE(*err)) {
492 0 : return;
493 : }
494 :
495 0 : if (cnv->sharedData->impl->writeSub == NULL
496 : #if !UCONFIG_NO_LEGACY_CONVERSION
497 : || (cnv->sharedData->staticData->conversionType == UCNV_MBCS &&
498 : ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL)
499 : #endif
500 : ) {
501 : /* The converter is not stateful. Store the charset bytes as a fixed string. */
502 0 : subChars = (uint8_t *)chars;
503 : } else {
504 : /*
505 : * The converter has a non-default writeSub() function, indicating
506 : * that it is stateful.
507 : * Store the Unicode string for on-the-fly conversion for correct
508 : * state handling.
509 : */
510 0 : if (length > UCNV_ERROR_BUFFER_LENGTH) {
511 : /*
512 : * Should not occur. The converter should output at least one byte
513 : * per UChar, which means that ucnv_fromUChars() should catch all
514 : * overflows.
515 : */
516 0 : *err = U_BUFFER_OVERFLOW_ERROR;
517 0 : return;
518 : }
519 0 : subChars = (uint8_t *)s;
520 0 : if (length < 0) {
521 0 : length = u_strlen(s);
522 : }
523 0 : length8 = length * U_SIZEOF_UCHAR;
524 : }
525 :
526 : /*
527 : * For storing the substitution string, select either the small buffer inside
528 : * UConverter or allocate a subChars buffer.
529 : */
530 0 : if (length8 > UCNV_MAX_SUBCHAR_LEN) {
531 : /* Use a separate buffer for the string. Outside UConverter to not make it too large. */
532 0 : if (cnv->subChars == (uint8_t *)cnv->subUChars) {
533 : /* Allocate a new buffer for the string. */
534 0 : cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
535 0 : if (cnv->subChars == NULL) {
536 0 : cnv->subChars = (uint8_t *)cnv->subUChars;
537 0 : *err = U_MEMORY_ALLOCATION_ERROR;
538 0 : return;
539 : }
540 0 : uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
541 : }
542 : }
543 :
544 : /* Copy the substitution string into the UConverter or its subChars buffer. */
545 0 : if (length8 == 0) {
546 0 : cnv->subCharLen = 0;
547 : } else {
548 0 : uprv_memcpy(cnv->subChars, subChars, length8);
549 0 : if (subChars == (uint8_t *)chars) {
550 0 : cnv->subCharLen = (int8_t)length8;
551 : } else /* subChars == s */ {
552 0 : cnv->subCharLen = (int8_t)-length;
553 : }
554 : }
555 :
556 : /* See comment in ucnv_setSubstChars(). */
557 0 : cnv->subChar1 = 0;
558 : }
559 :
560 : /*resets the internal states of a converter
561 : *goal : have the same behaviour than a freshly created converter
562 : */
563 0 : static void _reset(UConverter *converter, UConverterResetChoice choice,
564 : UBool callCallback) {
565 0 : if(converter == NULL) {
566 0 : return;
567 : }
568 :
569 0 : if(callCallback) {
570 : /* first, notify the callback functions that the converter is reset */
571 : UErrorCode errorCode;
572 :
573 0 : if(choice<=UCNV_RESET_TO_UNICODE && converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) {
574 : UConverterToUnicodeArgs toUArgs = {
575 : sizeof(UConverterToUnicodeArgs),
576 : TRUE,
577 : NULL,
578 : NULL,
579 : NULL,
580 : NULL,
581 : NULL,
582 : NULL
583 0 : };
584 0 : toUArgs.converter = converter;
585 0 : errorCode = U_ZERO_ERROR;
586 0 : converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode);
587 : }
588 0 : if(choice!=UCNV_RESET_TO_UNICODE && converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) {
589 : UConverterFromUnicodeArgs fromUArgs = {
590 : sizeof(UConverterFromUnicodeArgs),
591 : TRUE,
592 : NULL,
593 : NULL,
594 : NULL,
595 : NULL,
596 : NULL,
597 : NULL
598 0 : };
599 0 : fromUArgs.converter = converter;
600 0 : errorCode = U_ZERO_ERROR;
601 0 : converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode);
602 : }
603 : }
604 :
605 : /* now reset the converter itself */
606 0 : if(choice<=UCNV_RESET_TO_UNICODE) {
607 0 : converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus;
608 0 : converter->mode = 0;
609 0 : converter->toULength = 0;
610 0 : converter->invalidCharLength = converter->UCharErrorBufferLength = 0;
611 0 : converter->preToULength = 0;
612 : }
613 0 : if(choice!=UCNV_RESET_TO_UNICODE) {
614 0 : converter->fromUnicodeStatus = 0;
615 0 : converter->fromUChar32 = 0;
616 0 : converter->invalidUCharLength = converter->charErrorBufferLength = 0;
617 0 : converter->preFromUFirstCP = U_SENTINEL;
618 0 : converter->preFromULength = 0;
619 : }
620 :
621 0 : if (converter->sharedData->impl->reset != NULL) {
622 : /* call the custom reset function */
623 0 : converter->sharedData->impl->reset(converter, choice);
624 : }
625 : }
626 :
627 : U_CAPI void U_EXPORT2
628 0 : ucnv_reset(UConverter *converter)
629 : {
630 0 : _reset(converter, UCNV_RESET_BOTH, TRUE);
631 0 : }
632 :
633 : U_CAPI void U_EXPORT2
634 0 : ucnv_resetToUnicode(UConverter *converter)
635 : {
636 0 : _reset(converter, UCNV_RESET_TO_UNICODE, TRUE);
637 0 : }
638 :
639 : U_CAPI void U_EXPORT2
640 0 : ucnv_resetFromUnicode(UConverter *converter)
641 : {
642 0 : _reset(converter, UCNV_RESET_FROM_UNICODE, TRUE);
643 0 : }
644 :
645 : U_CAPI int8_t U_EXPORT2
646 0 : ucnv_getMaxCharSize (const UConverter * converter)
647 : {
648 0 : return converter->maxBytesPerUChar;
649 : }
650 :
651 :
652 : U_CAPI int8_t U_EXPORT2
653 0 : ucnv_getMinCharSize (const UConverter * converter)
654 : {
655 0 : return converter->sharedData->staticData->minBytesPerChar;
656 : }
657 :
658 : U_CAPI const char* U_EXPORT2
659 0 : ucnv_getName (const UConverter * converter, UErrorCode * err)
660 :
661 : {
662 0 : if (U_FAILURE (*err))
663 0 : return NULL;
664 0 : if(converter->sharedData->impl->getName){
665 0 : const char* temp= converter->sharedData->impl->getName(converter);
666 0 : if(temp)
667 0 : return temp;
668 : }
669 0 : return converter->sharedData->staticData->name;
670 : }
671 :
672 : U_CAPI int32_t U_EXPORT2
673 0 : ucnv_getCCSID(const UConverter * converter,
674 : UErrorCode * err)
675 : {
676 : int32_t ccsid;
677 0 : if (U_FAILURE (*err))
678 0 : return -1;
679 :
680 0 : ccsid = converter->sharedData->staticData->codepage;
681 0 : if (ccsid == 0) {
682 : /* Rare case. This is for cases like gb18030,
683 : which doesn't have an IBM canonical name, but does have an IBM alias. */
684 0 : const char *standardName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err);
685 0 : if (U_SUCCESS(*err) && standardName) {
686 0 : const char *ccsidStr = uprv_strchr(standardName, '-');
687 0 : if (ccsidStr) {
688 0 : ccsid = (int32_t)atol(ccsidStr+1); /* +1 to skip '-' */
689 : }
690 : }
691 : }
692 0 : return ccsid;
693 : }
694 :
695 :
696 : U_CAPI UConverterPlatform U_EXPORT2
697 0 : ucnv_getPlatform (const UConverter * converter,
698 : UErrorCode * err)
699 : {
700 0 : if (U_FAILURE (*err))
701 0 : return UCNV_UNKNOWN;
702 :
703 0 : return (UConverterPlatform)converter->sharedData->staticData->platform;
704 : }
705 :
706 : U_CAPI void U_EXPORT2
707 0 : ucnv_getToUCallBack (const UConverter * converter,
708 : UConverterToUCallback *action,
709 : const void **context)
710 : {
711 0 : *action = converter->fromCharErrorBehaviour;
712 0 : *context = converter->toUContext;
713 0 : }
714 :
715 : U_CAPI void U_EXPORT2
716 0 : ucnv_getFromUCallBack (const UConverter * converter,
717 : UConverterFromUCallback *action,
718 : const void **context)
719 : {
720 0 : *action = converter->fromUCharErrorBehaviour;
721 0 : *context = converter->fromUContext;
722 0 : }
723 :
724 : U_CAPI void U_EXPORT2
725 0 : ucnv_setToUCallBack (UConverter * converter,
726 : UConverterToUCallback newAction,
727 : const void* newContext,
728 : UConverterToUCallback *oldAction,
729 : const void** oldContext,
730 : UErrorCode * err)
731 : {
732 0 : if (U_FAILURE (*err))
733 0 : return;
734 0 : if (oldAction) *oldAction = converter->fromCharErrorBehaviour;
735 0 : converter->fromCharErrorBehaviour = newAction;
736 0 : if (oldContext) *oldContext = converter->toUContext;
737 0 : converter->toUContext = newContext;
738 : }
739 :
740 : U_CAPI void U_EXPORT2
741 0 : ucnv_setFromUCallBack (UConverter * converter,
742 : UConverterFromUCallback newAction,
743 : const void* newContext,
744 : UConverterFromUCallback *oldAction,
745 : const void** oldContext,
746 : UErrorCode * err)
747 : {
748 0 : if (U_FAILURE (*err))
749 0 : return;
750 0 : if (oldAction) *oldAction = converter->fromUCharErrorBehaviour;
751 0 : converter->fromUCharErrorBehaviour = newAction;
752 0 : if (oldContext) *oldContext = converter->fromUContext;
753 0 : converter->fromUContext = newContext;
754 : }
755 :
/*
 * Adjusts the first `length` entries of an offsets array after a conversion
 * step.
 *
 * The adjustment value is sourceIndex-errorInputLength when sourceIndex>=0,
 * and -1 otherwise (the conversion function does not handle offsets):
 *  - zero:     the array is left unchanged (most common case)
 *  - positive: added to every entry that is not negative
 *  - negative: every entry is set to -1 (offsets are not handled, or the
 *              error input sequence started in a previous buffer)
 *
 * @param offsets          Array of offsets to adjust in place.
 * @param length           Number of entries to adjust.
 * @param sourceIndex      Source index to add, or negative for "no offsets".
 * @param errorInputLength Length of the input sequence that caused an
 *                         error, if any; subtracted from sourceIndex.
 */
static void
_updateOffsets(int32_t *offsets, int32_t length,
               int32_t sourceIndex, int32_t errorInputLength) {
    int32_t *const end = offsets + length;
    const int32_t shift = (sourceIndex >= 0) ? (sourceIndex - errorInputLength) : -1;
    int32_t *p;

    if (shift == 0) {
        /* nothing to do */
        return;
    }

    if (shift > 0) {
        /* move every valid (non-negative) offset forward */
        for (p = offsets; p < end; ++p) {
            if (*p >= 0) {
                *p += shift;
            }
        }
        return;
    }

    /* shift < 0: invalidate all offsets */
    for (p = offsets; p < end; ++p) {
        *p = -1;
    }
}
800 :
801 : /* ucnv_fromUnicode --------------------------------------------------------- */
802 :
803 : /*
804 : * Implementation note for m:n conversions
805 : *
806 : * While collecting source units to find the longest match for m:n conversion,
807 : * some source units may need to be stored for a partial match.
808 : * When a second buffer does not yield a match on all of the previously stored
809 : * source units, then they must be "replayed", i.e., fed back into the converter.
810 : *
811 : * The code relies on the fact that replaying will not nest -
812 : * converting a replay buffer will not result in a replay.
813 : * This is because a replay is necessary only after the _continuation_ of a
814 : * partial match failed, but a replay buffer is converted as a whole.
815 : * It may result in some of its units being stored again for a partial match,
816 : * but there will not be a continuation _during_ the replay which could fail.
817 : *
818 : * It is conceivable that a callback function could call the converter
819 : * recursively in a way that causes another replay to be stored, but that
820 : * would be an error in the callback function.
821 : * Such violations will cause assertion failures in a debug build,
822 : * and wrong output, but they will not cause a crash.
823 : */
824 :
825 : static void
826 0 : _fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) {
827 : UConverterFromUnicode fromUnicode;
828 : UConverter *cnv;
829 : const UChar *s;
830 : char *t;
831 : int32_t *offsets;
832 : int32_t sourceIndex;
833 : int32_t errorInputLength;
834 : UBool converterSawEndOfInput, calledCallback;
835 :
836 : /* variables for m:n conversion */
837 : UChar replay[UCNV_EXT_MAX_UCHARS];
838 : const UChar *realSource, *realSourceLimit;
839 : int32_t realSourceIndex;
840 : UBool realFlush;
841 :
842 0 : cnv=pArgs->converter;
843 0 : s=pArgs->source;
844 0 : t=pArgs->target;
845 0 : offsets=pArgs->offsets;
846 :
847 : /* get the converter implementation function */
848 0 : sourceIndex=0;
849 0 : if(offsets==NULL) {
850 0 : fromUnicode=cnv->sharedData->impl->fromUnicode;
851 : } else {
852 0 : fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets;
853 0 : if(fromUnicode==NULL) {
854 : /* there is no WithOffsets implementation */
855 0 : fromUnicode=cnv->sharedData->impl->fromUnicode;
856 : /* we will write -1 for each offset */
857 0 : sourceIndex=-1;
858 : }
859 : }
860 :
861 0 : if(cnv->preFromULength>=0) {
862 : /* normal mode */
863 0 : realSource=NULL;
864 :
865 : /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
866 0 : realSourceLimit=NULL;
867 0 : realFlush=FALSE;
868 0 : realSourceIndex=0;
869 : } else {
870 : /*
871 : * Previous m:n conversion stored source units from a partial match
872 : * and failed to consume all of them.
873 : * We need to "replay" them from a temporary buffer and convert them first.
874 : */
875 0 : realSource=pArgs->source;
876 0 : realSourceLimit=pArgs->sourceLimit;
877 0 : realFlush=pArgs->flush;
878 0 : realSourceIndex=sourceIndex;
879 :
880 0 : uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
881 0 : pArgs->source=replay;
882 0 : pArgs->sourceLimit=replay-cnv->preFromULength;
883 0 : pArgs->flush=FALSE;
884 0 : sourceIndex=-1;
885 :
886 0 : cnv->preFromULength=0;
887 : }
888 :
889 : /*
890 : * loop for conversion and error handling
891 : *
892 : * loop {
893 : * convert
894 : * loop {
895 : * update offsets
896 : * handle end of input
897 : * handle errors/call callback
898 : * }
899 : * }
900 : */
901 : for(;;) {
902 0 : if(U_SUCCESS(*err)) {
903 : /* convert */
904 0 : fromUnicode(pArgs, err);
905 :
906 : /*
907 : * set a flag for whether the converter
908 : * successfully processed the end of the input
909 : *
910 : * need not check cnv->preFromULength==0 because a replay (<0) will cause
911 : * s<sourceLimit before converterSawEndOfInput is checked
912 : */
913 0 : converterSawEndOfInput=
914 0 : (UBool)(U_SUCCESS(*err) &&
915 0 : pArgs->flush && pArgs->source==pArgs->sourceLimit &&
916 0 : cnv->fromUChar32==0);
917 : } else {
918 : /* handle error from ucnv_convertEx() */
919 0 : converterSawEndOfInput=FALSE;
920 : }
921 :
922 : /* no callback called yet for this iteration */
923 0 : calledCallback=FALSE;
924 :
925 : /* no sourceIndex adjustment for conversion, only for callback output */
926 0 : errorInputLength=0;
927 :
928 : /*
929 : * loop for offsets and error handling
930 : *
931 : * iterates at most 3 times:
932 : * 1. to clean up after the conversion function
933 : * 2. after the callback
934 : * 3. after the callback again if there was truncated input
935 : */
936 : for(;;) {
937 : /* update offsets if we write any */
938 0 : if(offsets!=NULL) {
939 0 : int32_t length=(int32_t)(pArgs->target-t);
940 0 : if(length>0) {
941 0 : _updateOffsets(offsets, length, sourceIndex, errorInputLength);
942 :
943 : /*
944 : * if a converter handles offsets and updates the offsets
945 : * pointer at the end, then pArgs->offset should not change
946 : * here;
947 : * however, some converters do not handle offsets at all
948 : * (sourceIndex<0) or may not update the offsets pointer
949 : */
950 0 : pArgs->offsets=offsets+=length;
951 : }
952 :
953 0 : if(sourceIndex>=0) {
954 0 : sourceIndex+=(int32_t)(pArgs->source-s);
955 : }
956 : }
957 :
958 0 : if(cnv->preFromULength<0) {
959 : /*
960 : * switch the source to new replay units (cannot occur while replaying)
961 : * after offset handling and before end-of-input and callback handling
962 : */
963 0 : if(realSource==NULL) {
964 0 : realSource=pArgs->source;
965 0 : realSourceLimit=pArgs->sourceLimit;
966 0 : realFlush=pArgs->flush;
967 0 : realSourceIndex=sourceIndex;
968 :
969 0 : uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
970 0 : pArgs->source=replay;
971 0 : pArgs->sourceLimit=replay-cnv->preFromULength;
972 0 : pArgs->flush=FALSE;
973 0 : if((sourceIndex+=cnv->preFromULength)<0) {
974 0 : sourceIndex=-1;
975 : }
976 :
977 0 : cnv->preFromULength=0;
978 : } else {
979 : /* see implementation note before _fromUnicodeWithCallback() */
980 0 : U_ASSERT(realSource==NULL);
981 0 : *err=U_INTERNAL_PROGRAM_ERROR;
982 : }
983 : }
984 :
985 : /* update pointers */
986 0 : s=pArgs->source;
987 0 : t=pArgs->target;
988 :
989 0 : if(U_SUCCESS(*err)) {
990 0 : if(s<pArgs->sourceLimit) {
991 : /*
992 : * continue with the conversion loop while there is still input left
993 : * (continue converting by breaking out of only the inner loop)
994 : */
995 0 : break;
996 0 : } else if(realSource!=NULL) {
997 : /* switch back from replaying to the real source and continue */
998 0 : pArgs->source=realSource;
999 0 : pArgs->sourceLimit=realSourceLimit;
1000 0 : pArgs->flush=realFlush;
1001 0 : sourceIndex=realSourceIndex;
1002 :
1003 0 : realSource=NULL;
1004 0 : break;
1005 0 : } else if(pArgs->flush && cnv->fromUChar32!=0) {
1006 : /*
1007 : * the entire input stream is consumed
1008 : * and there is a partial, truncated input sequence left
1009 : */
1010 :
1011 : /* inject an error and continue with callback handling */
1012 0 : *err=U_TRUNCATED_CHAR_FOUND;
1013 0 : calledCallback=FALSE; /* new error condition */
1014 : } else {
1015 : /* input consumed */
1016 0 : if(pArgs->flush) {
1017 : /*
1018 : * return to the conversion loop once more if the flush
1019 : * flag is set and the conversion function has not
1020 : * successfully processed the end of the input yet
1021 : *
1022 : * (continue converting by breaking out of only the inner loop)
1023 : */
1024 0 : if(!converterSawEndOfInput) {
1025 0 : break;
1026 : }
1027 :
1028 : /* reset the converter without calling the callback function */
1029 0 : _reset(cnv, UCNV_RESET_FROM_UNICODE, FALSE);
1030 : }
1031 :
1032 : /* done successfully */
1033 0 : return;
1034 : }
1035 : }
1036 :
1037 : /* U_FAILURE(*err) */
1038 : {
1039 : UErrorCode e;
1040 :
1041 0 : if( calledCallback ||
1042 0 : (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
1043 0 : (e!=U_INVALID_CHAR_FOUND &&
1044 0 : e!=U_ILLEGAL_CHAR_FOUND &&
1045 : e!=U_TRUNCATED_CHAR_FOUND)
1046 : ) {
1047 : /*
1048 : * the callback did not or cannot resolve the error:
1049 : * set output pointers and return
1050 : *
1051 : * the check for buffer overflow is redundant but it is
1052 : * a high-runner case and hopefully documents the intent
1053 : * well
1054 : *
1055 : * if we were replaying, then the replay buffer must be
1056 : * copied back into the UConverter
1057 : * and the real arguments must be restored
1058 : */
1059 0 : if(realSource!=NULL) {
1060 : int32_t length;
1061 :
1062 0 : U_ASSERT(cnv->preFromULength==0);
1063 :
1064 0 : length=(int32_t)(pArgs->sourceLimit-pArgs->source);
1065 0 : if(length>0) {
1066 0 : u_memcpy(cnv->preFromU, pArgs->source, length);
1067 0 : cnv->preFromULength=(int8_t)-length;
1068 : }
1069 :
1070 0 : pArgs->source=realSource;
1071 0 : pArgs->sourceLimit=realSourceLimit;
1072 0 : pArgs->flush=realFlush;
1073 : }
1074 :
1075 0 : return;
1076 : }
1077 : }
1078 :
1079 : /* callback handling */
1080 : {
1081 : UChar32 codePoint;
1082 :
1083 : /* get and write the code point */
1084 0 : codePoint=cnv->fromUChar32;
1085 0 : errorInputLength=0;
1086 0 : U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint);
1087 0 : cnv->invalidUCharLength=(int8_t)errorInputLength;
1088 :
1089 : /* set the converter state to deal with the next character */
1090 0 : cnv->fromUChar32=0;
1091 :
1092 : /* call the callback function */
1093 0 : cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs,
1094 : cnv->invalidUCharBuffer, errorInputLength, codePoint,
1095 0 : *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL,
1096 0 : err);
1097 : }
1098 :
1099 : /*
1100 : * loop back to the offset handling
1101 : *
1102 : * this flag will indicate after offset handling
1103 : * that a callback was called;
1104 : * if the callback did not resolve the error, then we return
1105 : */
1106 0 : calledCallback=TRUE;
1107 0 : }
1108 0 : }
1109 : }
1110 :
1111 : /*
1112 : * Output the fromUnicode overflow buffer: copy the bytes pending in cnv->charErrorBuffer to *target.
1113 : * Call this function if(cnv->charErrorBufferLength>0). On return *target (and *pOffsets, if offsets are used) point past what was written; every copied byte gets the offset -1.
1114 : * @return TRUE if overflow (the target filled up before the buffer was emptied; the unwritten bytes stay in the overflow buffer and *err is set to U_BUFFER_OVERFLOW_ERROR), otherwise FALSE
1115 : */
1116 : static UBool
1117 0 : ucnv_outputOverflowFromUnicode(UConverter *cnv,
1118 : char **target, const char *targetLimit,
1119 : int32_t **pOffsets,
1120 : UErrorCode *err) {
1121 : int32_t *offsets;
1122 : char *overflow, *t;
1123 : int32_t i, length;
1124 : /* work on local copies of the output pointers; they are written back before each return */
1125 0 : t=*target;
1126 0 : if(pOffsets!=NULL) {
1127 0 : offsets=*pOffsets;
1128 : } else {
1129 0 : offsets=NULL; /* the caller does not track offsets */
1130 : }
1131 :
1132 0 : overflow=(char *)cnv->charErrorBuffer;
1133 0 : length=cnv->charErrorBufferLength;
1134 0 : i=0;
1135 0 : while(i<length) {
1136 0 : if(t==targetLimit) {
1137 : /* the overflow buffer contains too much, keep the rest */
1138 0 : int32_t j=0;
1139 : /* slide the unwritten bytes overflow[i..length[ down to the start of the buffer */
1140 0 : do {
1141 0 : overflow[j++]=overflow[i++];
1142 0 : } while(i<length);
1143 :
1144 0 : cnv->charErrorBufferLength=(int8_t)j; /* j bytes remain pending for a later call */
1145 0 : *target=t;
1146 0 : if(offsets!=NULL) {
1147 0 : *pOffsets=offsets;
1148 : }
1149 0 : *err=U_BUFFER_OVERFLOW_ERROR;
1150 0 : return TRUE;
1151 : }
1152 :
1153 : /* copy the overflow contents to the target */
1154 0 : *t++=overflow[i++];
1155 0 : if(offsets!=NULL) {
1156 0 : *offsets++=-1; /* no source index available for old output */
1157 : }
1158 : }
1159 :
1160 : /* the overflow buffer is completely copied to the target */
1161 0 : cnv->charErrorBufferLength=0;
1162 0 : *target=t;
1163 0 : if(offsets!=NULL) {
1164 0 : *pOffsets=offsets;
1165 : }
1166 0 : return FALSE;
1167 : }
1168 : /* Public entry point for the fromUnicode direction: validates the arguments, first drains any bytes pending in cnv->charErrorBuffer, then runs _fromUnicodeWithCallback() and writes the advanced positions back to *source and *target. Sets *err=U_ILLEGAL_ARGUMENT_ERROR for a NULL cnv/target/source or for inconsistent or oversized buffer limits; returns without doing anything if err is NULL or already indicates a failure. */
1169 : U_CAPI void U_EXPORT2
1170 0 : ucnv_fromUnicode(UConverter *cnv,
1171 : char **target, const char *targetLimit,
1172 : const UChar **source, const UChar *sourceLimit,
1173 : int32_t *offsets,
1174 : UBool flush,
1175 : UErrorCode *err) {
1176 : UConverterFromUnicodeArgs args;
1177 : const UChar *s;
1178 : char *t;
1179 :
1180 : /* check parameters */
1181 0 : if(err==NULL || U_FAILURE(*err)) {
1182 0 : return;
1183 : }
1184 :
1185 0 : if(cnv==NULL || target==NULL || source==NULL) {
1186 0 : *err=U_ILLEGAL_ARGUMENT_ERROR;
1187 0 : return;
1188 : }
1189 :
1190 0 : s=*source;
1191 0 : t=*target;
1192 :
1193 0 : if ((const void *)U_MAX_PTR(sourceLimit) == (const void *)sourceLimit) {
1194 : /*
1195 : Prevent code from going into an infinite loop in case we do hit this
1196 : limit. The limit pointer is expected to be on a UChar * boundary.
1197 : This also prevents the next argument check from failing.
1198 : */
1199 0 : sourceLimit = (const UChar *)(((const char *)sourceLimit) - 1);
1200 : }
1201 :
1202 : /*
1203 : * All these conditions should never happen.
1204 : *
1205 : * 1) Make sure that the limits are >= the source and target addresses
1206 : *
1207 : * 2) Make sure that the buffer sizes do not exceed the number range for
1208 : * int32_t because some functions use the size (in units or bytes)
1209 : * rather than comparing pointers, and because offsets are int32_t values.
1210 : *
1211 : * size_t is guaranteed to be unsigned and large enough for the job.
1212 : *
1213 : * Return with an error instead of adjusting the limits because we would
1214 : * not be able to maintain the semantics that either the source must be
1215 : * consumed or the target filled (unless an error occurs).
1216 : * An adjustment would be targetLimit=t+0x7fffffff; for example.
1217 : *
1218 : * 3) Make sure that the user didn't incorrectly cast a UChar * pointer
1219 : * to a char * pointer and provide an incomplete UChar code unit.
1220 : */
1221 0 : if (sourceLimit<s || targetLimit<t || /* check 1 */
1222 0 : ((size_t)(sourceLimit-s)>(size_t)0x3fffffff && sourceLimit>s) || /* check 2, source length counted in UChars */
1223 0 : ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) || /* check 2, target length counted in bytes */
1224 0 : (((const char *)sourceLimit-(const char *)s) & 1) != 0) /* check 3: the source length in bytes must be even */
1225 : {
1226 0 : *err=U_ILLEGAL_ARGUMENT_ERROR;
1227 0 : return;
1228 : }
1229 :
1230 : /* output the target overflow buffer */
1231 0 : if( cnv->charErrorBufferLength>0 &&
1232 0 : ucnv_outputOverflowFromUnicode(cnv, target, targetLimit, &offsets, err)
1233 : ) {
1234 : /* U_BUFFER_OVERFLOW_ERROR */
1235 0 : return;
1236 : }
1237 : /* *target may have moved, therefore stop using t */
1238 :
1239 0 : if(!flush && s==sourceLimit && cnv->preFromULength>=0) { /* preFromULength<0 would mean that replay units are still pending */
1240 : /* the overflow buffer is emptied and there is no new input: we are done */
1241 0 : return;
1242 : }
1243 :
1244 : /*
1245 : * Do not simply return with a buffer overflow error if
1246 : * !flush && t==targetLimit
1247 : * because it is possible that the source will not generate any output.
1248 : * For example, the skip callback may be called;
1249 : * it does not output anything.
1250 : */
1251 :
1252 : /* prepare the converter arguments */
1253 0 : args.converter=cnv;
1254 0 : args.flush=flush;
1255 0 : args.offsets=offsets; /* possibly advanced by ucnv_outputOverflowFromUnicode() above */
1256 0 : args.source=s;
1257 0 : args.sourceLimit=sourceLimit;
1258 0 : args.target=*target; /* re-read *target because the overflow output may have moved it */
1259 0 : args.targetLimit=targetLimit;
1260 0 : args.size=sizeof(args);
1261 :
1262 0 : _fromUnicodeWithCallback(&args, err);
1263 : /* report back how far the source was consumed and the target was filled */
1264 0 : *source=args.source;
1265 0 : *target=args.target;
1266 : }
1267 :
1268 : /* ucnv_toUnicode() --------------------------------------------------------- */
1269 : /* Core driver for the toUnicode direction: repeatedly calls the converter's toUnicode implementation, maintains the offsets array, replays bytes stored in cnv->preToU, injects U_TRUNCATED_CHAR_FOUND when flushed input ends with a partial sequence, and invokes the error callback once per error. Returns when the input is consumed or when an error cannot be resolved; pArgs and *err carry the results. */
1270 : static void
1271 0 : _toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) {
1272 : UConverterToUnicode toUnicode;
1273 : UConverter *cnv;
1274 : const char *s;
1275 : UChar *t;
1276 : int32_t *offsets;
1277 : int32_t sourceIndex;
1278 : int32_t errorInputLength;
1279 : UBool converterSawEndOfInput, calledCallback;
1280 :
1281 : /* variables for m:n conversion */
1282 : char replay[UCNV_EXT_MAX_BYTES];
1283 : const char *realSource, *realSourceLimit;
1284 : int32_t realSourceIndex;
1285 : UBool realFlush;
1286 :
1287 0 : cnv=pArgs->converter;
1288 0 : s=pArgs->source;
1289 0 : t=pArgs->target;
1290 0 : offsets=pArgs->offsets;
1291 :
1292 : /* get the converter implementation function */
1293 0 : sourceIndex=0;
1294 0 : if(offsets==NULL) {
1295 0 : toUnicode=cnv->sharedData->impl->toUnicode;
1296 : } else {
1297 0 : toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets;
1298 0 : if(toUnicode==NULL) {
1299 : /* there is no WithOffsets implementation */
1300 0 : toUnicode=cnv->sharedData->impl->toUnicode;
1301 : /* we will write -1 for each offset */
1302 0 : sourceIndex=-1;
1303 : }
1304 : }
1305 :
1306 0 : if(cnv->preToULength>=0) {
1307 : /* normal mode */
1308 0 : realSource=NULL; /* realSource==NULL means "not replaying" throughout this function */
1309 :
1310 : /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
1311 0 : realSourceLimit=NULL;
1312 0 : realFlush=FALSE;
1313 0 : realSourceIndex=0;
1314 : } else {
1315 : /*
1316 : * Previous m:n conversion stored source units from a partial match
1317 : * and failed to consume all of them.
1318 : * We need to "replay" them from a temporary buffer and convert them first.
1319 : */
1320 0 : realSource=pArgs->source;
1321 0 : realSourceLimit=pArgs->sourceLimit;
1322 0 : realFlush=pArgs->flush;
1323 0 : realSourceIndex=sourceIndex;
1324 :
1325 0 : uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
1326 0 : pArgs->source=replay;
1327 0 : pArgs->sourceLimit=replay-cnv->preToULength; /* preToULength is negative here: limit = replay + number of stored bytes */
1328 0 : pArgs->flush=FALSE; /* the real flush flag is restored when switching back to the real source */
1329 0 : sourceIndex=-1;
1330 :
1331 0 : cnv->preToULength=0;
1332 : }
1333 :
1334 : /*
1335 : * loop for conversion and error handling
1336 : *
1337 : * loop {
1338 : * convert
1339 : * loop {
1340 : * update offsets
1341 : * handle end of input
1342 : * handle errors/call callback
1343 : * }
1344 : * }
1345 : */
1346 : for(;;) {
1347 0 : if(U_SUCCESS(*err)) {
1348 : /* convert */
1349 0 : toUnicode(pArgs, err);
1350 :
1351 : /*
1352 : * set a flag for whether the converter
1353 : * successfully processed the end of the input
1354 : *
1355 : * need not check cnv->preToULength==0 because a replay (<0) will cause
1356 : * s<sourceLimit before converterSawEndOfInput is checked
1357 : */
1358 0 : converterSawEndOfInput=
1359 0 : (UBool)(U_SUCCESS(*err) &&
1360 0 : pArgs->flush && pArgs->source==pArgs->sourceLimit &&
1361 0 : cnv->toULength==0);
1362 : } else {
1363 : /* handle error from getNextUChar() or ucnv_convertEx() */
1364 0 : converterSawEndOfInput=FALSE;
1365 : }
1366 :
1367 : /* no callback called yet for this iteration */
1368 0 : calledCallback=FALSE;
1369 :
1370 : /* no sourceIndex adjustment for conversion, only for callback output */
1371 0 : errorInputLength=0;
1372 :
1373 : /*
1374 : * loop for offsets and error handling
1375 : *
1376 : * iterates at most 3 times:
1377 : * 1. to clean up after the conversion function
1378 : * 2. after the callback
1379 : * 3. after the callback again if there was truncated input
1380 : */
1381 : for(;;) {
1382 : /* update offsets if we write any */
1383 0 : if(offsets!=NULL) {
1384 0 : int32_t length=(int32_t)(pArgs->target-t); /* UChars written since t was last synchronized */
1385 0 : if(length>0) {
1386 0 : _updateOffsets(offsets, length, sourceIndex, errorInputLength);
1387 :
1388 : /*
1389 : * if a converter handles offsets and updates the offsets
1390 : * pointer at the end, then pArgs->offset should not change
1391 : * here;
1392 : * however, some converters do not handle offsets at all
1393 : * (sourceIndex<0) or may not update the offsets pointer
1394 : */
1395 0 : pArgs->offsets=offsets+=length;
1396 : }
1397 :
1398 0 : if(sourceIndex>=0) {
1399 0 : sourceIndex+=(int32_t)(pArgs->source-s); /* advance by the bytes consumed since s was last synchronized */
1400 : }
1401 : }
1402 :
1403 0 : if(cnv->preToULength<0) {
1404 : /*
1405 : * switch the source to new replay units (cannot occur while replaying)
1406 : * after offset handling and before end-of-input and callback handling
1407 : */
1408 0 : if(realSource==NULL) {
1409 0 : realSource=pArgs->source;
1410 0 : realSourceLimit=pArgs->sourceLimit;
1411 0 : realFlush=pArgs->flush;
1412 0 : realSourceIndex=sourceIndex;
1413 :
1414 0 : uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
1415 0 : pArgs->source=replay;
1416 0 : pArgs->sourceLimit=replay-cnv->preToULength;
1417 0 : pArgs->flush=FALSE;
1418 0 : if((sourceIndex+=cnv->preToULength)<0) { /* preToULength<0: subtracts the replay length */
1419 0 : sourceIndex=-1; /* clamp any negative result to -1 */
1420 : }
1421 :
1422 0 : cnv->preToULength=0;
1423 : } else {
1424 : /* see implementation note before _fromUnicodeWithCallback() */
1425 0 : U_ASSERT(realSource==NULL);
1426 0 : *err=U_INTERNAL_PROGRAM_ERROR;
1427 : }
1428 : }
1429 :
1430 : /* update pointers */
1431 0 : s=pArgs->source;
1432 0 : t=pArgs->target;
1433 :
1434 0 : if(U_SUCCESS(*err)) {
1435 0 : if(s<pArgs->sourceLimit) {
1436 : /*
1437 : * continue with the conversion loop while there is still input left
1438 : * (continue converting by breaking out of only the inner loop)
1439 : */
1440 0 : break;
1441 0 : } else if(realSource!=NULL) {
1442 : /* switch back from replaying to the real source and continue */
1443 0 : pArgs->source=realSource;
1444 0 : pArgs->sourceLimit=realSourceLimit;
1445 0 : pArgs->flush=realFlush;
1446 0 : sourceIndex=realSourceIndex;
1447 :
1448 0 : realSource=NULL;
1449 0 : break;
1450 0 : } else if(pArgs->flush && cnv->toULength>0) {
1451 : /*
1452 : * the entire input stream is consumed
1453 : * and there is a partial, truncated input sequence left
1454 : */
1455 :
1456 : /* inject an error and continue with callback handling */
1457 0 : *err=U_TRUNCATED_CHAR_FOUND;
1458 0 : calledCallback=FALSE; /* new error condition */
1459 : } else {
1460 : /* input consumed */
1461 0 : if(pArgs->flush) {
1462 : /*
1463 : * return to the conversion loop once more if the flush
1464 : * flag is set and the conversion function has not
1465 : * successfully processed the end of the input yet
1466 : *
1467 : * (continue converting by breaking out of only the inner loop)
1468 : */
1469 0 : if(!converterSawEndOfInput) {
1470 0 : break;
1471 : }
1472 :
1473 : /* reset the converter without calling the callback function */
1474 0 : _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
1475 : }
1476 :
1477 : /* done successfully */
1478 0 : return;
1479 : }
1480 : }
1481 :
1482 : /* U_FAILURE(*err) */
1483 : {
1484 : UErrorCode e;
1485 :
1486 0 : if( calledCallback || /* the callback already ran for this error and *err is still a failure */
1487 0 : (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
1488 0 : (e!=U_INVALID_CHAR_FOUND && /* only the five codes listed here are passed to the callback */
1489 0 : e!=U_ILLEGAL_CHAR_FOUND &&
1490 0 : e!=U_TRUNCATED_CHAR_FOUND &&
1491 0 : e!=U_ILLEGAL_ESCAPE_SEQUENCE &&
1492 : e!=U_UNSUPPORTED_ESCAPE_SEQUENCE)
1493 : ) {
1494 : /*
1495 : * the callback did not or cannot resolve the error:
1496 : * set output pointers and return
1497 : *
1498 : * the check for buffer overflow is redundant but it is
1499 : * a high-runner case and hopefully documents the intent
1500 : * well
1501 : *
1502 : * if we were replaying, then the replay buffer must be
1503 : * copied back into the UConverter
1504 : * and the real arguments must be restored
1505 : */
1506 0 : if(realSource!=NULL) {
1507 : int32_t length;
1508 :
1509 0 : U_ASSERT(cnv->preToULength==0);
1510 :
1511 0 : length=(int32_t)(pArgs->sourceLimit-pArgs->source); /* replay bytes not yet consumed */
1512 0 : if(length>0) {
1513 0 : uprv_memcpy(cnv->preToU, pArgs->source, length);
1514 0 : cnv->preToULength=(int8_t)-length; /* stored negated: a negative value triggers the replay path on the next call */
1515 : }
1516 :
1517 0 : pArgs->source=realSource;
1518 0 : pArgs->sourceLimit=realSourceLimit;
1519 0 : pArgs->flush=realFlush;
1520 : }
1521 :
1522 0 : return;
1523 : }
1524 : }
1525 :
1526 : /* copy toUBytes[] to invalidCharBuffer[] */
1527 0 : errorInputLength=cnv->invalidCharLength=cnv->toULength;
1528 0 : if(errorInputLength>0) {
1529 0 : uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength);
1530 : }
1531 :
1532 : /* set the converter state to deal with the next character */
1533 0 : cnv->toULength=0;
1534 :
1535 : /* call the callback function */
1536 0 : if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUND) { /* reason still at its default but the error code is U_INVALID_CHAR_FOUND */
1537 0 : cnv->toUCallbackReason = UCNV_UNASSIGNED;
1538 : }
1539 0 : cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs,
1540 : cnv->invalidCharBuffer, errorInputLength,
1541 : cnv->toUCallbackReason,
1542 0 : err);
1543 0 : cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */
1544 :
1545 : /*
1546 : * loop back to the offset handling
1547 : *
1548 : * this flag will indicate after offset handling
1549 : * that a callback was called;
1550 : * if the callback did not resolve the error, then we return
1551 : */
1552 0 : calledCallback=TRUE;
1553 0 : }
1554 0 : }
1555 : }
1556 :
1557 : /*
1558 : * Output the toUnicode overflow buffer: copy the UChars pending in cnv->UCharErrorBuffer to *target.
1559 : * Call this function if(cnv->UCharErrorBufferLength>0). On return *target (and *pOffsets, if offsets are used) point past what was written; every copied UChar gets the offset -1.
1560 : * @return TRUE if overflow (the target filled up before the buffer was emptied; the unwritten UChars stay in the overflow buffer and *err is set to U_BUFFER_OVERFLOW_ERROR), otherwise FALSE
1561 : */
1562 : static UBool
1563 0 : ucnv_outputOverflowToUnicode(UConverter *cnv,
1564 : UChar **target, const UChar *targetLimit,
1565 : int32_t **pOffsets,
1566 : UErrorCode *err) {
1567 : int32_t *offsets;
1568 : UChar *overflow, *t;
1569 : int32_t i, length;
1570 : /* work on local copies of the output pointers; they are written back before each return */
1571 0 : t=*target;
1572 0 : if(pOffsets!=NULL) {
1573 0 : offsets=*pOffsets;
1574 : } else {
1575 0 : offsets=NULL; /* the caller does not track offsets */
1576 : }
1577 :
1578 0 : overflow=cnv->UCharErrorBuffer;
1579 0 : length=cnv->UCharErrorBufferLength;
1580 0 : i=0;
1581 0 : while(i<length) {
1582 0 : if(t==targetLimit) {
1583 : /* the overflow buffer contains too much, keep the rest */
1584 0 : int32_t j=0;
1585 : /* slide the unwritten UChars overflow[i..length[ down to the start of the buffer */
1586 0 : do {
1587 0 : overflow[j++]=overflow[i++];
1588 0 : } while(i<length);
1589 :
1590 0 : cnv->UCharErrorBufferLength=(int8_t)j; /* j UChars remain pending for a later call */
1591 0 : *target=t;
1592 0 : if(offsets!=NULL) {
1593 0 : *pOffsets=offsets;
1594 : }
1595 0 : *err=U_BUFFER_OVERFLOW_ERROR;
1596 0 : return TRUE;
1597 : }
1598 :
1599 : /* copy the overflow contents to the target */
1600 0 : *t++=overflow[i++];
1601 0 : if(offsets!=NULL) {
1602 0 : *offsets++=-1; /* no source index available for old output */
1603 : }
1604 : }
1605 :
1606 : /* the overflow buffer is completely copied to the target */
1607 0 : cnv->UCharErrorBufferLength=0;
1608 0 : *target=t;
1609 0 : if(offsets!=NULL) {
1610 0 : *pOffsets=offsets;
1611 : }
1612 0 : return FALSE;
1613 : }
1614 : /* Public entry point for the toUnicode direction: validates the arguments, first drains any UChars pending in cnv->UCharErrorBuffer, then runs _toUnicodeWithCallback() and writes the advanced positions back to *source and *target. Sets *err=U_ILLEGAL_ARGUMENT_ERROR for a NULL cnv/target/source or for inconsistent or oversized buffer limits; returns without doing anything if err is NULL or already indicates a failure. */
1615 : U_CAPI void U_EXPORT2
1616 0 : ucnv_toUnicode(UConverter *cnv,
1617 : UChar **target, const UChar *targetLimit,
1618 : const char **source, const char *sourceLimit,
1619 : int32_t *offsets,
1620 : UBool flush,
1621 : UErrorCode *err) {
1622 : UConverterToUnicodeArgs args;
1623 : const char *s;
1624 : UChar *t;
1625 :
1626 : /* check parameters */
1627 0 : if(err==NULL || U_FAILURE(*err)) {
1628 0 : return;
1629 : }
1630 :
1631 0 : if(cnv==NULL || target==NULL || source==NULL) {
1632 0 : *err=U_ILLEGAL_ARGUMENT_ERROR;
1633 0 : return;
1634 : }
1635 :
1636 0 : s=*source;
1637 0 : t=*target;
1638 :
1639 0 : if ((const void *)U_MAX_PTR(targetLimit) == (const void *)targetLimit) {
1640 : /*
1641 : Prevent code from going into an infinite loop in case we do hit this
1642 : limit. The limit pointer is expected to be on a UChar * boundary.
1643 : This also prevents the next argument check from failing.
1644 : */
1645 0 : targetLimit = (const UChar *)(((const char *)targetLimit) - 1);
1646 : }
1647 :
1648 : /*
1649 : * All these conditions should never happen.
1650 : *
1651 : * 1) Make sure that the limits are >= the source and target addresses
1652 : *
1653 : * 2) Make sure that the buffer sizes do not exceed the number range for
1654 : * int32_t because some functions use the size (in units or bytes)
1655 : * rather than comparing pointers, and because offsets are int32_t values.
1656 : *
1657 : * size_t is guaranteed to be unsigned and large enough for the job.
1658 : *
1659 : * Return with an error instead of adjusting the limits because we would
1660 : * not be able to maintain the semantics that either the source must be
1661 : * consumed or the target filled (unless an error occurs).
1662 : * An adjustment would be sourceLimit=s+0x7fffffff; for example.
1663 : *
1664 : * 3) Make sure that the user didn't incorrectly cast a UChar * pointer
1665 : * to a char * pointer and provide an incomplete UChar code unit.
1666 : */
1667 0 : if (sourceLimit<s || targetLimit<t || /* check 1 */
1668 0 : ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s) || /* check 2, source length counted in bytes */
1669 0 : ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t) || /* check 2, target capacity counted in UChars */
1670 0 : (((const char *)targetLimit-(const char *)t) & 1) != 0 /* check 3: the target capacity in bytes must be even */
1671 : ) {
1672 0 : *err=U_ILLEGAL_ARGUMENT_ERROR;
1673 0 : return;
1674 : }
1675 :
1676 : /* output the target overflow buffer */
1677 0 : if( cnv->UCharErrorBufferLength>0 &&
1678 0 : ucnv_outputOverflowToUnicode(cnv, target, targetLimit, &offsets, err)
1679 : ) {
1680 : /* U_BUFFER_OVERFLOW_ERROR */
1681 0 : return;
1682 : }
1683 : /* *target may have moved, therefore stop using t */
1684 :
1685 0 : if(!flush && s==sourceLimit && cnv->preToULength>=0) { /* preToULength<0 would mean that replay bytes are still pending */
1686 : /* the overflow buffer is emptied and there is no new input: we are done */
1687 0 : return;
1688 : }
1689 :
1690 : /*
1691 : * Do not simply return with a buffer overflow error if
1692 : * !flush && t==targetLimit
1693 : * because it is possible that the source will not generate any output.
1694 : * For example, the skip callback may be called;
1695 : * it does not output anything.
1696 : */
1697 :
1698 : /* prepare the converter arguments */
1699 0 : args.converter=cnv;
1700 0 : args.flush=flush;
1701 0 : args.offsets=offsets; /* possibly advanced by ucnv_outputOverflowToUnicode() above */
1702 0 : args.source=s;
1703 0 : args.sourceLimit=sourceLimit;
1704 0 : args.target=*target; /* re-read *target because the overflow output may have moved it */
1705 0 : args.targetLimit=targetLimit;
1706 0 : args.size=sizeof(args);
1707 :
1708 0 : _toUnicodeWithCallback(&args, err);
1709 : /* report back how far the source was consumed and the target was filled */
1710 0 : *source=args.source;
1711 0 : *target=args.target;
1712 : }
1713 :
1714 : /* ucnv_to/fromUChars() ----------------------------------------------------- */
1715 : /* Convenience API: resets the converter's fromUnicode state and converts the whole UChar string src (srcLength==-1 means: measure it with u_strlen()) into dest with flush=TRUE. If dest overflows, conversion continues into a scratch buffer only to count the total output length. Returns the value of u_terminateChars() for that total length, or 0 after setting U_ILLEGAL_ARGUMENT_ERROR for bad arguments. */
1716 : U_CAPI int32_t U_EXPORT2
1717 0 : ucnv_fromUChars(UConverter *cnv,
1718 : char *dest, int32_t destCapacity,
1719 : const UChar *src, int32_t srcLength,
1720 : UErrorCode *pErrorCode) {
1721 : const UChar *srcLimit;
1722 : char *originalDest, *destLimit;
1723 : int32_t destLength;
1724 :
1725 : /* check arguments */
1726 0 : if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1727 0 : return 0;
1728 : }
1729 :
1730 0 : if( cnv==NULL ||
1731 0 : destCapacity<0 || (destCapacity>0 && dest==NULL) || /* dest may be NULL only when destCapacity is 0 */
1732 0 : srcLength<-1 || (srcLength!=0 && src==NULL) /* src may be NULL only when srcLength is 0 */
1733 : ) {
1734 0 : *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1735 0 : return 0;
1736 : }
1737 :
1738 : /* initialize */
1739 0 : ucnv_resetFromUnicode(cnv);
1740 0 : originalDest=dest;
1741 0 : if(srcLength==-1) {
1742 0 : srcLength=u_strlen(src);
1743 : }
1744 0 : if(srcLength>0) {
1745 0 : srcLimit=src+srcLength;
1746 0 : destLimit=dest+destCapacity;
1747 :
1748 : /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
1749 0 : if(destLimit<dest || (destLimit==NULL && dest!=NULL)) {
1750 0 : destLimit=(char *)U_MAX_PTR(dest);
1751 : }
1752 :
1753 : /* perform the conversion */
1754 0 : ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); /* offsets argument 0: no offsets; flush=TRUE */
1755 0 : destLength=(int32_t)(dest-originalDest);
1756 :
1757 : /* if an overflow occurs, then get the preflighting length */
1758 0 : if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
1759 : char buffer[1024];
1760 : /* buffer is scratch space: its contents are overwritten on each pass and only the byte count is kept */
1761 0 : destLimit=buffer+sizeof(buffer);
1762 0 : do {
1763 0 : dest=buffer;
1764 0 : *pErrorCode=U_ZERO_ERROR;
1765 0 : ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
1766 0 : destLength+=(int32_t)(dest-buffer);
1767 0 : } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
1768 : }
1769 : } else {
1770 0 : destLength=0;
1771 : }
1772 :
1773 0 : return u_terminateChars(originalDest, destCapacity, destLength, pErrorCode);
1774 : }
1775 : /* Convenience API: resets the converter's toUnicode state and converts the whole byte string src (srcLength==-1 means: measure it with uprv_strlen()) into dest with flush=TRUE. If dest overflows, conversion continues into a scratch buffer only to count the total output length. Returns the value of u_terminateUChars() for that total length, or 0 after setting U_ILLEGAL_ARGUMENT_ERROR for bad arguments. */
1776 : U_CAPI int32_t U_EXPORT2
1777 0 : ucnv_toUChars(UConverter *cnv,
1778 : UChar *dest, int32_t destCapacity,
1779 : const char *src, int32_t srcLength,
1780 : UErrorCode *pErrorCode) {
1781 : const char *srcLimit;
1782 : UChar *originalDest, *destLimit;
1783 : int32_t destLength;
1784 :
1785 : /* check arguments */
1786 0 : if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1787 0 : return 0;
1788 : }
1789 :
1790 0 : if( cnv==NULL ||
1791 0 : destCapacity<0 || (destCapacity>0 && dest==NULL) || /* dest may be NULL only when destCapacity is 0 */
1792 0 : srcLength<-1 || (srcLength!=0 && src==NULL)) /* src may be NULL only when srcLength is 0 */
1793 : {
1794 0 : *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1795 0 : return 0;
1796 : }
1797 :
1798 : /* initialize */
1799 0 : ucnv_resetToUnicode(cnv);
1800 0 : originalDest=dest;
1801 0 : if(srcLength==-1) {
1802 0 : srcLength=(int32_t)uprv_strlen(src);
1803 : }
1804 0 : if(srcLength>0) {
1805 0 : srcLimit=src+srcLength;
1806 0 : destLimit=dest+destCapacity;
1807 :
1808 : /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
1809 0 : if(destLimit<dest || (destLimit==NULL && dest!=NULL)) {
1810 0 : destLimit=(UChar *)U_MAX_PTR(dest);
1811 : }
1812 :
1813 : /* perform the conversion */
1814 0 : ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); /* offsets argument 0: no offsets; flush=TRUE */
1815 0 : destLength=(int32_t)(dest-originalDest);
1816 :
1817 : /* if an overflow occurs, then get the preflighting length */
1818 0 : if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR)
1819 : {
1820 : UChar buffer[1024];
1821 : /* buffer is scratch space: its contents are overwritten on each pass and only the UChar count is kept */
1822 0 : destLimit=buffer+UPRV_LENGTHOF(buffer);
1823 0 : do {
1824 0 : dest=buffer;
1825 0 : *pErrorCode=U_ZERO_ERROR;
1826 0 : ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
1827 0 : destLength+=(int32_t)(dest-buffer);
1828 : }
1829 0 : while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
1830 : }
1831 : } else {
1832 0 : destLength=0;
1833 : }
1834 :
1835 0 : return u_terminateUChars(originalDest, destCapacity, destLength, pErrorCode);
1836 : }
1837 :
1838 : /* ucnv_getNextUChar() ------------------------------------------------------ */
1839 : /* Returns the next code point converted from *source (flush==TRUE is implied) and advances *source. Output pending in cnv->UCharErrorBuffer is consumed first; a lead surrogate followed by a trail surrogate is combined into one supplementary code point; UChars produced beyond the returned code point are put back into cnv->UCharErrorBuffer. Returns 0xffff with *err set for bad arguments, conversion failures, or (U_INDEX_OUTOFBOUNDS_ERROR) when there is no output. */
1840 : U_CAPI UChar32 U_EXPORT2
1841 0 : ucnv_getNextUChar(UConverter *cnv,
1842 : const char **source, const char *sourceLimit,
1843 : UErrorCode *err) {
1844 : UConverterToUnicodeArgs args;
1845 : UChar buffer[U16_MAX_LENGTH];
1846 : const char *s;
1847 : UChar32 c;
1848 : int32_t i, length;
1849 :
1850 : /* check parameters */
1851 0 : if(err==NULL || U_FAILURE(*err)) {
1852 0 : return 0xffff;
1853 : }
1854 :
1855 0 : if(cnv==NULL || source==NULL) {
1856 0 : *err=U_ILLEGAL_ARGUMENT_ERROR;
1857 0 : return 0xffff;
1858 : }
1859 :
1860 0 : s=*source;
1861 0 : if(sourceLimit<s) {
1862 0 : *err=U_ILLEGAL_ARGUMENT_ERROR;
1863 0 : return 0xffff;
1864 : }
1865 :
1866 : /*
1867 : * Make sure that the buffer sizes do not exceed the number range for
1868 : * int32_t because some functions use the size (in units or bytes)
1869 : * rather than comparing pointers, and because offsets are int32_t values.
1870 : *
1871 : * size_t is guaranteed to be unsigned and large enough for the job.
1872 : *
1873 : * Return with an error instead of adjusting the limits because we would
1874 : * not be able to maintain the semantics that either the source must be
1875 : * consumed or the target filled (unless an error occurs).
1876 : * An adjustment would be sourceLimit=s+0x7fffffff; for example.
1877 : */
1878 0 : if(((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) {
1879 0 : *err=U_ILLEGAL_ARGUMENT_ERROR;
1880 0 : return 0xffff;
1881 : }
1882 :
1883 0 : c=U_SENTINEL; /* "no code point yet"; tested below with c<0 */
1884 :
1885 : /* flush the target overflow buffer */
1886 0 : if(cnv->UCharErrorBufferLength>0) {
1887 : UChar *overflow;
1888 :
1889 0 : overflow=cnv->UCharErrorBuffer;
1890 0 : i=0;
1891 0 : length=cnv->UCharErrorBufferLength;
1892 0 : U16_NEXT(overflow, i, length, c); /* reads one code point into c and advances i past it */
1893 :
1894 : /* move the remaining overflow contents up to the beginning */
1895 0 : if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) {
1896 0 : uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i,
1897 0 : cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
1898 : }
1899 :
1900 0 : if(!U16_IS_LEAD(c) || i<length) {
1901 0 : return c;
1902 : }
1903 : /*
1904 : * Continue if the overflow buffer contained only a lead surrogate,
1905 : * in case the converter outputs single surrogates from complete
1906 : * input sequences.
1907 : */
1908 : }
1909 :
1910 : /*
1911 : * flush==TRUE is implied for ucnv_getNextUChar()
1912 : *
1913 : * do not simply return even if s==sourceLimit because the converter may
1914 : * not have seen flush==TRUE before
1915 : */
1916 :
1917 : /* prepare the converter arguments */
1918 0 : args.converter=cnv;
1919 0 : args.flush=TRUE;
1920 0 : args.offsets=NULL;
1921 0 : args.source=s;
1922 0 : args.sourceLimit=sourceLimit;
1923 0 : args.target=buffer;
1924 0 : args.targetLimit=buffer+1; /* room for exactly one UChar at first; widened to buffer+2 below if a trail surrogate is needed */
1925 0 : args.size=sizeof(args);
1926 :
1927 0 : if(c<0) {
1928 : /*
1929 : * call the native getNextUChar() implementation if we are
1930 : * at a character boundary (toULength==0)
1931 : *
1932 : * unlike with _toUnicode(), getNextUChar() implementations must set
1933 : * U_TRUNCATED_CHAR_FOUND for truncated input,
1934 : * in addition to setting toULength/toUBytes[]
1935 : */
1936 0 : if(cnv->toULength==0 && cnv->sharedData->impl->getNextUChar!=NULL) {
1937 0 : c=cnv->sharedData->impl->getNextUChar(&args, err);
1938 0 : *source=s=args.source;
1939 0 : if(*err==U_INDEX_OUTOFBOUNDS_ERROR) {
1940 : /* reset the converter without calling the callback function */
1941 0 : _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
1942 0 : return 0xffff; /* no output */
1943 0 : } else if(U_SUCCESS(*err) && c>=0) {
1944 0 : return c;
1945 : /*
1946 : * else fall through to use _toUnicode() because
1947 : * UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all
1948 : * U_FAILURE: call _toUnicode() for callback handling (do not output c)
1949 : */
1950 : }
1951 : }
1952 :
1953 : /* convert to one UChar in buffer[0], or handle getNextUChar() errors */
1954 0 : _toUnicodeWithCallback(&args, err);
1955 :
1956 0 : if(*err==U_BUFFER_OVERFLOW_ERROR) { /* an overflow of the one-UChar target is not reported to the caller */
1957 0 : *err=U_ZERO_ERROR;
1958 : }
1959 :
1960 0 : i=0;
1961 0 : length=(int32_t)(args.target-buffer);
1962 : } else {
1963 : /* write the lead surrogate from the overflow buffer */
1964 0 : buffer[0]=(UChar)c;
1965 0 : args.target=buffer+1;
1966 0 : i=0;
1967 0 : length=1;
1968 : }
1969 :
1970 : /* buffer contents starts at i and ends before length */
1971 :
1972 0 : if(U_FAILURE(*err)) {
1973 0 : c=0xffff; /* no output */
1974 0 : } else if(length==0) {
1975 : /* no input or only state changes */
1976 0 : *err=U_INDEX_OUTOFBOUNDS_ERROR;
1977 : /* no need to reset explicitly because _toUnicodeWithCallback() did it */
1978 0 : c=0xffff; /* no output */
1979 : } else {
1980 0 : c=buffer[0];
1981 0 : i=1;
1982 0 : if(!U16_IS_LEAD(c)) {
1983 : /* consume c=buffer[0], done */
1984 : } else {
1985 : /* got a lead surrogate, see if a trail surrogate follows */
1986 : UChar c2;
1987 :
1988 0 : if(cnv->UCharErrorBufferLength>0) {
1989 : /* got overflow output from the conversion */
1990 0 : if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) {
1991 : /* got a trail surrogate, too */
1992 0 : c=U16_GET_SUPPLEMENTARY(c, c2);
1993 :
1994 : /* move the remaining overflow contents up to the beginning */
1995 0 : if((--cnv->UCharErrorBufferLength)>0) {
1996 0 : uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1,
1997 0 : cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
1998 : }
1999 : } else {
2000 : /* c is an unpaired lead surrogate, just return it */
2001 : }
2002 0 : } else if(args.source<sourceLimit) {
2003 : /* convert once more, to buffer[1] */
2004 0 : args.targetLimit=buffer+2;
2005 0 : _toUnicodeWithCallback(&args, err);
2006 0 : if(*err==U_BUFFER_OVERFLOW_ERROR) {
2007 0 : *err=U_ZERO_ERROR;
2008 : }
2009 :
2010 0 : length=(int32_t)(args.target-buffer);
2011 0 : if(U_SUCCESS(*err) && length==2 && U16_IS_TRAIL(c2=buffer[1])) {
2012 : /* got a trail surrogate, too */
2013 0 : c=U16_GET_SUPPLEMENTARY(c, c2);
2014 0 : i=2; /* both buffer[0] and buffer[1] are consumed */
2015 : }
2016 : }
2017 : }
2018 : }
2019 :
2020 : /*
2021 : * move leftover output from buffer[i..length[
2022 : * into the beginning of the overflow buffer
2023 : */
2024 0 : if(i<length) {
2025 : /* move further overflow back */
2026 0 : int32_t delta=length-i; /* number of leftover UChars in buffer[] */
2027 0 : if((length=cnv->UCharErrorBufferLength)>0) { /* length is reused here for the current overflow buffer length */
2028 0 : uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer,
2029 0 : length*U_SIZEOF_UCHAR);
2030 : }
2031 0 : cnv->UCharErrorBufferLength=(int8_t)(length+delta);
2032 :
2033 0 : cnv->UCharErrorBuffer[0]=buffer[i++];
2034 0 : if(delta>1) {
2035 0 : cnv->UCharErrorBuffer[1]=buffer[i];
2036 : }
2037 : }
2038 :
2039 0 : *source=args.source;
2040 0 : return c;
2041 : }
2042 :
2043 : /* ucnv_convert() and siblings ---------------------------------------------- */
2044 :
/*
 * ucnv_convertEx(): convert bytes from *source (charset of sourceCnv) into
 * bytes at *target (charset of targetCnv), going through a UTF-16 pivot buffer
 * unless one of the converters offers a direct UTF-8 conversion function.
 *
 * Parameters, as used by the code below:
 *  targetCnv, sourceCnv  converters; must not be NULL
 *  target, targetLimit   in/out output pointer and its limit; *target is
 *                        advanced to the end of the written output
 *  source, sourceLimit   in/out input pointer and its limit; a NULL sourceLimit
 *                        means the input is terminated by a single NUL byte
 *  pivotStart, pivotSource, pivotTarget, pivotLimit
 *                        caller-owned pivot buffer and its read/write cursors;
 *                        pivotStart may be NULL only when flush is set, in which
 *                        case a stack buffer of CHUNK_SIZE UChars is used
 *  reset                 if set, both converters and the pivot cursors are reset
 *  flush                 if set, the input is treated as complete and, on
 *                        success, the output is NUL-terminated when there is room
 *  pErrorCode            in/out ICU error code; nothing is done if it is NULL or
 *                        already indicates a failure
 *
 * Invalid argument combinations set U_ILLEGAL_ARGUMENT_ERROR.
 * If flush is set and the output exactly fills the target buffer,
 * U_STRING_NOT_TERMINATED_WARNING is set instead of writing a terminator.
 */
2045 : U_CAPI void U_EXPORT2
2046 0 : ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
2047 : char **target, const char *targetLimit,
2048 : const char **source, const char *sourceLimit,
2049 : UChar *pivotStart, UChar **pivotSource,
2050 : UChar **pivotTarget, const UChar *pivotLimit,
2051 : UBool reset, UBool flush,
2052 : UErrorCode *pErrorCode) {
2053 : UChar pivotBuffer[CHUNK_SIZE];
2054 : const UChar *myPivotSource;
2055 : UChar *myPivotTarget;
2056 : const char *s;
2057 : char *t;
2058 :
2059 : UConverterToUnicodeArgs toUArgs;
2060 : UConverterFromUnicodeArgs fromUArgs;
2061 : UConverterConvert convert;
2062 :
2063 : /* error checking */
2064 0 : if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2065 0 : return;
2066 : }
2067 :
2068 0 : if( targetCnv==NULL || sourceCnv==NULL ||
2069 0 : source==NULL || *source==NULL ||
2070 0 : target==NULL || *target==NULL || targetLimit==NULL
2071 : ) {
2072 0 : *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2073 0 : return;
2074 : }
2075 :
2076 0 : s=*source;
2077 0 : t=*target;
/* reject limits that lie before the corresponding start pointer */
2078 0 : if((sourceLimit!=NULL && sourceLimit<s) || targetLimit<t) {
2079 0 : *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2080 0 : return;
2081 : }
2082 :
2083 : /*
2084 : * Make sure that the buffer sizes do not exceed the number range for
2085 : * int32_t. See ucnv_toUnicode() for a more detailed comment.
2086 : */
2087 0 : if(
2088 0 : (sourceLimit!=NULL && ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) ||
2089 0 : ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t)
2090 : ) {
2091 0 : *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2092 0 : return;
2093 : }
2094 :
2095 0 : if(pivotStart==NULL) {
2096 0 : if(!flush) {
2097 : /* streaming conversion requires an explicit pivot buffer */
2098 0 : *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2099 0 : return;
2100 : }
2101 :
2102 : /* use the stack pivot buffer */
2103 0 : myPivotSource=myPivotTarget=pivotStart=pivotBuffer;
/* myPivotSource is declared const UChar *; the cast adapts it to the UChar ** parameter type */
2104 0 : pivotSource=(UChar **)&myPivotSource;
2105 0 : pivotTarget=&myPivotTarget;
2106 0 : pivotLimit=pivotBuffer+CHUNK_SIZE;
2107 0 : } else if( pivotStart>=pivotLimit ||
2108 0 : pivotSource==NULL || *pivotSource==NULL ||
2109 0 : pivotTarget==NULL || *pivotTarget==NULL ||
2110 : pivotLimit==NULL
2111 : ) {
2112 0 : *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2113 0 : return;
2114 : }
2115 :
2116 0 : if(sourceLimit==NULL) {
2117 : /* get limit of single-byte-NUL-terminated source string */
2118 0 : sourceLimit=uprv_strchr(*source, 0);
2119 : }
2120 :
2121 0 : if(reset) {
2122 0 : ucnv_resetToUnicode(sourceCnv);
2123 0 : ucnv_resetFromUnicode(targetCnv);
2124 0 : *pivotSource=*pivotTarget=pivotStart;
2125 0 : } else if(targetCnv->charErrorBufferLength>0) {
2126 : /* output the targetCnv overflow buffer */
2127 0 : if(ucnv_outputOverflowFromUnicode(targetCnv, target, targetLimit, NULL, pErrorCode)) {
2128 : /* U_BUFFER_OVERFLOW_ERROR */
2129 0 : return;
2130 : }
2131 : /* *target has moved, therefore stop using t */
2132 :
2133 0 : if( !flush &&
2134 0 : targetCnv->preFromULength>=0 && *pivotSource==*pivotTarget &&
2135 0 : sourceCnv->UCharErrorBufferLength==0 && sourceCnv->preToULength>=0 && s==sourceLimit
2136 : ) {
2137 : /* the fromUnicode overflow buffer is emptied and there is no new input: we are done */
2138 0 : return;
2139 : }
2140 : }
2141 :
2142 : /* Is direct-UTF-8 conversion available? */
2143 0 : if( sourceCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&
2144 0 : targetCnv->sharedData->impl->fromUTF8!=NULL
2145 : ) {
2146 0 : convert=targetCnv->sharedData->impl->fromUTF8;
2147 0 : } else if( targetCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&
2148 0 : sourceCnv->sharedData->impl->toUTF8!=NULL
2149 : ) {
2150 0 : convert=sourceCnv->sharedData->impl->toUTF8;
2151 : } else {
2152 0 : convert=NULL;
2153 : }
2154 :
2155 : /*
2156 : * If direct-UTF-8 conversion is available, then we use a smaller
2157 : * pivot buffer for error handling and partial matches
2158 : * so that we quickly return to direct conversion.
2159 : *
2160 : * 32 is large enough for UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH.
2161 : *
2162 : * We could reduce the pivot buffer size further, at the cost of
2163 : * buffer overflows from callbacks.
2164 : * The pivot buffer should not be smaller than the maximum number of
2165 : * fromUnicode extension table input UChars
2166 : * (for m:n conversion, see
2167 : * targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS])
2168 : * or 2 for surrogate pairs.
2169 : *
2170 : * Too small a buffer can cause thrashing between pivoting and direct
2171 : * conversion, with function call overhead outweighing the benefits
2172 : * of direct conversion.
2173 : */
2174 0 : if(convert!=NULL && (pivotLimit-pivotStart)>32) {
2175 0 : pivotLimit=pivotStart+32;
2176 : }
2177 :
2178 : /* prepare the converter arguments */
2179 0 : fromUArgs.converter=targetCnv;
/* fromUArgs.flush starts out FALSE; it is set to TRUE inside the loop once toUnicode has seen the end of the input */
2180 0 : fromUArgs.flush=FALSE;
2181 0 : fromUArgs.offsets=NULL;
2182 0 : fromUArgs.target=*target;
2183 0 : fromUArgs.targetLimit=targetLimit;
2184 0 : fromUArgs.size=sizeof(fromUArgs);
2185 :
2186 0 : toUArgs.converter=sourceCnv;
2187 0 : toUArgs.flush=flush;
2188 0 : toUArgs.offsets=NULL;
2189 0 : toUArgs.source=s;
2190 0 : toUArgs.sourceLimit=sourceLimit;
2191 0 : toUArgs.targetLimit=pivotLimit;
2192 0 : toUArgs.size=sizeof(toUArgs);
2193 :
2194 : /*
2195 : * TODO: Consider separating this function into two functions,
2196 : * extracting exactly the conversion loop,
2197 : * for readability and to reduce the set of visible variables.
2198 : *
2199 : * Otherwise stop using s and t from here on.
2200 : */
2201 0 : s=t=NULL;
2202 :
2203 : /*
2204 : * conversion loop
2205 : *
2206 : * The sequence of steps in the loop may appear backward,
2207 : * but the principle is simple:
2208 : * In the chain of
2209 : * source - sourceCnv overflow - pivot - targetCnv overflow - target
2210 : * empty out later buffers before refilling them from earlier ones.
2211 : *
2212 : * The targetCnv overflow buffer is flushed out only once before the loop.
2213 : */
2214 : for(;;) {
2215 : /*
2216 : * if(pivot not empty or error or replay or flush fromUnicode) {
2217 : * fromUnicode(pivot -> target);
2218 : * }
2219 : *
2220 : * For pivoting conversion; and for direct conversion for
2221 : * error callback handling and flushing the replay buffer.
2222 : */
2223 0 : if( *pivotSource<*pivotTarget ||
2224 0 : U_FAILURE(*pErrorCode) ||
2225 0 : targetCnv->preFromULength<0 ||
2226 0 : fromUArgs.flush
2227 : ) {
2228 0 : fromUArgs.source=*pivotSource;
2229 0 : fromUArgs.sourceLimit=*pivotTarget;
2230 0 : _fromUnicodeWithCallback(&fromUArgs, pErrorCode);
2231 0 : if(U_FAILURE(*pErrorCode)) {
2232 : /* target overflow, or conversion error */
2233 0 : *pivotSource=(UChar *)fromUArgs.source;
2234 0 : break;
2235 : }
2236 :
2237 : /*
2238 : * _fromUnicodeWithCallback() must have consumed the pivot contents
2239 : * (*pivotSource==*pivotTarget) since it returned with U_SUCCESS()
2240 : */
2241 : }
2242 :
2243 : /* The pivot buffer is empty; reset it so we start at pivotStart. */
2244 0 : *pivotSource=*pivotTarget=pivotStart;
2245 :
2246 : /*
2247 : * if(sourceCnv overflow buffer not empty) {
2248 : * move(sourceCnv overflow buffer -> pivot);
2249 : * continue;
2250 : * }
2251 : */
2252 : /* output the sourceCnv overflow buffer */
2253 0 : if(sourceCnv->UCharErrorBufferLength>0) {
2254 0 : if(ucnv_outputOverflowToUnicode(sourceCnv, pivotTarget, pivotLimit, NULL, pErrorCode)) {
2255 : /* U_BUFFER_OVERFLOW_ERROR */
2256 0 : *pErrorCode=U_ZERO_ERROR;
2257 : }
2258 0 : continue;
2259 : }
2260 :
2261 : /*
2262 : * check for end of input and break if done
2263 : *
2264 : * Checking both flush and fromUArgs.flush ensures that the converters
2265 : * have been called with the flush flag set if the ucnv_convertEx()
2266 : * caller set it.
2267 : */
2268 0 : if( toUArgs.source==sourceLimit &&
2269 0 : sourceCnv->preToULength>=0 && sourceCnv->toULength==0 &&
2270 0 : (!flush || fromUArgs.flush)
2271 : ) {
2272 : /* done successfully */
2273 : break;
2274 : }
2275 :
2276 : /*
2277 : * use direct conversion if available
2278 : * but not if continuing a partial match
2279 : * or flushing the toUnicode replay buffer
2280 : */
2281 0 : if(convert!=NULL && targetCnv->preFromUFirstCP<0 && sourceCnv->preToULength==0) {
2282 0 : if(*pErrorCode==U_USING_DEFAULT_WARNING) {
2283 : /* remove a warning that may be set by this function */
2284 0 : *pErrorCode=U_ZERO_ERROR;
2285 : }
2286 0 : convert(&fromUArgs, &toUArgs, pErrorCode);
2287 0 : if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2288 0 : break;
2289 0 : } else if(U_FAILURE(*pErrorCode)) {
2290 0 : if(sourceCnv->toULength>0) {
2291 : /*
2292 : * Fall through to calling _toUnicodeWithCallback()
2293 : * for callback handling.
2294 : *
2295 : * The pivot buffer will be reset with
2296 : * *pivotSource=*pivotTarget=pivotStart;
2297 : * which indicates a toUnicode error to the caller
2298 : * (*pivotSource==pivotStart shows no pivot UChars consumed).
2299 : */
2300 : } else {
2301 : /*
2302 : * Indicate a fromUnicode error to the caller
2303 : * (*pivotSource>pivotStart shows some pivot UChars consumed).
2304 : */
2305 0 : *pivotSource=*pivotTarget=pivotStart+1;
2306 : /*
2307 : * Loop around to calling _fromUnicodeWithCallbacks()
2308 : * for callback handling.
2309 : */
2310 0 : continue;
2311 : }
2312 0 : } else if(*pErrorCode==U_USING_DEFAULT_WARNING) {
2313 : /*
2314 : * No error, but the implementation requested to temporarily
2315 : * fall back to pivoting.
2316 : */
2317 0 : *pErrorCode=U_ZERO_ERROR;
2318 : /*
2319 : * The following else branches are almost identical to the end-of-input
2320 : * handling in _toUnicodeWithCallback().
2321 : * Avoid calling it just for the end of input.
2322 : */
2323 0 : } else if(flush && sourceCnv->toULength>0) { /* flush==toUArgs.flush */
2324 : /*
2325 : * the entire input stream is consumed
2326 : * and there is a partial, truncated input sequence left
2327 : */
2328 :
2329 : /* inject an error and continue with callback handling */
2330 0 : *pErrorCode=U_TRUNCATED_CHAR_FOUND;
2331 : } else {
2332 : /* input consumed */
2333 0 : if(flush) {
2334 : /* reset the converters without calling the callback functions */
2335 0 : _reset(sourceCnv, UCNV_RESET_TO_UNICODE, FALSE);
2336 0 : _reset(targetCnv, UCNV_RESET_FROM_UNICODE, FALSE);
2337 : }
2338 :
2339 : /* done successfully */
2340 0 : break;
2341 : }
2342 : }
2343 :
2344 : /*
2345 : * toUnicode(source -> pivot);
2346 : *
2347 : * For pivoting conversion; and for direct conversion for
2348 : * error callback handling, continuing partial matches
2349 : * and flushing the replay buffer.
2350 : *
2351 : * The pivot buffer is empty and reset.
2352 : */
2353 0 : toUArgs.target=pivotStart; /* ==*pivotTarget */
2354 : /* toUArgs.targetLimit=pivotLimit; already set before the loop */
2355 0 : _toUnicodeWithCallback(&toUArgs, pErrorCode);
2356 0 : *pivotTarget=toUArgs.target;
2357 0 : if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2358 : /* pivot overflow: continue with the conversion loop */
2359 0 : *pErrorCode=U_ZERO_ERROR;
2360 0 : } else if(U_FAILURE(*pErrorCode) || (!flush && *pivotTarget==pivotStart)) {
2361 : /* conversion error, or there was nothing left to convert */
2362 0 : break;
2363 : }
2364 : /*
2365 : * else:
2366 : * _toUnicodeWithCallback() wrote into the pivot buffer,
2367 : * continue with fromUnicode conversion.
2368 : *
2369 : * Set the fromUnicode flush flag if we flush and if toUnicode has
2370 : * processed the end of the input.
2371 : */
2372 0 : if( flush && toUArgs.source==sourceLimit &&
2373 0 : sourceCnv->preToULength>=0 &&
2374 0 : sourceCnv->UCharErrorBufferLength==0
2375 : ) {
2376 0 : fromUArgs.flush=TRUE;
2377 : }
2378 : }
2379 :
2380 : /*
2381 : * The conversion loop is exited when one of the following is true:
2382 : * - the entire source text has been converted successfully to the target buffer
2383 : * - a target buffer overflow occurred
2384 : * - a conversion error occurred
2385 : */
2386 :
/* report back to the caller how far input and output have advanced */
2387 0 : *source=toUArgs.source;
2388 0 : *target=fromUArgs.target;
2389 :
2390 : /* terminate the target buffer if possible */
2391 0 : if(flush && U_SUCCESS(*pErrorCode)) {
2392 0 : if(*target!=targetLimit) {
2393 0 : **target=0;
2394 0 : if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {
2395 0 : *pErrorCode=U_ZERO_ERROR;
2396 : }
2397 : } else {
2398 0 : *pErrorCode=U_STRING_NOT_TERMINATED_WARNING;
2399 : }
2400 : }
2401 : }
2402 :
2403 : /* internal implementation of ucnv_convert() etc. with preflighting */
2404 : static int32_t
2405 0 : ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter,
2406 : char *target, int32_t targetCapacity,
2407 : const char *source, int32_t sourceLength,
2408 : UErrorCode *pErrorCode) {
2409 : UChar pivotBuffer[CHUNK_SIZE];
2410 : UChar *pivot, *pivot2;
2411 :
2412 : char *myTarget;
2413 : const char *sourceLimit;
2414 : const char *targetLimit;
2415 0 : int32_t targetLength=0;
2416 :
2417 : /* set up */
2418 0 : if(sourceLength<0) {
2419 0 : sourceLimit=uprv_strchr(source, 0);
2420 : } else {
2421 0 : sourceLimit=source+sourceLength;
2422 : }
2423 :
2424 : /* if there is no input data, we're done */
2425 0 : if(source==sourceLimit) {
2426 0 : return u_terminateChars(target, targetCapacity, 0, pErrorCode);
2427 : }
2428 :
2429 0 : pivot=pivot2=pivotBuffer;
2430 0 : myTarget=target;
2431 0 : targetLength=0;
2432 :
2433 0 : if(targetCapacity>0) {
2434 : /* perform real conversion */
2435 0 : targetLimit=target+targetCapacity;
2436 0 : ucnv_convertEx(outConverter, inConverter,
2437 : &myTarget, targetLimit,
2438 : &source, sourceLimit,
2439 : pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
2440 : FALSE,
2441 : TRUE,
2442 0 : pErrorCode);
2443 0 : targetLength=(int32_t)(myTarget-target);
2444 : }
2445 :
2446 : /*
2447 : * If the output buffer is exhausted (or we are only "preflighting"), we need to stop writing
2448 : * to it but continue the conversion in order to store in targetCapacity
2449 : * the number of bytes that was required.
2450 : */
2451 0 : if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || targetCapacity==0)
2452 : {
2453 : char targetBuffer[CHUNK_SIZE];
2454 :
2455 0 : targetLimit=targetBuffer+CHUNK_SIZE;
2456 0 : do {
2457 0 : *pErrorCode=U_ZERO_ERROR;
2458 0 : myTarget=targetBuffer;
2459 0 : ucnv_convertEx(outConverter, inConverter,
2460 : &myTarget, targetLimit,
2461 : &source, sourceLimit,
2462 : pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
2463 : FALSE,
2464 : TRUE,
2465 0 : pErrorCode);
2466 0 : targetLength+=(int32_t)(myTarget-targetBuffer);
2467 0 : } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
2468 :
2469 : /* done with preflighting, set warnings and errors as appropriate */
2470 0 : return u_terminateChars(target, targetCapacity, targetLength, pErrorCode);
2471 : }
2472 :
2473 : /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */
2474 0 : return targetLength;
2475 : }
2476 :
2477 : U_CAPI int32_t U_EXPORT2
2478 0 : ucnv_convert(const char *toConverterName, const char *fromConverterName,
2479 : char *target, int32_t targetCapacity,
2480 : const char *source, int32_t sourceLength,
2481 : UErrorCode *pErrorCode) {
2482 : UConverter in, out; /* stack-allocated */
2483 : UConverter *inConverter, *outConverter;
2484 : int32_t targetLength;
2485 :
2486 0 : if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2487 0 : return 0;
2488 : }
2489 :
2490 0 : if( source==NULL || sourceLength<-1 ||
2491 0 : targetCapacity<0 || (targetCapacity>0 && target==NULL)
2492 : ) {
2493 0 : *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2494 0 : return 0;
2495 : }
2496 :
2497 : /* if there is no input data, we're done */
2498 0 : if(sourceLength==0 || (sourceLength<0 && *source==0)) {
2499 0 : return u_terminateChars(target, targetCapacity, 0, pErrorCode);
2500 : }
2501 :
2502 : /* create the converters */
2503 0 : inConverter=ucnv_createConverter(&in, fromConverterName, pErrorCode);
2504 0 : if(U_FAILURE(*pErrorCode)) {
2505 0 : return 0;
2506 : }
2507 :
2508 0 : outConverter=ucnv_createConverter(&out, toConverterName, pErrorCode);
2509 0 : if(U_FAILURE(*pErrorCode)) {
2510 0 : ucnv_close(inConverter);
2511 0 : return 0;
2512 : }
2513 :
2514 : targetLength=ucnv_internalConvert(outConverter, inConverter,
2515 : target, targetCapacity,
2516 : source, sourceLength,
2517 0 : pErrorCode);
2518 :
2519 0 : ucnv_close(inConverter);
2520 0 : ucnv_close(outConverter);
2521 :
2522 0 : return targetLength;
2523 : }
2524 :
2525 : /* @internal */
2526 : static int32_t
2527 0 : ucnv_convertAlgorithmic(UBool convertToAlgorithmic,
2528 : UConverterType algorithmicType,
2529 : UConverter *cnv,
2530 : char *target, int32_t targetCapacity,
2531 : const char *source, int32_t sourceLength,
2532 : UErrorCode *pErrorCode) {
2533 : UConverter algoConverterStatic; /* stack-allocated */
2534 : UConverter *algoConverter, *to, *from;
2535 : int32_t targetLength;
2536 :
2537 0 : if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2538 0 : return 0;
2539 : }
2540 :
2541 0 : if( cnv==NULL || source==NULL || sourceLength<-1 ||
2542 0 : targetCapacity<0 || (targetCapacity>0 && target==NULL)
2543 : ) {
2544 0 : *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2545 0 : return 0;
2546 : }
2547 :
2548 : /* if there is no input data, we're done */
2549 0 : if(sourceLength==0 || (sourceLength<0 && *source==0)) {
2550 0 : return u_terminateChars(target, targetCapacity, 0, pErrorCode);
2551 : }
2552 :
2553 : /* create the algorithmic converter */
2554 : algoConverter=ucnv_createAlgorithmicConverter(&algoConverterStatic, algorithmicType,
2555 0 : "", 0, pErrorCode);
2556 0 : if(U_FAILURE(*pErrorCode)) {
2557 0 : return 0;
2558 : }
2559 :
2560 : /* reset the other converter */
2561 0 : if(convertToAlgorithmic) {
2562 : /* cnv->Unicode->algo */
2563 0 : ucnv_resetToUnicode(cnv);
2564 0 : to=algoConverter;
2565 0 : from=cnv;
2566 : } else {
2567 : /* algo->Unicode->cnv */
2568 0 : ucnv_resetFromUnicode(cnv);
2569 0 : from=algoConverter;
2570 0 : to=cnv;
2571 : }
2572 :
2573 : targetLength=ucnv_internalConvert(to, from,
2574 : target, targetCapacity,
2575 : source, sourceLength,
2576 0 : pErrorCode);
2577 :
2578 0 : ucnv_close(algoConverter);
2579 :
2580 0 : return targetLength;
2581 : }
2582 :
2583 : U_CAPI int32_t U_EXPORT2
2584 0 : ucnv_toAlgorithmic(UConverterType algorithmicType,
2585 : UConverter *cnv,
2586 : char *target, int32_t targetCapacity,
2587 : const char *source, int32_t sourceLength,
2588 : UErrorCode *pErrorCode) {
2589 : return ucnv_convertAlgorithmic(TRUE, algorithmicType, cnv,
2590 : target, targetCapacity,
2591 : source, sourceLength,
2592 0 : pErrorCode);
2593 : }
2594 :
2595 : U_CAPI int32_t U_EXPORT2
2596 0 : ucnv_fromAlgorithmic(UConverter *cnv,
2597 : UConverterType algorithmicType,
2598 : char *target, int32_t targetCapacity,
2599 : const char *source, int32_t sourceLength,
2600 : UErrorCode *pErrorCode) {
2601 : return ucnv_convertAlgorithmic(FALSE, algorithmicType, cnv,
2602 : target, targetCapacity,
2603 : source, sourceLength,
2604 0 : pErrorCode);
2605 : }
2606 :
2607 : U_CAPI UConverterType U_EXPORT2
2608 0 : ucnv_getType(const UConverter* converter)
2609 : {
2610 0 : int8_t type = converter->sharedData->staticData->conversionType;
2611 : #if !UCONFIG_NO_LEGACY_CONVERSION
2612 : if(type == UCNV_MBCS) {
2613 : return ucnv_MBCSGetType(converter);
2614 : }
2615 : #endif
2616 0 : return (UConverterType)type;
2617 : }
2618 :
2619 : U_CAPI void U_EXPORT2
2620 0 : ucnv_getStarters(const UConverter* converter,
2621 : UBool starters[256],
2622 : UErrorCode* err)
2623 : {
2624 0 : if (err == NULL || U_FAILURE(*err)) {
2625 0 : return;
2626 : }
2627 :
2628 0 : if(converter->sharedData->impl->getStarters != NULL) {
2629 0 : converter->sharedData->impl->getStarters(converter, starters, err);
2630 : } else {
2631 0 : *err = U_ILLEGAL_ARGUMENT_ERROR;
2632 : }
2633 : }
2634 :
2635 0 : static const UAmbiguousConverter *ucnv_getAmbiguous(const UConverter *cnv)
2636 : {
2637 : UErrorCode errorCode;
2638 : const char *name;
2639 : int32_t i;
2640 :
2641 0 : if(cnv==NULL) {
2642 0 : return NULL;
2643 : }
2644 :
2645 0 : errorCode=U_ZERO_ERROR;
2646 0 : name=ucnv_getName(cnv, &errorCode);
2647 0 : if(U_FAILURE(errorCode)) {
2648 0 : return NULL;
2649 : }
2650 :
2651 0 : for(i=0; i<UPRV_LENGTHOF(ambiguousConverters); ++i)
2652 : {
2653 0 : if(0==uprv_strcmp(name, ambiguousConverters[i].name))
2654 : {
2655 0 : return ambiguousConverters+i;
2656 : }
2657 : }
2658 :
2659 0 : return NULL;
2660 : }
2661 :
2662 : U_CAPI void U_EXPORT2
2663 0 : ucnv_fixFileSeparator(const UConverter *cnv,
2664 : UChar* source,
2665 : int32_t sourceLength) {
2666 : const UAmbiguousConverter *a;
2667 : int32_t i;
2668 : UChar variant5c;
2669 :
2670 0 : if(cnv==NULL || source==NULL || sourceLength<=0 || (a=ucnv_getAmbiguous(cnv))==NULL)
2671 : {
2672 0 : return;
2673 : }
2674 :
2675 0 : variant5c=a->variant5c;
2676 0 : for(i=0; i<sourceLength; ++i) {
2677 0 : if(source[i]==variant5c) {
2678 0 : source[i]=0x5c;
2679 : }
2680 : }
2681 : }
2682 :
2683 : U_CAPI UBool U_EXPORT2
2684 0 : ucnv_isAmbiguous(const UConverter *cnv) {
2685 0 : return (UBool)(ucnv_getAmbiguous(cnv)!=NULL);
2686 : }
2687 :
2688 : U_CAPI void U_EXPORT2
2689 0 : ucnv_setFallback(UConverter *cnv, UBool usesFallback)
2690 : {
2691 0 : cnv->useFallback = usesFallback;
2692 0 : }
2693 :
2694 : U_CAPI UBool U_EXPORT2
2695 0 : ucnv_usesFallback(const UConverter *cnv)
2696 : {
2697 0 : return cnv->useFallback;
2698 : }
2699 :
2700 : U_CAPI void U_EXPORT2
2701 0 : ucnv_getInvalidChars (const UConverter * converter,
2702 : char *errBytes,
2703 : int8_t * len,
2704 : UErrorCode * err)
2705 : {
2706 0 : if (err == NULL || U_FAILURE(*err))
2707 : {
2708 0 : return;
2709 : }
2710 0 : if (len == NULL || errBytes == NULL || converter == NULL)
2711 : {
2712 0 : *err = U_ILLEGAL_ARGUMENT_ERROR;
2713 0 : return;
2714 : }
2715 0 : if (*len < converter->invalidCharLength)
2716 : {
2717 0 : *err = U_INDEX_OUTOFBOUNDS_ERROR;
2718 0 : return;
2719 : }
2720 0 : if ((*len = converter->invalidCharLength) > 0)
2721 : {
2722 0 : uprv_memcpy (errBytes, converter->invalidCharBuffer, *len);
2723 : }
2724 : }
2725 :
2726 : U_CAPI void U_EXPORT2
2727 0 : ucnv_getInvalidUChars (const UConverter * converter,
2728 : UChar *errChars,
2729 : int8_t * len,
2730 : UErrorCode * err)
2731 : {
2732 0 : if (err == NULL || U_FAILURE(*err))
2733 : {
2734 0 : return;
2735 : }
2736 0 : if (len == NULL || errChars == NULL || converter == NULL)
2737 : {
2738 0 : *err = U_ILLEGAL_ARGUMENT_ERROR;
2739 0 : return;
2740 : }
2741 0 : if (*len < converter->invalidUCharLength)
2742 : {
2743 0 : *err = U_INDEX_OUTOFBOUNDS_ERROR;
2744 0 : return;
2745 : }
2746 0 : if ((*len = converter->invalidUCharLength) > 0)
2747 : {
2748 0 : u_memcpy (errChars, converter->invalidUCharBuffer, *len);
2749 : }
2750 : }
2751 :
2752 : #define SIG_MAX_LEN 5
2753 :
2754 : U_CAPI const char* U_EXPORT2
2755 0 : ucnv_detectUnicodeSignature( const char* source,
2756 : int32_t sourceLength,
2757 : int32_t* signatureLength,
2758 : UErrorCode* pErrorCode) {
2759 : int32_t dummy;
2760 :
2761 : /* initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN
2762 : * bytes we don't misdetect something
2763 : */
2764 0 : char start[SIG_MAX_LEN]={ '\xa5', '\xa5', '\xa5', '\xa5', '\xa5' };
2765 0 : int i = 0;
2766 :
2767 0 : if((pErrorCode==NULL) || U_FAILURE(*pErrorCode)){
2768 0 : return NULL;
2769 : }
2770 :
2771 0 : if(source == NULL || sourceLength < -1){
2772 0 : *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
2773 0 : return NULL;
2774 : }
2775 :
2776 0 : if(signatureLength == NULL) {
2777 0 : signatureLength = &dummy;
2778 : }
2779 :
2780 0 : if(sourceLength==-1){
2781 0 : sourceLength=(int32_t)uprv_strlen(source);
2782 : }
2783 :
2784 :
2785 0 : while(i<sourceLength&& i<SIG_MAX_LEN){
2786 0 : start[i]=source[i];
2787 0 : i++;
2788 : }
2789 :
2790 0 : if(start[0] == '\xFE' && start[1] == '\xFF') {
2791 0 : *signatureLength=2;
2792 0 : return "UTF-16BE";
2793 0 : } else if(start[0] == '\xFF' && start[1] == '\xFE') {
2794 0 : if(start[2] == '\x00' && start[3] =='\x00') {
2795 0 : *signatureLength=4;
2796 0 : return "UTF-32LE";
2797 : } else {
2798 0 : *signatureLength=2;
2799 0 : return "UTF-16LE";
2800 : }
2801 0 : } else if(start[0] == '\xEF' && start[1] == '\xBB' && start[2] == '\xBF') {
2802 0 : *signatureLength=3;
2803 0 : return "UTF-8";
2804 0 : } else if(start[0] == '\x00' && start[1] == '\x00' &&
2805 0 : start[2] == '\xFE' && start[3]=='\xFF') {
2806 0 : *signatureLength=4;
2807 0 : return "UTF-32BE";
2808 0 : } else if(start[0] == '\x0E' && start[1] == '\xFE' && start[2] == '\xFF') {
2809 0 : *signatureLength=3;
2810 0 : return "SCSU";
2811 0 : } else if(start[0] == '\xFB' && start[1] == '\xEE' && start[2] == '\x28') {
2812 0 : *signatureLength=3;
2813 0 : return "BOCU-1";
2814 0 : } else if(start[0] == '\x2B' && start[1] == '\x2F' && start[2] == '\x76') {
2815 : /*
2816 : * UTF-7: Initial U+FEFF is encoded as +/v8 or +/v9 or +/v+ or +/v/
2817 : * depending on the second UTF-16 code unit.
2818 : * Detect the entire, closed Unicode mode sequence +/v8- for only U+FEFF
2819 : * if it occurs.
2820 : *
2821 : * So far we have +/v
2822 : */
2823 0 : if(start[3] == '\x38' && start[4] == '\x2D') {
2824 : /* 5 bytes +/v8- */
2825 0 : *signatureLength=5;
2826 0 : return "UTF-7";
2827 0 : } else if(start[3] == '\x38' || start[3] == '\x39' || start[3] == '\x2B' || start[3] == '\x2F') {
2828 : /* 4 bytes +/v8 or +/v9 or +/v+ or +/v/ */
2829 0 : *signatureLength=4;
2830 0 : return "UTF-7";
2831 : }
2832 0 : }else if(start[0]=='\xDD' && start[1]== '\x73'&& start[2]=='\x66' && start[3]=='\x73'){
2833 0 : *signatureLength=4;
2834 0 : return "UTF-EBCDIC";
2835 : }
2836 :
2837 :
2838 : /* no known Unicode signature byte sequence recognized */
2839 0 : *signatureLength=0;
2840 0 : return NULL;
2841 : }
2842 :
2843 : U_CAPI int32_t U_EXPORT2
2844 0 : ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status)
2845 : {
2846 0 : if(status == NULL || U_FAILURE(*status)){
2847 0 : return -1;
2848 : }
2849 0 : if(cnv == NULL){
2850 0 : *status = U_ILLEGAL_ARGUMENT_ERROR;
2851 0 : return -1;
2852 : }
2853 :
2854 0 : if(cnv->preFromUFirstCP >= 0){
2855 0 : return U16_LENGTH(cnv->preFromUFirstCP)+cnv->preFromULength ;
2856 0 : }else if(cnv->preFromULength < 0){
2857 0 : return -cnv->preFromULength ;
2858 0 : }else if(cnv->fromUChar32 > 0){
2859 0 : return 1;
2860 : }
2861 0 : return 0;
2862 :
2863 : }
2864 :
2865 : U_CAPI int32_t U_EXPORT2
2866 0 : ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status){
2867 :
2868 0 : if(status == NULL || U_FAILURE(*status)){
2869 0 : return -1;
2870 : }
2871 0 : if(cnv == NULL){
2872 0 : *status = U_ILLEGAL_ARGUMENT_ERROR;
2873 0 : return -1;
2874 : }
2875 :
2876 0 : if(cnv->preToULength > 0){
2877 0 : return cnv->preToULength ;
2878 0 : }else if(cnv->preToULength < 0){
2879 0 : return -cnv->preToULength;
2880 0 : }else if(cnv->toULength > 0){
2881 0 : return cnv->toULength;
2882 : }
2883 0 : return 0;
2884 : }
2885 :
2886 : U_CAPI UBool U_EXPORT2
2887 0 : ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status){
2888 0 : if (U_FAILURE(*status)) {
2889 0 : return FALSE;
2890 : }
2891 :
2892 0 : if (cnv == NULL) {
2893 0 : *status = U_ILLEGAL_ARGUMENT_ERROR;
2894 0 : return FALSE;
2895 : }
2896 :
2897 0 : switch (ucnv_getType(cnv)) {
2898 : case UCNV_SBCS:
2899 : case UCNV_DBCS:
2900 : case UCNV_UTF32_BigEndian:
2901 : case UCNV_UTF32_LittleEndian:
2902 : case UCNV_UTF32:
2903 : case UCNV_US_ASCII:
2904 0 : return TRUE;
2905 : default:
2906 0 : return FALSE;
2907 : }
2908 : }
2909 : #endif
2910 :
2911 : /*
2912 : * Hey, Emacs, please set the following:
2913 : *
2914 : * Local Variables:
2915 : * indent-tabs-mode: nil
2916 : * End:
2917 : *
2918 : */
|