Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : **********************************************************************
5 : * Copyright (C) 2002-2015, International Business Machines
6 : * Corporation and others. All Rights Reserved.
7 : **********************************************************************
8 : * file name: ucnv_u32.c
9 : * encoding: UTF-8
10 : * tab size: 8 (not used)
11 : * indentation:4
12 : *
13 : * created on: 2002jul01
14 : * created by: Markus W. Scherer
15 : *
16 : * UTF-32 converter implementation. Used to be in ucnv_utf.c.
17 : */
18 :
19 : #include "unicode/utypes.h"
20 :
21 : #if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
22 :
23 : #include "unicode/ucnv.h"
24 : #include "unicode/utf.h"
25 : #include "ucnv_bld.h"
26 : #include "ucnv_cnv.h"
27 : #include "cmemory.h"
28 :
/*
 * Code point limits and UTF-16 surrogate arithmetic constants used by the
 * UTF-32 converter functions in this file.
 */
29 : #define MAXIMUM_UCS2 0x0000FFFF /* largest code point written as a single 16-bit unit */
30 : #define MAXIMUM_UTF 0x0010FFFF /* largest code point accepted from UTF-32 input */
31 : #define HALF_SHIFT 10 /* left shift applied to (lead - SURROGATE_HIGH_START) when combining a pair */
32 : #define HALF_BASE 0x0010000
33 : #define HALF_MASK 0x3FF
34 : #define SURROGATE_HIGH_START 0xD800
35 : #define SURROGATE_LOW_START 0xDC00
36 :
37 : /* -SURROGATE_LOW_START + HALF_BASE */
38 : #define SURROGATE_LOW_BASE 9216
39 :
/* Value of UConverter.fromUnicodeStatus meaning the BOM still has to be written. */
40 : enum {
41 : UCNV_NEED_TO_WRITE_BOM=1
42 : };
43 :
44 : /* UTF-32BE ----------------------------------------------------------------- */
45 : U_CDECL_BEGIN
46 : static void U_CALLCONV
47 0 : T_UConverter_toUnicode_UTF32_BE(UConverterToUnicodeArgs * args,
48 : UErrorCode * err)
49 : {
50 0 : const unsigned char *mySource = (unsigned char *) args->source;
51 0 : UChar *myTarget = args->target;
52 0 : const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
53 0 : const UChar *targetLimit = args->targetLimit;
54 0 : unsigned char *toUBytes = args->converter->toUBytes;
55 : uint32_t ch, i;
56 :
57 : /* Restore state of current sequence */
58 0 : if (args->converter->toUnicodeStatus && myTarget < targetLimit) {
59 0 : i = args->converter->toULength; /* restore # of bytes consumed */
60 0 : args->converter->toULength = 0;
61 :
62 0 : ch = args->converter->toUnicodeStatus - 1;/*Stores the previously calculated ch from a previous call*/
63 0 : args->converter->toUnicodeStatus = 0;
64 0 : goto morebytes;
65 : }
66 :
67 0 : while (mySource < sourceLimit && myTarget < targetLimit) {
68 0 : i = 0;
69 0 : ch = 0;
70 : morebytes:
71 0 : while (i < sizeof(uint32_t)) {
72 0 : if (mySource < sourceLimit) {
73 0 : ch = (ch << 8) | (uint8_t)(*mySource);
74 0 : toUBytes[i++] = (char) *(mySource++);
75 : }
76 : else {
77 : /* stores a partially calculated target*/
78 : /* + 1 to make 0 a valid character */
79 0 : args->converter->toUnicodeStatus = ch + 1;
80 0 : args->converter->toULength = (int8_t) i;
81 0 : goto donefornow;
82 : }
83 : }
84 :
85 0 : if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) {
86 : /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
87 0 : if (ch <= MAXIMUM_UCS2)
88 : {
89 : /* fits in 16 bits */
90 0 : *(myTarget++) = (UChar) ch;
91 : }
92 : else {
93 : /* write out the surrogates */
94 0 : *(myTarget++) = U16_LEAD(ch);
95 0 : ch = U16_TRAIL(ch);
96 0 : if (myTarget < targetLimit) {
97 0 : *(myTarget++) = (UChar)ch;
98 : }
99 : else {
100 : /* Put in overflow buffer (not handled here) */
101 0 : args->converter->UCharErrorBuffer[0] = (UChar) ch;
102 0 : args->converter->UCharErrorBufferLength = 1;
103 0 : *err = U_BUFFER_OVERFLOW_ERROR;
104 0 : break;
105 : }
106 : }
107 : }
108 : else {
109 0 : args->converter->toULength = (int8_t)i;
110 0 : *err = U_ILLEGAL_CHAR_FOUND;
111 0 : break;
112 : }
113 : }
114 :
115 : donefornow:
116 0 : if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) {
117 : /* End of target buffer */
118 0 : *err = U_BUFFER_OVERFLOW_ERROR;
119 : }
120 :
121 0 : args->target = myTarget;
122 0 : args->source = (const char *) mySource;
123 0 : }
124 :
125 : static void U_CALLCONV
126 0 : T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(UConverterToUnicodeArgs * args,
127 : UErrorCode * err)
128 : {
129 0 : const unsigned char *mySource = (unsigned char *) args->source;
130 0 : UChar *myTarget = args->target;
131 0 : int32_t *myOffsets = args->offsets;
132 0 : const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
133 0 : const UChar *targetLimit = args->targetLimit;
134 0 : unsigned char *toUBytes = args->converter->toUBytes;
135 : uint32_t ch, i;
136 0 : int32_t offsetNum = 0;
137 :
138 : /* Restore state of current sequence */
139 0 : if (args->converter->toUnicodeStatus && myTarget < targetLimit) {
140 0 : i = args->converter->toULength; /* restore # of bytes consumed */
141 0 : args->converter->toULength = 0;
142 :
143 0 : ch = args->converter->toUnicodeStatus - 1;/*Stores the previously calculated ch from a previous call*/
144 0 : args->converter->toUnicodeStatus = 0;
145 0 : goto morebytes;
146 : }
147 :
148 0 : while (mySource < sourceLimit && myTarget < targetLimit) {
149 0 : i = 0;
150 0 : ch = 0;
151 : morebytes:
152 0 : while (i < sizeof(uint32_t)) {
153 0 : if (mySource < sourceLimit) {
154 0 : ch = (ch << 8) | (uint8_t)(*mySource);
155 0 : toUBytes[i++] = (char) *(mySource++);
156 : }
157 : else {
158 : /* stores a partially calculated target*/
159 : /* + 1 to make 0 a valid character */
160 0 : args->converter->toUnicodeStatus = ch + 1;
161 0 : args->converter->toULength = (int8_t) i;
162 0 : goto donefornow;
163 : }
164 : }
165 :
166 0 : if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) {
167 : /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
168 0 : if (ch <= MAXIMUM_UCS2) {
169 : /* fits in 16 bits */
170 0 : *(myTarget++) = (UChar) ch;
171 0 : *(myOffsets++) = offsetNum;
172 : }
173 : else {
174 : /* write out the surrogates */
175 0 : *(myTarget++) = U16_LEAD(ch);
176 0 : *myOffsets++ = offsetNum;
177 0 : ch = U16_TRAIL(ch);
178 0 : if (myTarget < targetLimit)
179 : {
180 0 : *(myTarget++) = (UChar)ch;
181 0 : *(myOffsets++) = offsetNum;
182 : }
183 : else {
184 : /* Put in overflow buffer (not handled here) */
185 0 : args->converter->UCharErrorBuffer[0] = (UChar) ch;
186 0 : args->converter->UCharErrorBufferLength = 1;
187 0 : *err = U_BUFFER_OVERFLOW_ERROR;
188 0 : break;
189 : }
190 : }
191 : }
192 : else {
193 0 : args->converter->toULength = (int8_t)i;
194 0 : *err = U_ILLEGAL_CHAR_FOUND;
195 0 : break;
196 : }
197 0 : offsetNum += i;
198 : }
199 :
200 : donefornow:
201 0 : if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
202 : {
203 : /* End of target buffer */
204 0 : *err = U_BUFFER_OVERFLOW_ERROR;
205 : }
206 :
207 0 : args->target = myTarget;
208 0 : args->source = (const char *) mySource;
209 0 : args->offsets = myOffsets;
210 0 : }
211 :
212 : static void U_CALLCONV
213 0 : T_UConverter_fromUnicode_UTF32_BE(UConverterFromUnicodeArgs * args,
214 : UErrorCode * err)
215 : {
216 0 : const UChar *mySource = args->source;
217 : unsigned char *myTarget;
218 0 : const UChar *sourceLimit = args->sourceLimit;
219 0 : const unsigned char *targetLimit = (unsigned char *) args->targetLimit;
220 : UChar32 ch, ch2;
221 : unsigned int indexToWrite;
222 : unsigned char temp[sizeof(uint32_t)];
223 :
224 0 : if(mySource >= sourceLimit) {
225 : /* no input, nothing to do */
226 0 : return;
227 : }
228 :
229 : /* write the BOM if necessary */
230 0 : if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
231 : static const char bom[]={ 0, 0, (char)0xfe, (char)0xff };
232 0 : ucnv_fromUWriteBytes(args->converter,
233 : bom, 4,
234 : &args->target, args->targetLimit,
235 : &args->offsets, -1,
236 0 : err);
237 0 : args->converter->fromUnicodeStatus=0;
238 : }
239 :
240 0 : myTarget = (unsigned char *) args->target;
241 0 : temp[0] = 0;
242 :
243 0 : if (args->converter->fromUChar32) {
244 0 : ch = args->converter->fromUChar32;
245 0 : args->converter->fromUChar32 = 0;
246 0 : goto lowsurogate;
247 : }
248 :
249 0 : while (mySource < sourceLimit && myTarget < targetLimit) {
250 0 : ch = *(mySource++);
251 :
252 0 : if (U_IS_SURROGATE(ch)) {
253 0 : if (U_IS_LEAD(ch)) {
254 : lowsurogate:
255 0 : if (mySource < sourceLimit) {
256 0 : ch2 = *mySource;
257 0 : if (U_IS_TRAIL(ch2)) {
258 0 : ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE;
259 0 : mySource++;
260 : }
261 : else {
262 : /* this is an unmatched trail code unit (2nd surrogate) */
263 : /* callback(illegal) */
264 0 : args->converter->fromUChar32 = ch;
265 0 : *err = U_ILLEGAL_CHAR_FOUND;
266 0 : break;
267 : }
268 : }
269 : else {
270 : /* ran out of source */
271 0 : args->converter->fromUChar32 = ch;
272 0 : if (args->flush) {
273 : /* this is an unmatched trail code unit (2nd surrogate) */
274 : /* callback(illegal) */
275 0 : *err = U_ILLEGAL_CHAR_FOUND;
276 : }
277 0 : break;
278 : }
279 : }
280 : else {
281 : /* this is an unmatched trail code unit (2nd surrogate) */
282 : /* callback(illegal) */
283 0 : args->converter->fromUChar32 = ch;
284 0 : *err = U_ILLEGAL_CHAR_FOUND;
285 0 : break;
286 : }
287 : }
288 :
289 : /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
290 0 : temp[1] = (uint8_t) (ch >> 16 & 0x1F);
291 0 : temp[2] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */
292 0 : temp[3] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */
293 :
294 0 : for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) {
295 0 : if (myTarget < targetLimit) {
296 0 : *(myTarget++) = temp[indexToWrite];
297 : }
298 : else {
299 0 : args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite];
300 0 : *err = U_BUFFER_OVERFLOW_ERROR;
301 : }
302 : }
303 : }
304 :
305 0 : if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) {
306 0 : *err = U_BUFFER_OVERFLOW_ERROR;
307 : }
308 :
309 0 : args->target = (char *) myTarget;
310 0 : args->source = mySource;
311 : }
312 :
313 : static void U_CALLCONV
314 0 : T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args,
315 : UErrorCode * err)
316 : {
317 0 : const UChar *mySource = args->source;
318 : unsigned char *myTarget;
319 : int32_t *myOffsets;
320 0 : const UChar *sourceLimit = args->sourceLimit;
321 0 : const unsigned char *targetLimit = (unsigned char *) args->targetLimit;
322 : UChar32 ch, ch2;
323 0 : int32_t offsetNum = 0;
324 : unsigned int indexToWrite;
325 : unsigned char temp[sizeof(uint32_t)];
326 :
327 0 : if(mySource >= sourceLimit) {
328 : /* no input, nothing to do */
329 0 : return;
330 : }
331 :
332 : /* write the BOM if necessary */
333 0 : if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
334 : static const char bom[]={ 0, 0, (char)0xfe, (char)0xff };
335 0 : ucnv_fromUWriteBytes(args->converter,
336 : bom, 4,
337 : &args->target, args->targetLimit,
338 : &args->offsets, -1,
339 0 : err);
340 0 : args->converter->fromUnicodeStatus=0;
341 : }
342 :
343 0 : myTarget = (unsigned char *) args->target;
344 0 : myOffsets = args->offsets;
345 0 : temp[0] = 0;
346 :
347 0 : if (args->converter->fromUChar32) {
348 0 : ch = args->converter->fromUChar32;
349 0 : args->converter->fromUChar32 = 0;
350 0 : goto lowsurogate;
351 : }
352 :
353 0 : while (mySource < sourceLimit && myTarget < targetLimit) {
354 0 : ch = *(mySource++);
355 :
356 0 : if (U_IS_SURROGATE(ch)) {
357 0 : if (U_IS_LEAD(ch)) {
358 : lowsurogate:
359 0 : if (mySource < sourceLimit) {
360 0 : ch2 = *mySource;
361 0 : if (U_IS_TRAIL(ch2)) {
362 0 : ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE;
363 0 : mySource++;
364 : }
365 : else {
366 : /* this is an unmatched trail code unit (2nd surrogate) */
367 : /* callback(illegal) */
368 0 : args->converter->fromUChar32 = ch;
369 0 : *err = U_ILLEGAL_CHAR_FOUND;
370 0 : break;
371 : }
372 : }
373 : else {
374 : /* ran out of source */
375 0 : args->converter->fromUChar32 = ch;
376 0 : if (args->flush) {
377 : /* this is an unmatched trail code unit (2nd surrogate) */
378 : /* callback(illegal) */
379 0 : *err = U_ILLEGAL_CHAR_FOUND;
380 : }
381 0 : break;
382 : }
383 : }
384 : else {
385 : /* this is an unmatched trail code unit (2nd surrogate) */
386 : /* callback(illegal) */
387 0 : args->converter->fromUChar32 = ch;
388 0 : *err = U_ILLEGAL_CHAR_FOUND;
389 0 : break;
390 : }
391 : }
392 :
393 : /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
394 0 : temp[1] = (uint8_t) (ch >> 16 & 0x1F);
395 0 : temp[2] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */
396 0 : temp[3] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */
397 :
398 0 : for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) {
399 0 : if (myTarget < targetLimit) {
400 0 : *(myTarget++) = temp[indexToWrite];
401 0 : *(myOffsets++) = offsetNum;
402 : }
403 : else {
404 0 : args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite];
405 0 : *err = U_BUFFER_OVERFLOW_ERROR;
406 : }
407 : }
408 0 : offsetNum = offsetNum + 1 + (temp[1] != 0);
409 : }
410 :
411 0 : if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) {
412 0 : *err = U_BUFFER_OVERFLOW_ERROR;
413 : }
414 :
415 0 : args->target = (char *) myTarget;
416 0 : args->source = mySource;
417 0 : args->offsets = myOffsets;
418 : }
419 :
420 : static UChar32 U_CALLCONV
421 0 : T_UConverter_getNextUChar_UTF32_BE(UConverterToUnicodeArgs* args,
422 : UErrorCode* err)
423 : {
424 : const uint8_t *mySource;
425 : UChar32 myUChar;
426 : int32_t length;
427 :
428 0 : mySource = (const uint8_t *)args->source;
429 0 : if (mySource >= (const uint8_t *)args->sourceLimit)
430 : {
431 : /* no input */
432 0 : *err = U_INDEX_OUTOFBOUNDS_ERROR;
433 0 : return 0xffff;
434 : }
435 :
436 0 : length = (int32_t)((const uint8_t *)args->sourceLimit - mySource);
437 0 : if (length < 4)
438 : {
439 : /* got a partial character */
440 0 : uprv_memcpy(args->converter->toUBytes, mySource, length);
441 0 : args->converter->toULength = (int8_t)length;
442 0 : args->source = (const char *)(mySource + length);
443 0 : *err = U_TRUNCATED_CHAR_FOUND;
444 0 : return 0xffff;
445 : }
446 :
447 : /* Don't even try to do a direct cast because the value may be on an odd address. */
448 0 : myUChar = ((UChar32)mySource[0] << 24)
449 0 : | ((UChar32)mySource[1] << 16)
450 0 : | ((UChar32)mySource[2] << 8)
451 0 : | ((UChar32)mySource[3]);
452 :
453 0 : args->source = (const char *)(mySource + 4);
454 0 : if ((uint32_t)myUChar <= MAXIMUM_UTF && !U_IS_SURROGATE(myUChar)) {
455 0 : return myUChar;
456 : }
457 :
458 0 : uprv_memcpy(args->converter->toUBytes, mySource, 4);
459 0 : args->converter->toULength = 4;
460 :
461 0 : *err = U_ILLEGAL_CHAR_FOUND;
462 0 : return 0xffff;
463 : }
464 : U_CDECL_END
/*
 * Implementation table for the UTF-32BE converter: the converter type
 * followed by function-pointer slots. Only the five conversion functions
 * defined above and ucnv_getNonSurrogateUnicodeSet are supplied; every other
 * slot is NULL.
 * NOTE(review): the meaning of each slot is fixed by the UConverterImpl
 * declaration, which is outside this file -- confirm slot order against it.
 */
465 : static const UConverterImpl _UTF32BEImpl = {
466 : UCNV_UTF32_BigEndian,
467 :
468 : NULL,
469 : NULL,
470 :
471 : NULL,
472 : NULL,
473 : NULL,
474 :
475 : T_UConverter_toUnicode_UTF32_BE,
476 : T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC,
477 : T_UConverter_fromUnicode_UTF32_BE,
478 : T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC,
479 : T_UConverter_getNextUChar_UTF32_BE,
480 :
481 : NULL,
482 : NULL,
483 : NULL,
484 : NULL,
485 : ucnv_getNonSurrogateUnicodeSet,
486 :
487 : NULL,
488 : NULL
489 : };
490 :
491 : /* The 1232 CCSID refers to any version of Unicode with any endianness of UTF-32 */
/*
 * Static description of the UTF-32BE converter: name "UTF-32BE", codepage
 * number 1232, converter type UCNV_UTF32_BigEndian.
 * NOTE(review): field meanings come from UConverterStaticData, declared
 * elsewhere. Presumably "4, 4" is the min/max bytes per character and
 * { 0, 0, 0xff, 0xfd } with length 4 is the substitution character (U+FFFD
 * in big-endian byte order) -- confirm against the struct declaration.
 */
492 : static const UConverterStaticData _UTF32BEStaticData = {
493 : sizeof(UConverterStaticData),
494 : "UTF-32BE",
495 : 1232,
496 : UCNV_IBM, UCNV_UTF32_BigEndian, 4, 4,
497 : { 0, 0, 0xff, 0xfd }, 4, FALSE, FALSE,
498 : 0,
499 : 0,
500 : { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
501 : };
502 :
/* Non-static shared-data object for UTF-32BE, built from _UTF32BEStaticData and _UTF32BEImpl. */
503 : const UConverterSharedData _UTF32BEData =
504 : UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF32BEStaticData, &_UTF32BEImpl);
505 :
506 : /* UTF-32LE ---------------------------------------------------------- */
507 : U_CDECL_BEGIN
508 : static void U_CALLCONV
509 0 : T_UConverter_toUnicode_UTF32_LE(UConverterToUnicodeArgs * args,
510 : UErrorCode * err)
511 : {
512 0 : const unsigned char *mySource = (unsigned char *) args->source;
513 0 : UChar *myTarget = args->target;
514 0 : const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
515 0 : const UChar *targetLimit = args->targetLimit;
516 0 : unsigned char *toUBytes = args->converter->toUBytes;
517 : uint32_t ch, i;
518 :
519 : /* Restore state of current sequence */
520 0 : if (args->converter->toUnicodeStatus && myTarget < targetLimit)
521 : {
522 0 : i = args->converter->toULength; /* restore # of bytes consumed */
523 0 : args->converter->toULength = 0;
524 :
525 : /* Stores the previously calculated ch from a previous call*/
526 0 : ch = args->converter->toUnicodeStatus - 1;
527 0 : args->converter->toUnicodeStatus = 0;
528 0 : goto morebytes;
529 : }
530 :
531 0 : while (mySource < sourceLimit && myTarget < targetLimit)
532 : {
533 0 : i = 0;
534 0 : ch = 0;
535 : morebytes:
536 0 : while (i < sizeof(uint32_t))
537 : {
538 0 : if (mySource < sourceLimit)
539 : {
540 0 : ch |= ((uint8_t)(*mySource)) << (i * 8);
541 0 : toUBytes[i++] = (char) *(mySource++);
542 : }
543 : else
544 : {
545 : /* stores a partially calculated target*/
546 : /* + 1 to make 0 a valid character */
547 0 : args->converter->toUnicodeStatus = ch + 1;
548 0 : args->converter->toULength = (int8_t) i;
549 0 : goto donefornow;
550 : }
551 : }
552 :
553 0 : if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) {
554 : /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
555 0 : if (ch <= MAXIMUM_UCS2) {
556 : /* fits in 16 bits */
557 0 : *(myTarget++) = (UChar) ch;
558 : }
559 : else {
560 : /* write out the surrogates */
561 0 : *(myTarget++) = U16_LEAD(ch);
562 0 : ch = U16_TRAIL(ch);
563 0 : if (myTarget < targetLimit) {
564 0 : *(myTarget++) = (UChar)ch;
565 : }
566 : else {
567 : /* Put in overflow buffer (not handled here) */
568 0 : args->converter->UCharErrorBuffer[0] = (UChar) ch;
569 0 : args->converter->UCharErrorBufferLength = 1;
570 0 : *err = U_BUFFER_OVERFLOW_ERROR;
571 0 : break;
572 : }
573 : }
574 : }
575 : else {
576 0 : args->converter->toULength = (int8_t)i;
577 0 : *err = U_ILLEGAL_CHAR_FOUND;
578 0 : break;
579 : }
580 : }
581 :
582 : donefornow:
583 0 : if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
584 : {
585 : /* End of target buffer */
586 0 : *err = U_BUFFER_OVERFLOW_ERROR;
587 : }
588 :
589 0 : args->target = myTarget;
590 0 : args->source = (const char *) mySource;
591 0 : }
592 :
593 : static void U_CALLCONV
594 0 : T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(UConverterToUnicodeArgs * args,
595 : UErrorCode * err)
596 : {
597 0 : const unsigned char *mySource = (unsigned char *) args->source;
598 0 : UChar *myTarget = args->target;
599 0 : int32_t *myOffsets = args->offsets;
600 0 : const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
601 0 : const UChar *targetLimit = args->targetLimit;
602 0 : unsigned char *toUBytes = args->converter->toUBytes;
603 : uint32_t ch, i;
604 0 : int32_t offsetNum = 0;
605 :
606 : /* Restore state of current sequence */
607 0 : if (args->converter->toUnicodeStatus && myTarget < targetLimit)
608 : {
609 0 : i = args->converter->toULength; /* restore # of bytes consumed */
610 0 : args->converter->toULength = 0;
611 :
612 : /* Stores the previously calculated ch from a previous call*/
613 0 : ch = args->converter->toUnicodeStatus - 1;
614 0 : args->converter->toUnicodeStatus = 0;
615 0 : goto morebytes;
616 : }
617 :
618 0 : while (mySource < sourceLimit && myTarget < targetLimit)
619 : {
620 0 : i = 0;
621 0 : ch = 0;
622 : morebytes:
623 0 : while (i < sizeof(uint32_t))
624 : {
625 0 : if (mySource < sourceLimit)
626 : {
627 0 : ch |= ((uint8_t)(*mySource)) << (i * 8);
628 0 : toUBytes[i++] = (char) *(mySource++);
629 : }
630 : else
631 : {
632 : /* stores a partially calculated target*/
633 : /* + 1 to make 0 a valid character */
634 0 : args->converter->toUnicodeStatus = ch + 1;
635 0 : args->converter->toULength = (int8_t) i;
636 0 : goto donefornow;
637 : }
638 : }
639 :
640 0 : if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch))
641 : {
642 : /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
643 0 : if (ch <= MAXIMUM_UCS2)
644 : {
645 : /* fits in 16 bits */
646 0 : *(myTarget++) = (UChar) ch;
647 0 : *(myOffsets++) = offsetNum;
648 : }
649 : else {
650 : /* write out the surrogates */
651 0 : *(myTarget++) = U16_LEAD(ch);
652 0 : *(myOffsets++) = offsetNum;
653 0 : ch = U16_TRAIL(ch);
654 0 : if (myTarget < targetLimit)
655 : {
656 0 : *(myTarget++) = (UChar)ch;
657 0 : *(myOffsets++) = offsetNum;
658 : }
659 : else
660 : {
661 : /* Put in overflow buffer (not handled here) */
662 0 : args->converter->UCharErrorBuffer[0] = (UChar) ch;
663 0 : args->converter->UCharErrorBufferLength = 1;
664 0 : *err = U_BUFFER_OVERFLOW_ERROR;
665 0 : break;
666 : }
667 : }
668 : }
669 : else
670 : {
671 0 : args->converter->toULength = (int8_t)i;
672 0 : *err = U_ILLEGAL_CHAR_FOUND;
673 0 : break;
674 : }
675 0 : offsetNum += i;
676 : }
677 :
678 : donefornow:
679 0 : if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
680 : {
681 : /* End of target buffer */
682 0 : *err = U_BUFFER_OVERFLOW_ERROR;
683 : }
684 :
685 0 : args->target = myTarget;
686 0 : args->source = (const char *) mySource;
687 0 : args->offsets = myOffsets;
688 0 : }
689 :
690 : static void U_CALLCONV
691 0 : T_UConverter_fromUnicode_UTF32_LE(UConverterFromUnicodeArgs * args,
692 : UErrorCode * err)
693 : {
694 0 : const UChar *mySource = args->source;
695 : unsigned char *myTarget;
696 0 : const UChar *sourceLimit = args->sourceLimit;
697 0 : const unsigned char *targetLimit = (unsigned char *) args->targetLimit;
698 : UChar32 ch, ch2;
699 : unsigned int indexToWrite;
700 : unsigned char temp[sizeof(uint32_t)];
701 :
702 0 : if(mySource >= sourceLimit) {
703 : /* no input, nothing to do */
704 0 : return;
705 : }
706 :
707 : /* write the BOM if necessary */
708 0 : if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
709 : static const char bom[]={ (char)0xff, (char)0xfe, 0, 0 };
710 0 : ucnv_fromUWriteBytes(args->converter,
711 : bom, 4,
712 : &args->target, args->targetLimit,
713 : &args->offsets, -1,
714 0 : err);
715 0 : args->converter->fromUnicodeStatus=0;
716 : }
717 :
718 0 : myTarget = (unsigned char *) args->target;
719 0 : temp[3] = 0;
720 :
721 0 : if (args->converter->fromUChar32)
722 : {
723 0 : ch = args->converter->fromUChar32;
724 0 : args->converter->fromUChar32 = 0;
725 0 : goto lowsurogate;
726 : }
727 :
728 0 : while (mySource < sourceLimit && myTarget < targetLimit)
729 : {
730 0 : ch = *(mySource++);
731 :
732 0 : if (U16_IS_SURROGATE(ch)) {
733 0 : if (U16_IS_LEAD(ch))
734 : {
735 : lowsurogate:
736 0 : if (mySource < sourceLimit)
737 : {
738 0 : ch2 = *mySource;
739 0 : if (U16_IS_TRAIL(ch2)) {
740 0 : ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE;
741 0 : mySource++;
742 : }
743 : else {
744 : /* this is an unmatched trail code unit (2nd surrogate) */
745 : /* callback(illegal) */
746 0 : args->converter->fromUChar32 = ch;
747 0 : *err = U_ILLEGAL_CHAR_FOUND;
748 0 : break;
749 : }
750 : }
751 : else {
752 : /* ran out of source */
753 0 : args->converter->fromUChar32 = ch;
754 0 : if (args->flush) {
755 : /* this is an unmatched trail code unit (2nd surrogate) */
756 : /* callback(illegal) */
757 0 : *err = U_ILLEGAL_CHAR_FOUND;
758 : }
759 0 : break;
760 : }
761 : }
762 : else {
763 : /* this is an unmatched trail code unit (2nd surrogate) */
764 : /* callback(illegal) */
765 0 : args->converter->fromUChar32 = ch;
766 0 : *err = U_ILLEGAL_CHAR_FOUND;
767 0 : break;
768 : }
769 : }
770 :
771 : /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
772 0 : temp[2] = (uint8_t) (ch >> 16 & 0x1F);
773 0 : temp[1] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */
774 0 : temp[0] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */
775 :
776 0 : for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++)
777 : {
778 0 : if (myTarget < targetLimit)
779 : {
780 0 : *(myTarget++) = temp[indexToWrite];
781 : }
782 : else
783 : {
784 0 : args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite];
785 0 : *err = U_BUFFER_OVERFLOW_ERROR;
786 : }
787 : }
788 : }
789 :
790 0 : if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
791 : {
792 0 : *err = U_BUFFER_OVERFLOW_ERROR;
793 : }
794 :
795 0 : args->target = (char *) myTarget;
796 0 : args->source = mySource;
797 : }
798 :
799 : static void U_CALLCONV
800 0 : T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args,
801 : UErrorCode * err)
802 : {
803 0 : const UChar *mySource = args->source;
804 : unsigned char *myTarget;
805 : int32_t *myOffsets;
806 0 : const UChar *sourceLimit = args->sourceLimit;
807 0 : const unsigned char *targetLimit = (unsigned char *) args->targetLimit;
808 : UChar32 ch, ch2;
809 : unsigned int indexToWrite;
810 : unsigned char temp[sizeof(uint32_t)];
811 0 : int32_t offsetNum = 0;
812 :
813 0 : if(mySource >= sourceLimit) {
814 : /* no input, nothing to do */
815 0 : return;
816 : }
817 :
818 : /* write the BOM if necessary */
819 0 : if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
820 : static const char bom[]={ (char)0xff, (char)0xfe, 0, 0 };
821 0 : ucnv_fromUWriteBytes(args->converter,
822 : bom, 4,
823 : &args->target, args->targetLimit,
824 : &args->offsets, -1,
825 0 : err);
826 0 : args->converter->fromUnicodeStatus=0;
827 : }
828 :
829 0 : myTarget = (unsigned char *) args->target;
830 0 : myOffsets = args->offsets;
831 0 : temp[3] = 0;
832 :
833 0 : if (args->converter->fromUChar32)
834 : {
835 0 : ch = args->converter->fromUChar32;
836 0 : args->converter->fromUChar32 = 0;
837 0 : goto lowsurogate;
838 : }
839 :
840 0 : while (mySource < sourceLimit && myTarget < targetLimit)
841 : {
842 0 : ch = *(mySource++);
843 :
844 0 : if (U16_IS_SURROGATE(ch)) {
845 0 : if (U16_IS_LEAD(ch))
846 : {
847 : lowsurogate:
848 0 : if (mySource < sourceLimit)
849 : {
850 0 : ch2 = *mySource;
851 0 : if (U16_IS_TRAIL(ch2))
852 : {
853 0 : ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE;
854 0 : mySource++;
855 : }
856 : else {
857 : /* this is an unmatched trail code unit (2nd surrogate) */
858 : /* callback(illegal) */
859 0 : args->converter->fromUChar32 = ch;
860 0 : *err = U_ILLEGAL_CHAR_FOUND;
861 0 : break;
862 : }
863 : }
864 : else {
865 : /* ran out of source */
866 0 : args->converter->fromUChar32 = ch;
867 0 : if (args->flush) {
868 : /* this is an unmatched trail code unit (2nd surrogate) */
869 : /* callback(illegal) */
870 0 : *err = U_ILLEGAL_CHAR_FOUND;
871 : }
872 0 : break;
873 : }
874 : }
875 : else {
876 : /* this is an unmatched trail code unit (2nd surrogate) */
877 : /* callback(illegal) */
878 0 : args->converter->fromUChar32 = ch;
879 0 : *err = U_ILLEGAL_CHAR_FOUND;
880 0 : break;
881 : }
882 : }
883 :
884 : /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
885 0 : temp[2] = (uint8_t) (ch >> 16 & 0x1F);
886 0 : temp[1] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */
887 0 : temp[0] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */
888 :
889 0 : for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++)
890 : {
891 0 : if (myTarget < targetLimit)
892 : {
893 0 : *(myTarget++) = temp[indexToWrite];
894 0 : *(myOffsets++) = offsetNum;
895 : }
896 : else
897 : {
898 0 : args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite];
899 0 : *err = U_BUFFER_OVERFLOW_ERROR;
900 : }
901 : }
902 0 : offsetNum = offsetNum + 1 + (temp[2] != 0);
903 : }
904 :
905 0 : if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
906 : {
907 0 : *err = U_BUFFER_OVERFLOW_ERROR;
908 : }
909 :
910 0 : args->target = (char *) myTarget;
911 0 : args->source = mySource;
912 0 : args->offsets = myOffsets;
913 : }
914 :
915 : static UChar32 U_CALLCONV
916 0 : T_UConverter_getNextUChar_UTF32_LE(UConverterToUnicodeArgs* args,
917 : UErrorCode* err)
918 : {
919 : const uint8_t *mySource;
920 : UChar32 myUChar;
921 : int32_t length;
922 :
923 0 : mySource = (const uint8_t *)args->source;
924 0 : if (mySource >= (const uint8_t *)args->sourceLimit)
925 : {
926 : /* no input */
927 0 : *err = U_INDEX_OUTOFBOUNDS_ERROR;
928 0 : return 0xffff;
929 : }
930 :
931 0 : length = (int32_t)((const uint8_t *)args->sourceLimit - mySource);
932 0 : if (length < 4)
933 : {
934 : /* got a partial character */
935 0 : uprv_memcpy(args->converter->toUBytes, mySource, length);
936 0 : args->converter->toULength = (int8_t)length;
937 0 : args->source = (const char *)(mySource + length);
938 0 : *err = U_TRUNCATED_CHAR_FOUND;
939 0 : return 0xffff;
940 : }
941 :
942 : /* Don't even try to do a direct cast because the value may be on an odd address. */
943 0 : myUChar = ((UChar32)mySource[3] << 24)
944 0 : | ((UChar32)mySource[2] << 16)
945 0 : | ((UChar32)mySource[1] << 8)
946 0 : | ((UChar32)mySource[0]);
947 :
948 0 : args->source = (const char *)(mySource + 4);
949 0 : if ((uint32_t)myUChar <= MAXIMUM_UTF && !U_IS_SURROGATE(myUChar)) {
950 0 : return myUChar;
951 : }
952 :
953 0 : uprv_memcpy(args->converter->toUBytes, mySource, 4);
954 0 : args->converter->toULength = 4;
955 :
956 0 : *err = U_ILLEGAL_CHAR_FOUND;
957 0 : return 0xffff;
958 : }
959 : U_CDECL_END
/*
 * Implementation table for the UTF-32LE converter: the converter type
 * followed by function-pointer slots. Only the five conversion functions
 * defined above and ucnv_getNonSurrogateUnicodeSet are supplied; every other
 * slot is NULL.
 * NOTE(review): the meaning of each slot is fixed by the UConverterImpl
 * declaration, which is outside this file -- confirm slot order against it.
 */
960 : static const UConverterImpl _UTF32LEImpl = {
961 : UCNV_UTF32_LittleEndian,
962 :
963 : NULL,
964 : NULL,
965 :
966 : NULL,
967 : NULL,
968 : NULL,
969 :
970 : T_UConverter_toUnicode_UTF32_LE,
971 : T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC,
972 : T_UConverter_fromUnicode_UTF32_LE,
973 : T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC,
974 : T_UConverter_getNextUChar_UTF32_LE,
975 :
976 : NULL,
977 : NULL,
978 : NULL,
979 : NULL,
980 : ucnv_getNonSurrogateUnicodeSet,
981 :
982 : NULL,
983 : NULL
984 : };
985 :
986 : /* The 1232 CCSID refers to any version of Unicode with any endianness of UTF-32 */
/*
 * Static description of the UTF-32LE converter: name "UTF-32LE", codepage
 * number 1234, converter type UCNV_UTF32_LittleEndian.
 * NOTE(review): the comment above mentions CCSID 1232 while this table uses
 * 1234 -- confirm which is intended. Field meanings come from
 * UConverterStaticData, declared elsewhere. Presumably "4, 4" is the min/max
 * bytes per character and { 0xfd, 0xff, 0, 0 } with length 4 is the
 * substitution character (U+FFFD in little-endian byte order) -- confirm.
 */
987 : static const UConverterStaticData _UTF32LEStaticData = {
988 : sizeof(UConverterStaticData),
989 : "UTF-32LE",
990 : 1234,
991 : UCNV_IBM, UCNV_UTF32_LittleEndian, 4, 4,
992 : { 0xfd, 0xff, 0, 0 }, 4, FALSE, FALSE,
993 : 0,
994 : 0,
995 : { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
996 : };
997 :
998 :
/* Non-static shared-data object for UTF-32LE, built from _UTF32LEStaticData and _UTF32LEImpl. */
999 : const UConverterSharedData _UTF32LEData =
1000 : UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF32LEStaticData, &_UTF32LEImpl);
1001 :
1002 : /* UTF-32 (Detect BOM) ------------------------------------------------------ */
1003 :
1004 : /*
1005 : * Detect a BOM at the beginning of the stream and select UTF-32BE or UTF-32LE
1006 : * accordingly.
1007 : *
1008 : * State values:
1009 : * 0 initial state
1010 : * 1 saw 00
1011 : * 2 saw 00 00
1012 : * 3 saw 00 00 FE
1013 : * 4 -
1014 : * 5 saw FF
1015 : * 6 saw FF FE
1016 : * 7 saw FF FE 00
1017 : * 8 UTF-32BE mode
1018 : * 9 UTF-32LE mode
1019 : *
1020 : * During detection: state&3==number of matching bytes so far.
1021 : *
1022 : * On output, emit U+FEFF as the first code point.
1023 : */
1024 : U_CDECL_BEGIN
1025 : static void U_CALLCONV
1026 0 : _UTF32Reset(UConverter *cnv, UConverterResetChoice choice) {
1027 0 : if(choice<=UCNV_RESET_TO_UNICODE) {
1028 : /* reset toUnicode: state=0 */
1029 0 : cnv->mode=0;
1030 : }
1031 0 : if(choice!=UCNV_RESET_TO_UNICODE) {
1032 : /* reset fromUnicode: prepare to output the UTF-32PE BOM */
1033 0 : cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM;
1034 : }
1035 0 : }
1036 :
/*
 * Open callback of the BOM-detecting UTF-32 converter.
 * pArgs and pErrorCode are intentionally unused; the only work is to put the
 * converter into the fully reset state (both directions) via _UTF32Reset().
 */
1037 : static void U_CALLCONV
1038 0 : _UTF32Open(UConverter *cnv,
1039 : UConverterLoadArgs *pArgs,
1040 : UErrorCode *pErrorCode) {
1041 : (void)pArgs;
1042 : (void)pErrorCode;
1043 0 : _UTF32Reset(cnv, UCNV_RESET_BOTH);
1044 0 : }
1045 :
/*
 * Both BOM byte sequences back to back: bytes 0..3 are 00 00 FE FF (UTF-32BE),
 * bytes 4..7 are FF FE 00 00 (UTF-32LE).
 * The detection code indexes this table directly with states 1..3 and 5..7
 * (utf32BOM[state] is the next expected byte) and uses utf32BOM+(state&4)
 * as the start of the BOM currently being matched.
 */
1046 : static const char utf32BOM[8]={ 0, 0, (char)0xfe, (char)0xff, (char)0xff, (char)0xfe, 0, 0 };
1047 :
/*
 * toUnicode callback of the BOM-detecting UTF-32 converter.
 *
 * pArgs      - conversion arguments; source/sourceLimit/flush are temporarily
 *              overwritten while matched BOM bytes are replayed and restored afterwards
 * pErrorCode - in/out error code; the detection loop stops once it holds a failure
 *
 * Starting from the state saved in cnv->mode, runs the BOM-detection state
 * machine (see the state table in the section comment) over the input:
 * - a complete 00 00 FE FF selects UTF-32BE (state 8) and a complete
 *   FF FE 00 00 selects UTF-32LE (state 9); the BOM bytes are skipped and
 *   not handed to the BE/LE converter functions;
 * - any mismatch falls back to UTF-32BE (state 8) and re-feeds the bytes that
 *   had matched so far;
 * - in states 8/9 the rest of the buffer is passed to the UTF-32BE/LE
 *   functions (the _OFFSET_LOGIC variants when pArgs->offsets is not NULL).
 * On return pArgs->source is the position reached and cnv->mode holds the
 * resulting state for the next call.
 */
1048 : static void U_CALLCONV
1049 0 : _UTF32ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
1050 : UErrorCode *pErrorCode) {
1051 0 : UConverter *cnv=pArgs->converter;
1052 0 : const char *source=pArgs->source;
1053 0 : const char *sourceLimit=pArgs->sourceLimit;
1054 0 : int32_t *offsets=pArgs->offsets;
1055 :
1056 : int32_t state, offsetDelta;
1057 : char b;
1058 :
/* resume BOM detection (or the selected byte order) where the previous call stopped */
1059 0 : state=cnv->mode;
1060 :
1061 : /*
1062 : * If we detect a BOM in this buffer, then we must add the BOM size to the
1063 : * offsets because the actual converter function will not see and count the BOM.
1064 : * offsetDelta will have the number of the BOM bytes that are in the current buffer.
1065 : */
1066 0 : offsetDelta=0;
1067 :
1068 0 : while(source<sourceLimit && U_SUCCESS(*pErrorCode)) {
1069 0 : switch(state) {
1070 : case 0:
/* initial state: the first byte decides whether a BOM can follow at all */
1071 0 : b=*source;
1072 0 : if(b==0) {
1073 0 : state=1; /* could be 00 00 FE FF */
1074 0 : } else if(b==(char)0xff) {
1075 0 : state=5; /* could be FF FE 00 00 */
1076 : } else {
1077 0 : state=8; /* default to UTF-32BE */
/* the byte is not consumed here: the loop re-dispatches it in state 8 */
1078 0 : continue;
1079 : }
1080 0 : ++source;
1081 0 : break;
1082 : case 1:
1083 : case 2:
1084 : case 3:
1085 : case 5:
1086 : case 6:
1087 : case 7:
/* partial BOM seen: utf32BOM[state] is the next byte required to keep matching */
1088 0 : if(*source==utf32BOM[state]) {
1089 0 : ++state;
1090 0 : ++source;
/* reaching 4 (from 3) or 8 (from 7) means all four BOM bytes matched */
1091 0 : if(state==4) {
1092 0 : state=8; /* detect UTF-32BE */
1093 0 : offsetDelta=(int32_t)(source-pArgs->source);
1094 0 : } else if(state==8) {
1095 0 : state=9; /* detect UTF-32LE */
1096 0 : offsetDelta=(int32_t)(source-pArgs->source);
1097 : }
1098 : } else {
1099 : /* switch to UTF-32BE and pass the previous bytes */
1100 0 : int32_t count=(int32_t)(source-pArgs->source); /* number of bytes from this buffer */
1101 :
1102 : /* reset the source */
1103 0 : source=pArgs->source;
1104 :
/* state&3 is the total number of bytes matched so far, count is how many of them came from this buffer */
1105 0 : if(count==(state&3)) {
1106 : /* simple: all in the same buffer, just reset source */
1107 : } else {
1108 0 : UBool oldFlush=pArgs->flush;
1109 :
1110 : /* some of the bytes are from a previous buffer, replay those first */
1111 0 : pArgs->source=utf32BOM+(state&4); /* select the correct BOM */
1112 0 : pArgs->sourceLimit=pArgs->source+((state&3)-count); /* replay previous bytes */
1113 0 : pArgs->flush=FALSE; /* this sourceLimit is not the real source stream limit */
1114 :
1115 : /* no offsets: bytes from previous buffer, and not enough for output */
1116 0 : T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode);
1117 :
1118 : /* restore real pointers; pArgs->source will be set in case 8/9 */
1119 0 : pArgs->sourceLimit=sourceLimit;
1120 0 : pArgs->flush=oldFlush;
1121 : }
1122 0 : state=8;
/* continue (not break) so that the rewound source is processed in state 8 right away */
1123 0 : continue;
1124 : }
1125 0 : break;
1126 : case 8:
1127 : /* call UTF-32BE */
1128 0 : pArgs->source=source;
1129 0 : if(offsets==NULL) {
1130 0 : T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode);
1131 : } else {
1132 0 : T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(pArgs, pErrorCode);
1133 : }
/* pick up how far the BE function advanced */
1134 0 : source=pArgs->source;
1135 0 : break;
1136 : case 9:
1137 : /* call UTF-32LE */
1138 0 : pArgs->source=source;
1139 0 : if(offsets==NULL) {
1140 0 : T_UConverter_toUnicode_UTF32_LE(pArgs, pErrorCode);
1141 : } else {
1142 0 : T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(pArgs, pErrorCode);
1143 : }
/* pick up how far the LE function advanced */
1144 0 : source=pArgs->source;
1145 0 : break;
1146 : default:
1147 0 : break; /* does not occur */
1148 : }
1149 : }
1150 :
1151 : /* add BOM size to offsets - see comment at offsetDelta declaration */
1152 0 : if(offsets!=NULL && offsetDelta!=0) {
/* offsets still points at the first entry of this call; pArgs->offsets is used as the end of the entries to adjust */
1153 0 : int32_t *offsetsLimit=pArgs->offsets;
1154 0 : while(offsets<offsetsLimit) {
1155 0 : *offsets++ += offsetDelta;
1156 : }
1157 : }
1158 :
1159 0 : pArgs->source=source;
1160 :
1161 0 : if(source==sourceLimit && pArgs->flush) {
1162 : /* handle truncated input */
1163 0 : switch(state) {
1164 : case 0:
1165 0 : break; /* no input at all, nothing to do */
1166 : case 8:
/* NOTE(review): called with no remaining input; presumably lets the BE function deal with a pending partial sequence on flush -- confirm */
1167 0 : T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode);
1168 0 : break;
1169 : case 9:
/* same as case 8, for the LE function */
1170 0 : T_UConverter_toUnicode_UTF32_LE(pArgs, pErrorCode);
1171 0 : break;
1172 : default:
1173 : /* handle 0<state<8: call UTF-32BE with too-short input */
1174 0 : pArgs->source=utf32BOM+(state&4); /* select the correct BOM */
1175 0 : pArgs->sourceLimit=pArgs->source+(state&3); /* replay bytes */
1176 :
1177 : /* no offsets: not enough for output */
1178 0 : T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode);
/* put the caller's real pointers back after the replay */
1179 0 : pArgs->source=source;
1180 0 : pArgs->sourceLimit=sourceLimit;
1181 0 : state=8;
1182 0 : break;
1183 : }
1184 : }
1185 :
/* remember the detection state / selected byte order for the next call */
1186 0 : cnv->mode=state;
1187 0 : }
1188 :
/*
 * getNextUChar callback of the BOM-detecting UTF-32 converter.
 *
 * Once the byte order has been selected (converter mode 8 = UTF-32BE,
 * 9 = UTF-32LE) the call is forwarded to the matching BE/LE implementation
 * and its result is returned. In every other mode (BOM detection not finished)
 * it returns UCNV_GET_NEXT_UCHAR_USE_TO_U.
 * NOTE(review): that constant presumably tells the caller to go through the
 * toUnicode function instead, which performs the BOM detection -- confirm.
 */
1189 : static UChar32 U_CALLCONV
1190 0 : _UTF32GetNextUChar(UConverterToUnicodeArgs *pArgs,
1191 : UErrorCode *pErrorCode) {
1192 0 : switch(pArgs->converter->mode) {
1193 : case 8:
1194 0 : return T_UConverter_getNextUChar_UTF32_BE(pArgs, pErrorCode);
1195 : case 9:
1196 0 : return T_UConverter_getNextUChar_UTF32_LE(pArgs, pErrorCode);
1197 : default:
1198 0 : return UCNV_GET_NEXT_UCHAR_USE_TO_U;
1199 : }
1200 : }
1201 : U_CDECL_END
/*
 * Function table of the BOM-detecting UTF-32 converter (type UCNV_UTF32).
 * NOTE(review): the meaning of each positional slot comes from the
 * UConverterImpl declaration, which is not visible here; the comments below
 * only describe what this initializer shows.
 */
1202 : static const UConverterImpl _UTF32Impl = {
1203 : UCNV_UTF32,
1204 :
1205 : NULL,
1206 : NULL,
1207 :
1208 : _UTF32Open,
1209 : NULL,
1210 : _UTF32Reset,
1211 :
/* the same BOM-detecting function fills two consecutive slots; it checks pArgs->offsets itself */
1212 : _UTF32ToUnicodeWithOffsets,
1213 : _UTF32ToUnicodeWithOffsets,
/* fromUnicode uses the platform byte order: the BE functions on big-endian builds, the LE functions otherwise */
1214 : #if U_IS_BIG_ENDIAN
1215 : T_UConverter_fromUnicode_UTF32_BE,
1216 : T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC,
1217 : #else
1218 : T_UConverter_fromUnicode_UTF32_LE,
1219 : T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC,
1220 : #endif
1221 : _UTF32GetNextUChar,
1222 :
1223 : NULL, /* ### TODO implement getStarters for all Unicode encodings?! */
1224 : NULL,
1225 : NULL,
1226 : NULL,
1227 : ucnv_getNonSurrogateUnicodeSet,
1228 :
1229 : NULL,
1230 : NULL
1231 : };
1232 :
1233 : /* The 1236 CCSID refers to any version of Unicode with a BOM-sensitive endianness of UTF-32 */
/*
 * Static descriptor of the BOM-detecting "UTF-32" converter: codepage number
 * 1236, platform UCNV_IBM, conversion type UCNV_UTF32.
 * The 4-byte sequence is given in platform byte order: 00 00 FF FD on
 * big-endian builds, FD FF 00 00 otherwise (0x0000FFFD either way).
 * NOTE(review): presumably this is the substitution character and "4, 4" is
 * min/max bytes per character -- confirm against the UConverterStaticData
 * declaration, which is not visible here.
 */
1234 : static const UConverterStaticData _UTF32StaticData = {
1235 : sizeof(UConverterStaticData),
1236 : "UTF-32",
1237 : 1236,
1238 : UCNV_IBM, UCNV_UTF32, 4, 4,
1239 : #if U_IS_BIG_ENDIAN
1240 : { 0, 0, 0xff, 0xfd }, 4,
1241 : #else
1242 : { 0xfd, 0xff, 0, 0 }, 4,
1243 : #endif
1244 : FALSE, FALSE,
1245 : 0,
1246 : 0,
1247 : { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
1248 : };
1249 :
/* Shared-data record for BOM-detecting UTF-32: pairs the static descriptor (_UTF32StaticData) with the function table (_UTF32Impl). */
1250 : const UConverterSharedData _UTF32Data =
1251 : UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF32StaticData, &_UTF32Impl);
1252 :
1253 : #endif
|