LCOV - code coverage report
Current view: top level - intl/icu/source/common - ucnv_u32.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 0 510 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 14 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*  
       4             : **********************************************************************
       5             : *   Copyright (C) 2002-2015, International Business Machines
       6             : *   Corporation and others.  All Rights Reserved.
       7             : **********************************************************************
       8             : *   file name:  ucnv_u32.c
       9             : *   encoding:   UTF-8
      10             : *   tab size:   8 (not used)
      11             : *   indentation:4
      12             : *
      13             : *   created on: 2002jul01
      14             : *   created by: Markus W. Scherer
      15             : *
      16             : *   UTF-32 converter implementation. Used to be in ucnv_utf.c.
      17             : */
      18             : 
      19             : #include "unicode/utypes.h"
      20             : 
      21             : #if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
      22             : 
      23             : #include "unicode/ucnv.h"
      24             : #include "unicode/utf.h"
      25             : #include "ucnv_bld.h"
      26             : #include "ucnv_cnv.h"
      27             : #include "cmemory.h"
      28             : 
      29             : #define MAXIMUM_UCS2            0x0000FFFF /* largest value the toUnicode functions write as a single UChar */
      30             : #define MAXIMUM_UTF             0x0010FFFF /* largest value accepted as a code point when decoding */
      31             : #define HALF_SHIFT              10 /* left shift applied to (lead - SURROGATE_HIGH_START) when combining a surrogate pair */
      32             : #define HALF_BASE               0x0010000
      33             : #define HALF_MASK               0x3FF
      34             : #define SURROGATE_HIGH_START    0xD800 /* subtracted from a lead surrogate before it is shifted */
      35             : #define SURROGATE_LOW_START     0xDC00
      36             : 
      37             : /* -SURROGATE_LOW_START + HALF_BASE, i.e. 0x10000 - 0xDC00; added to the trail unit when combining a surrogate pair */
      38             : #define SURROGATE_LOW_BASE      9216
      39             : 
      40             : enum {
                           /* Value of converter->fromUnicodeStatus that makes the fromUnicode functions emit a BOM before any other output. */
      41             :     UCNV_NEED_TO_WRITE_BOM=1
      42             : };
      43             : 
      44             : /* UTF-32BE ----------------------------------------------------------------- */
      45             : U_CDECL_BEGIN
      46             : static void U_CALLCONV
      47           0 : T_UConverter_toUnicode_UTF32_BE(UConverterToUnicodeArgs * args,
      48             :                                 UErrorCode * err)
      49             : {
                           /*
                            * Decodes big-endian UTF-32 bytes from args->source into UTF-16 code
                            * units at args->target.
                            *
                            * Four bytes are accumulated per character, most significant byte
                            * first; each byte is also copied into converter->toUBytes.
                            * If the source ends in the middle of a character, the partial value
                            * (plus 1) and the number of bytes read so far are saved in
                            * converter->toUnicodeStatus / toULength and picked up by the next call.
                            *
                            * *err is set to U_ILLEGAL_CHAR_FOUND for a value above MAXIMUM_UTF or a
                            * surrogate value, and to U_BUFFER_OVERFLOW_ERROR when the target is
                            * full while input remains or when a trail surrogate had to be parked
                            * in converter->UCharErrorBuffer.
                            *
                            * On return args->source and args->target point past what was consumed
                            * and written.
                            */
      50           0 :     const unsigned char *mySource = (unsigned char *) args->source;
      51           0 :     UChar *myTarget = args->target;
      52           0 :     const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
      53           0 :     const UChar *targetLimit = args->targetLimit;
      54           0 :     unsigned char *toUBytes = args->converter->toUBytes;
      55             :     uint32_t ch, i;
      56             : 
      57             :     /* Resume a character left incomplete by a previous call (only when there is room for output) */
      58           0 :     if (args->converter->toUnicodeStatus && myTarget < targetLimit) {
      59           0 :         i = args->converter->toULength;       /* restore # of bytes consumed */
      60           0 :         args->converter->toULength = 0;
      61             : 
      62           0 :         ch = args->converter->toUnicodeStatus - 1;/* partial value saved by the previous call; it was stored +1 */
      63           0 :         args->converter->toUnicodeStatus = 0;
      64           0 :         goto morebytes;
      65             :     }
      66             : 
      67           0 :     while (mySource < sourceLimit && myTarget < targetLimit) {
      68           0 :         i = 0;
      69           0 :         ch = 0;
      70             : morebytes:
      71           0 :         while (i < sizeof(uint32_t)) {
      72           0 :             if (mySource < sourceLimit) {
      73           0 :                 ch = (ch << 8) | (uint8_t)(*mySource);
      74           0 :                 toUBytes[i++] = (char) *(mySource++);
      75             :             }
      76             :             else {
      77             :                 /* source exhausted in mid-character: save the partial value for the next call */
      78             :                 /* + 1 so that a partial value of 0 still leaves a non-zero status */
      79           0 :                 args->converter->toUnicodeStatus = ch + 1;
      80           0 :                 args->converter->toULength = (int8_t) i;
      81           0 :                 goto donefornow;
      82             :             }
      83             :         }
      84             : 
      85           0 :         if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) {
      86             :             /* All four bytes were read and the value is a valid, non-surrogate code point */
      87           0 :             if (ch <= MAXIMUM_UCS2) 
      88             :             {
      89             :                 /* fits in 16 bits */
      90           0 :                 *(myTarget++) = (UChar) ch;
      91             :             }
      92             :             else {
      93             :                 /* write out the surrogates */
      94           0 :                 *(myTarget++) = U16_LEAD(ch);
      95           0 :                 ch = U16_TRAIL(ch);
      96           0 :                 if (myTarget < targetLimit) {
      97           0 :                     *(myTarget++) = (UChar)ch;
      98             :                 }
      99             :                 else {
     100             :                     /* No room for the trail surrogate: park it in the converter's UCharErrorBuffer */
     101           0 :                     args->converter->UCharErrorBuffer[0] = (UChar) ch;
     102           0 :                     args->converter->UCharErrorBufferLength = 1;
     103           0 :                     *err = U_BUFFER_OVERFLOW_ERROR;
     104           0 :                     break;
     105             :                 }
     106             :             }
     107             :         }
     108             :         else {
                               /* Out-of-range or surrogate value: the offending bytes are already in toUBytes */
     109           0 :             args->converter->toULength = (int8_t)i;
     110           0 :             *err = U_ILLEGAL_CHAR_FOUND;
     111           0 :             break;
     112             :         }
     113             :     }
     114             : 
     115             : donefornow:
     116           0 :     if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) {
     117             :         /* Input remains but the target buffer is full */
     118           0 :         *err = U_BUFFER_OVERFLOW_ERROR;
     119             :     }
     120             : 
     121           0 :     args->target = myTarget;
     122           0 :     args->source = (const char *) mySource;
     123           0 : }
     124             : 
     125             : static void U_CALLCONV
     126           0 : T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(UConverterToUnicodeArgs * args,
     127             :                                              UErrorCode * err)
     128             : {
                           /*
                            * Same decoding as T_UConverter_toUnicode_UTF32_BE, and additionally
                            * writes one entry to args->offsets for every UChar written to
                            * args->target. The entry is offsetNum, a running byte count that
                            * starts at 0 on each call; both halves of a surrogate pair get the
                            * same value.
                            *
                            * *err is set to U_ILLEGAL_CHAR_FOUND for a value above MAXIMUM_UTF or a
                            * surrogate value, and to U_BUFFER_OVERFLOW_ERROR when the target is
                            * full while input remains or when a trail surrogate had to be parked
                            * in converter->UCharErrorBuffer (no offset is written for a parked
                            * unit).
                            *
                            * On return args->source, args->target and args->offsets are advanced.
                            */
     129           0 :     const unsigned char *mySource = (unsigned char *) args->source;
     130           0 :     UChar *myTarget = args->target;
     131           0 :     int32_t *myOffsets = args->offsets;
     132           0 :     const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
     133           0 :     const UChar *targetLimit = args->targetLimit;
     134           0 :     unsigned char *toUBytes = args->converter->toUBytes;
     135             :     uint32_t ch, i;
     136           0 :     int32_t offsetNum = 0;
     137             : 
     138             :     /* Resume a character left incomplete by a previous call (only when there is room for output) */
     139           0 :     if (args->converter->toUnicodeStatus && myTarget < targetLimit) {
     140           0 :         i = args->converter->toULength;       /* restore # of bytes consumed */
     141           0 :         args->converter->toULength = 0;
     142             : 
     143           0 :         ch = args->converter->toUnicodeStatus - 1;/* partial value saved by the previous call; it was stored +1 */
     144           0 :         args->converter->toUnicodeStatus = 0;
     145           0 :         goto morebytes;
     146             :     }
     147             : 
     148           0 :     while (mySource < sourceLimit && myTarget < targetLimit) {
     149           0 :         i = 0;
     150           0 :         ch = 0;
     151             : morebytes:
     152           0 :         while (i < sizeof(uint32_t)) {
     153           0 :             if (mySource < sourceLimit) {
     154           0 :                 ch = (ch << 8) | (uint8_t)(*mySource);
     155           0 :                 toUBytes[i++] = (char) *(mySource++);
     156             :             }
     157             :             else {
     158             :                 /* source exhausted in mid-character: save the partial value for the next call */
     159             :                 /* + 1 so that a partial value of 0 still leaves a non-zero status */
     160           0 :                 args->converter->toUnicodeStatus = ch + 1;
     161           0 :                 args->converter->toULength = (int8_t) i;
     162           0 :                 goto donefornow;
     163             :             }
     164             :         }
     165             : 
     166           0 :         if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) {
     167             :             /* All four bytes were read and the value is a valid, non-surrogate code point */
     168           0 :             if (ch <= MAXIMUM_UCS2) {
     169             :                 /* fits in 16 bits */
     170           0 :                 *(myTarget++) = (UChar) ch;
     171           0 :                 *(myOffsets++) = offsetNum;
     172             :             }
     173             :             else {
     174             :                 /* write out the surrogates */
     175           0 :                 *(myTarget++) = U16_LEAD(ch);
     176           0 :                 *myOffsets++ = offsetNum;
     177           0 :                 ch = U16_TRAIL(ch);
     178           0 :                 if (myTarget < targetLimit)
     179             :                 {
     180           0 :                     *(myTarget++) = (UChar)ch;
     181           0 :                     *(myOffsets++) = offsetNum;
     182             :                 }
     183             :                 else {
     184             :                     /* No room for the trail surrogate: park it in the converter's UCharErrorBuffer */
     185           0 :                     args->converter->UCharErrorBuffer[0] = (UChar) ch;
     186           0 :                     args->converter->UCharErrorBufferLength = 1;
     187           0 :                     *err = U_BUFFER_OVERFLOW_ERROR;
     188           0 :                     break;
     189             :                 }
     190             :             }
     191             :         }
     192             :         else {
                               /* Out-of-range or surrogate value: the offending bytes are already in toUBytes */
     193           0 :             args->converter->toULength = (int8_t)i;
     194           0 :             *err = U_ILLEGAL_CHAR_FOUND;
     195           0 :             break;
     196             :         }
                           /*
                            * NOTE(review): i is 4 here even when some of the four bytes were
                            * consumed by a previous call and restored above, so after a resumed
                            * character offsetNum advances by more than this call consumed --
                            * confirm this is the intended offset convention.
                            */
     197           0 :         offsetNum += i;
     198             :     }
     199             : 
     200             : donefornow:
     201           0 :     if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
     202             :     {
     203             :         /* Input remains but the target buffer is full */
     204           0 :         *err = U_BUFFER_OVERFLOW_ERROR;
     205             :     }
     206             : 
     207           0 :     args->target = myTarget;
     208           0 :     args->source = (const char *) mySource;
     209           0 :     args->offsets = myOffsets;
     210           0 : }
     211             : 
     212             : static void U_CALLCONV
     213           0 : T_UConverter_fromUnicode_UTF32_BE(UConverterFromUnicodeArgs * args,
     214             :                                   UErrorCode * err)
     215             : {
                           /*
                            * Encodes UTF-16 code units from args->source as big-endian UTF-32
                            * bytes at args->target.
                            *
                            * Returns at once when there is no input. If
                            * converter->fromUnicodeStatus is UCNV_NEED_TO_WRITE_BOM, the bytes
                            * 00 00 FE FF are written first through ucnv_fromUWriteBytes and the
                            * status is cleared. A lead surrogate left pending in
                            * converter->fromUChar32 by a previous call is picked up before new
                            * input is read.
                            *
                            * Surrogate pairs are combined into one code point. An unpaired
                            * surrogate is stored in converter->fromUChar32 and *err is set to
                            * U_ILLEGAL_CHAR_FOUND; a lead surrogate at the very end of the input
                            * is stored the same way but is an error only when args->flush is set.
                            * Output bytes that do not fit in the target go to
                            * converter->charErrorBuffer with *err = U_BUFFER_OVERFLOW_ERROR.
                            */
     216           0 :     const UChar *mySource = args->source;
     217             :     unsigned char *myTarget;
     218           0 :     const UChar *sourceLimit = args->sourceLimit;
     219           0 :     const unsigned char *targetLimit = (unsigned char *) args->targetLimit;
     220             :     UChar32 ch, ch2;
     221             :     unsigned int indexToWrite;
     222             :     unsigned char temp[sizeof(uint32_t)];
     223             : 
     224           0 :     if(mySource >= sourceLimit) {
     225             :         /* no input, nothing to do */
     226           0 :         return;
     227             :     }
     228             : 
     229             :     /* write the BOM if necessary */
                           /* NOTE(review): *err is not checked after the BOM write before conversion continues -- confirm this is intended. */
     230           0 :     if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
     231             :         static const char bom[]={ 0, 0, (char)0xfe, (char)0xff };
     232           0 :         ucnv_fromUWriteBytes(args->converter,
     233             :                              bom, 4,
     234             :                              &args->target, args->targetLimit,
     235             :                              &args->offsets, -1,
     236           0 :                              err);
     237           0 :         args->converter->fromUnicodeStatus=0;
     238             :     }
     239             : 
     240           0 :     myTarget = (unsigned char *) args->target;
     241           0 :     temp[0] = 0;
     242             : 
                           /* A lead surrogate is pending from a previous call: jump into the loop to look for its trail unit */
     243           0 :     if (args->converter->fromUChar32) {
     244           0 :         ch = args->converter->fromUChar32;
     245           0 :         args->converter->fromUChar32 = 0;
     246           0 :         goto lowsurogate;
     247             :     }
     248             : 
     249           0 :     while (mySource < sourceLimit && myTarget < targetLimit) {
     250           0 :         ch = *(mySource++);
     251             : 
     252           0 :         if (U_IS_SURROGATE(ch)) {
     253           0 :             if (U_IS_LEAD(ch)) {
     254             : lowsurogate:
     255           0 :                 if (mySource < sourceLimit) {
     256           0 :                     ch2 = *mySource;
     257           0 :                     if (U_IS_TRAIL(ch2)) {
     258           0 :                         ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE;
     259           0 :                         mySource++;
     260             :                     }
     261             :                     else {
     262             :                         /* this is an unmatched lead code unit (1st surrogate): the next unit is not a trail */
     263             :                         /* callback(illegal) */
     264           0 :                         args->converter->fromUChar32 = ch;
     265           0 :                         *err = U_ILLEGAL_CHAR_FOUND;
     266           0 :                         break;
     267             :                     }
     268             :                 }
     269             :                 else {
     270             :                     /* ran out of source */
     271           0 :                     args->converter->fromUChar32 = ch;
     272           0 :                     if (args->flush) {
     273             :                         /* flushing: this is an unmatched lead code unit (1st surrogate) */
     274             :                         /* callback(illegal) */
     275           0 :                         *err = U_ILLEGAL_CHAR_FOUND;
     276             :                     }
     277           0 :                     break;
     278             :                 }
     279             :             }
     280             :             else {
     281             :                 /* this is an unmatched trail code unit (2nd surrogate) */
     282             :                 /* callback(illegal) */
     283           0 :                 args->converter->fromUChar32 = ch;
     284           0 :                 *err = U_ILLEGAL_CHAR_FOUND;
     285           0 :                 break;
     286             :             }
     287             :         }
     288             : 
     289             :         /* We cannot get any larger than 10FFFF because we are coming from UTF-16; temp[0] stays 0 (set once above) */
     290           0 :         temp[1] = (uint8_t) (ch >> 16 & 0x1F);
     291           0 :         temp[2] = (uint8_t) (ch >> 8);  /* unsigned cast implicitly does (ch & FF) */
     292           0 :         temp[3] = (uint8_t) (ch);       /* unsigned cast implicitly does (ch & FF) */
     293             : 
                               /* Write the four bytes; whatever no longer fits in the target spills into charErrorBuffer */
     294           0 :         for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) {
     295           0 :             if (myTarget < targetLimit) {
     296           0 :                 *(myTarget++) = temp[indexToWrite];
     297             :             }
     298             :             else {
     299           0 :                 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite];
     300           0 :                 *err = U_BUFFER_OVERFLOW_ERROR;
     301             :             }
     302             :         }
     303             :     }
     304             : 
                           /* Input remains but the target buffer is full */
     305           0 :     if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) {
     306           0 :         *err = U_BUFFER_OVERFLOW_ERROR;
     307             :     }
     308             : 
     309           0 :     args->target = (char *) myTarget;
     310           0 :     args->source = mySource;
     311             : }
     312             : 
     313             : static void U_CALLCONV
     314           0 : T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args,
     315             :                                                UErrorCode * err)
     316             : {
                           /*
                            * Same encoding as T_UConverter_fromUnicode_UTF32_BE, and additionally
                            * writes one entry to args->offsets for every byte written to
                            * args->target. The entry is offsetNum, a running count of UTF-16 code
                            * units that starts at 0 on each call; all bytes of one character get
                            * the same value. Bytes that spill into converter->charErrorBuffer get
                            * no offset entry.
                            *
                            * *err is set to U_ILLEGAL_CHAR_FOUND for an unpaired surrogate (for a
                            * lead surrogate at the very end of the input only when args->flush is
                            * set) and to U_BUFFER_OVERFLOW_ERROR when output does not fit in the
                            * target.
                            */
     317           0 :     const UChar *mySource = args->source;
     318             :     unsigned char *myTarget;
     319             :     int32_t *myOffsets;
     320           0 :     const UChar *sourceLimit = args->sourceLimit;
     321           0 :     const unsigned char *targetLimit = (unsigned char *) args->targetLimit;
     322             :     UChar32 ch, ch2;
     323           0 :     int32_t offsetNum = 0;
     324             :     unsigned int indexToWrite;
     325             :     unsigned char temp[sizeof(uint32_t)];
     326             : 
     327           0 :     if(mySource >= sourceLimit) {
     328             :         /* no input, nothing to do */
     329           0 :         return;
     330             :     }
     331             : 
     332             :     /* write the BOM if necessary */
                           /* NOTE(review): the -1 argument is presumably the offset recorded for the BOM bytes; *err is not checked afterwards -- confirm both. */
     333           0 :     if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
     334             :         static const char bom[]={ 0, 0, (char)0xfe, (char)0xff };
     335           0 :         ucnv_fromUWriteBytes(args->converter,
     336             :                              bom, 4,
     337             :                              &args->target, args->targetLimit,
     338             :                              &args->offsets, -1,
     339           0 :                              err);
     340           0 :         args->converter->fromUnicodeStatus=0;
     341             :     }
     342             : 
     343           0 :     myTarget = (unsigned char *) args->target;
     344           0 :     myOffsets = args->offsets;
     345           0 :     temp[0] = 0;
     346             : 
                           /* A lead surrogate is pending from a previous call: jump into the loop to look for its trail unit */
     347           0 :     if (args->converter->fromUChar32) {
     348           0 :         ch = args->converter->fromUChar32;
     349           0 :         args->converter->fromUChar32 = 0;
     350           0 :         goto lowsurogate;
     351             :     }
     352             : 
     353           0 :     while (mySource < sourceLimit && myTarget < targetLimit) {
     354           0 :         ch = *(mySource++);
     355             : 
     356           0 :         if (U_IS_SURROGATE(ch)) {
     357           0 :             if (U_IS_LEAD(ch)) {
     358             : lowsurogate:
     359           0 :                 if (mySource < sourceLimit) {
     360           0 :                     ch2 = *mySource;
     361           0 :                     if (U_IS_TRAIL(ch2)) {
     362           0 :                         ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE;
     363           0 :                         mySource++;
     364             :                     }
     365             :                     else {
     366             :                         /* this is an unmatched lead code unit (1st surrogate): the next unit is not a trail */
     367             :                         /* callback(illegal) */
     368           0 :                         args->converter->fromUChar32 = ch;
     369           0 :                         *err = U_ILLEGAL_CHAR_FOUND;
     370           0 :                         break;
     371             :                     }
     372             :                 }
     373             :                 else {
     374             :                     /* ran out of source */
     375           0 :                     args->converter->fromUChar32 = ch;
     376           0 :                     if (args->flush) {
     377             :                         /* flushing: this is an unmatched lead code unit (1st surrogate) */
     378             :                         /* callback(illegal) */
     379           0 :                         *err = U_ILLEGAL_CHAR_FOUND;
     380             :                     }
     381           0 :                     break;
     382             :                 }
     383             :             }
     384             :             else {
     385             :                 /* this is an unmatched trail code unit (2nd surrogate) */
     386             :                 /* callback(illegal) */
     387           0 :                 args->converter->fromUChar32 = ch;
     388           0 :                 *err = U_ILLEGAL_CHAR_FOUND;
     389           0 :                 break;
     390             :             }
     391             :         }
     392             : 
     393             :         /* We cannot get any larger than 10FFFF because we are coming from UTF-16; temp[0] stays 0 (set once above) */
     394           0 :         temp[1] = (uint8_t) (ch >> 16 & 0x1F);
     395           0 :         temp[2] = (uint8_t) (ch >> 8);  /* unsigned cast implicitly does (ch & FF) */
     396           0 :         temp[3] = (uint8_t) (ch);       /* unsigned cast implicitly does (ch & FF) */
     397             : 
                               /* Write the four bytes; whatever no longer fits in the target spills into charErrorBuffer */
     398           0 :         for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) {
     399           0 :             if (myTarget < targetLimit) {
     400           0 :                 *(myTarget++) = temp[indexToWrite];
     401           0 :                 *(myOffsets++) = offsetNum;
     402             :             }
     403             :             else {
     404           0 :                 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite];
     405           0 :                 *err = U_BUFFER_OVERFLOW_ERROR;
     406             :             }
     407             :         }
                               /* temp[1] holds bits 16..20 of ch, so it is non-zero exactly when ch is above 0xFFFF: advance by 2 in that case, else by 1 */
     408           0 :         offsetNum = offsetNum + 1 + (temp[1] != 0);
     409             :     }
     410             : 
                           /* Input remains but the target buffer is full */
     411           0 :     if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) {
     412           0 :         *err = U_BUFFER_OVERFLOW_ERROR;
     413             :     }
     414             : 
     415           0 :     args->target = (char *) myTarget;
     416           0 :     args->source = mySource;
     417           0 :     args->offsets = myOffsets;
     418             : }
     419             : 
     420             : static UChar32 U_CALLCONV
     421           0 : T_UConverter_getNextUChar_UTF32_BE(UConverterToUnicodeArgs* args,
     422             :                                    UErrorCode* err)
     423             : {
                           /*
                            * Reads one big-endian UTF-32 code point directly from args->source.
                            *
                            * Returns the code point and advances args->source by 4 on success.
                            * Otherwise returns 0xffff and sets *err:
                            *   U_INDEX_OUTOFBOUNDS_ERROR - no input at all;
                            *   U_TRUNCATED_CHAR_FOUND    - fewer than 4 bytes remain; they are copied
                            *                               into converter->toUBytes and consumed;
                            *   U_ILLEGAL_CHAR_FOUND      - value above MAXIMUM_UTF or a surrogate;
                            *                               the 4 bytes are copied into toUBytes and
                            *                               consumed.
                            */
     424             :     const uint8_t *mySource;
     425             :     UChar32 myUChar;
     426             :     int32_t length;
     427             : 
     428           0 :     mySource = (const uint8_t *)args->source;
     429           0 :     if (mySource >= (const uint8_t *)args->sourceLimit)
     430             :     {
     431             :         /* no input */
     432           0 :         *err = U_INDEX_OUTOFBOUNDS_ERROR;
     433           0 :         return 0xffff;
     434             :     }
     435             : 
     436           0 :     length = (int32_t)((const uint8_t *)args->sourceLimit - mySource);
     437           0 :     if (length < 4) 
     438             :     {
     439             :         /* got a partial character */
     440           0 :         uprv_memcpy(args->converter->toUBytes, mySource, length);
     441           0 :         args->converter->toULength = (int8_t)length;
     442           0 :         args->source = (const char *)(mySource + length);
     443           0 :         *err = U_TRUNCATED_CHAR_FOUND;
     444           0 :         return 0xffff;
     445             :     }
     446             : 
     447             :     /* Assemble the value byte by byte: the source may be unaligned, so it is not read through a 32-bit pointer. */
                           /*
                            * NOTE(review): if UChar32 is a signed 32-bit type (the uint32_t cast
                            * in the range check below suggests it is), a first byte >= 0x80 is
                            * shifted into the sign bit here -- consider assembling in uint32_t.
                            */
     448           0 :     myUChar = ((UChar32)mySource[0] << 24)
     449           0 :             | ((UChar32)mySource[1] << 16)
     450           0 :             | ((UChar32)mySource[2] << 8)
     451           0 :             | ((UChar32)mySource[3]);
     452             : 
     453           0 :     args->source = (const char *)(mySource + 4);
     454           0 :     if ((uint32_t)myUChar <= MAXIMUM_UTF && !U_IS_SURROGATE(myUChar)) {
     455           0 :         return myUChar;
     456             :     }
     457             : 
                           /* Illegal value: hand the four offending bytes to the caller through toUBytes */
     458           0 :     uprv_memcpy(args->converter->toUBytes, mySource, 4);
     459           0 :     args->converter->toULength = 4;
     460             : 
     461           0 :     *err = U_ILLEGAL_CHAR_FOUND;
     462           0 :     return 0xffff;
     463             : }
     464             : U_CDECL_END
     465             : static const UConverterImpl _UTF32BEImpl = {
                           /*
                            * Implementation table for the UTF-32BE converter. Only the five
                            * conversion functions defined above and ucnv_getNonSurrogateUnicodeSet
                            * are supplied; every other slot is NULL. The meaning of each slot is
                            * fixed by UConverterImpl, which is declared outside this listing.
                            */
     466             :     UCNV_UTF32_BigEndian,
     467             : 
     468             :     NULL,
     469             :     NULL,
     470             : 
     471             :     NULL,
     472             :     NULL,
     473             :     NULL,
     474             : 
     475             :     T_UConverter_toUnicode_UTF32_BE,
     476             :     T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC,
     477             :     T_UConverter_fromUnicode_UTF32_BE,
     478             :     T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC,
     479             :     T_UConverter_getNextUChar_UTF32_BE,
     480             : 
     481             :     NULL,
     482             :     NULL,
     483             :     NULL,
     484             :     NULL,
     485             :     ucnv_getNonSurrogateUnicodeSet,
     486             : 
     487             :     NULL,
     488             :     NULL
     489             : };
     490             : 
     491             : /* The 1232 CCSID refers to any version of Unicode with any endianness of UTF-32 */
     492             : static const UConverterStaticData _UTF32BEStaticData = {
                           /* Field meanings below are inferred from the values; confirm against the UConverterStaticData declaration. */
     493             :     sizeof(UConverterStaticData),
     494             :     "UTF-32BE",
     495             :     1232,
     496             :     UCNV_IBM, UCNV_UTF32_BigEndian, 4, 4, /* presumably platform, converter type, min and max bytes per character -- confirm */
     497             :     { 0, 0, 0xff, 0xfd }, 4, FALSE, FALSE, /* 00 00 FF FD is U+FFFD in big-endian UTF-32; presumably the substitution bytes and their length -- confirm */
     498             :     0,
     499             :     0,
     500             :     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
     501             : };
     502             : 
     503             : const UConverterSharedData _UTF32BEData = /* non-static shared-data object for UTF-32BE, built from the static data and implementation table above */
     504             :         UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF32BEStaticData, &_UTF32BEImpl);
     505             : 
     506             : /* UTF-32LE ---------------------------------------------------------- */
     507             : U_CDECL_BEGIN
     508             : static void U_CALLCONV
     509           0 : T_UConverter_toUnicode_UTF32_LE(UConverterToUnicodeArgs * args,
     510             :                                 UErrorCode * err)
     511             : {
                           /*
                            * Decodes little-endian UTF-32 bytes from args->source into UTF-16
                            * code units at args->target.
                            *
                            * Four bytes are accumulated per character, least significant byte
                            * first (byte i is shifted left by i * 8); each byte is also copied
                            * into converter->toUBytes. If the source ends in the middle of a
                            * character, the partial value (plus 1) and the number of bytes read so
                            * far are saved in converter->toUnicodeStatus / toULength and picked up
                            * by the next call.
                            *
                            * *err is set to U_ILLEGAL_CHAR_FOUND for a value above MAXIMUM_UTF or a
                            * surrogate value, and to U_BUFFER_OVERFLOW_ERROR when the target is
                            * full while input remains or when a trail surrogate had to be parked
                            * in converter->UCharErrorBuffer.
                            *
                            * On return args->source and args->target point past what was consumed
                            * and written.
                            */
     512           0 :     const unsigned char *mySource = (unsigned char *) args->source;
     513           0 :     UChar *myTarget = args->target;
     514           0 :     const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
     515           0 :     const UChar *targetLimit = args->targetLimit;
     516           0 :     unsigned char *toUBytes = args->converter->toUBytes;
     517             :     uint32_t ch, i;
     518             : 
     519             :     /* Resume a character left incomplete by a previous call (only when there is room for output) */
     520           0 :     if (args->converter->toUnicodeStatus && myTarget < targetLimit)
     521             :     {
     522           0 :         i = args->converter->toULength;       /* restore # of bytes consumed */
     523           0 :         args->converter->toULength = 0;
     524             : 
     525             :         /* Partial value saved by the previous call; it was stored +1 */
     526           0 :         ch = args->converter->toUnicodeStatus - 1;
     527           0 :         args->converter->toUnicodeStatus = 0;
     528           0 :         goto morebytes;
     529             :     }
     530             : 
     531           0 :     while (mySource < sourceLimit && myTarget < targetLimit)
     532             :     {
     533           0 :         i = 0;
     534           0 :         ch = 0;
     535             : morebytes:
     536           0 :         while (i < sizeof(uint32_t))
     537             :         {
     538           0 :             if (mySource < sourceLimit)
     539             :             {
                                   /*
                                    * NOTE(review): the uint8_t operand is promoted to int before
                                    * the shift, so for i == 3 a byte >= 0x80 is shifted into the
                                    * sign bit of int; casting to uint32_t first would avoid
                                    * relying on that.
                                    */
     540           0 :                 ch |= ((uint8_t)(*mySource)) << (i * 8);
     541           0 :                 toUBytes[i++] = (char) *(mySource++);
     542             :             }
     543             :             else
     544             :             {
     545             :                 /* source exhausted in mid-character: save the partial value for the next call */
     546             :                 /* + 1 so that a partial value of 0 still leaves a non-zero status */
     547           0 :                 args->converter->toUnicodeStatus = ch + 1;
     548           0 :                 args->converter->toULength = (int8_t) i;
     549           0 :                 goto donefornow;
     550             :             }
     551             :         }
     552             : 
     553           0 :         if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) {
     554             :             /* All four bytes were read and the value is a valid, non-surrogate code point */
     555           0 :             if (ch <= MAXIMUM_UCS2) {
     556             :                 /* fits in 16 bits */
     557           0 :                 *(myTarget++) = (UChar) ch;
     558             :             }
     559             :             else {
     560             :                 /* write out the surrogates */
     561           0 :                 *(myTarget++) = U16_LEAD(ch);
     562           0 :                 ch = U16_TRAIL(ch);
     563           0 :                 if (myTarget < targetLimit) {
     564           0 :                     *(myTarget++) = (UChar)ch;
     565             :                 }
     566             :                 else {
     567             :                     /* No room for the trail surrogate: park it in the converter's UCharErrorBuffer */
     568           0 :                     args->converter->UCharErrorBuffer[0] = (UChar) ch;
     569           0 :                     args->converter->UCharErrorBufferLength = 1;
     570           0 :                     *err = U_BUFFER_OVERFLOW_ERROR;
     571           0 :                     break;
     572             :                 }
     573             :             }
     574             :         }
     575             :         else {
                               /* Out-of-range or surrogate value: the offending bytes are already in toUBytes */
     576           0 :             args->converter->toULength = (int8_t)i;
     577           0 :             *err = U_ILLEGAL_CHAR_FOUND;
     578           0 :             break;
     579             :         }
     580             :     }
     581             : 
     582             : donefornow:
     583           0 :     if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
     584             :     {
     585             :         /* Input remains but the target buffer is full */
     586           0 :         *err = U_BUFFER_OVERFLOW_ERROR;
     587             :     }
     588             : 
     589           0 :     args->target = myTarget;
     590           0 :     args->source = (const char *) mySource;
     591           0 : }
     592             : 
                       /*
                        * Converts UTF-32LE bytes to UTF-16 code units and records, for every
                        * code unit written, an offset counted from the start of this call.
                        *
                        * args: source/sourceLimit bound the input bytes, target/targetLimit
                        *       bound the UChar output, offsets receives one int32_t per UChar
                        *       written; source, target and offsets are advanced on return.
                        * err:  set to U_ILLEGAL_CHAR_FOUND for a value above MAXIMUM_UTF or a
                        *       surrogate, and to U_BUFFER_OVERFLOW_ERROR when output does not fit.
                        *
                        * A sequence cut off by the end of the input is carried to the next
                        * call in converter->toUnicodeStatus (stored as value + 1) together
                        * with converter->toULength and converter->toUBytes.
                        */
     593             : static void U_CALLCONV
     594           0 : T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(UConverterToUnicodeArgs * args,
     595             :                                              UErrorCode * err)
     596             : {
     597           0 :     const unsigned char *mySource = (unsigned char *) args->source;
     598           0 :     UChar *myTarget = args->target;
     599           0 :     int32_t *myOffsets = args->offsets;
     600           0 :     const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
     601           0 :     const UChar *targetLimit = args->targetLimit;
     602           0 :     unsigned char *toUBytes = args->converter->toUBytes;
     603             :     uint32_t ch, i;
     604           0 :     int32_t offsetNum = 0;
     605             : 
     606             :     /* Resume a sequence left incomplete by a previous call (only if there is room for output) */
     607           0 :     if (args->converter->toUnicodeStatus && myTarget < targetLimit)
     608             :     {
     609           0 :         i = args->converter->toULength;       /* restore # of bytes consumed */
     610           0 :         args->converter->toULength = 0;
     611             : 
     612             :         /* The partial value was saved as ch + 1, so subtract 1 to get it back */
     613           0 :         ch = args->converter->toUnicodeStatus - 1;
     614           0 :         args->converter->toUnicodeStatus = 0;
     615           0 :         goto morebytes;
     616             :     }
     617             : 
     618           0 :     while (mySource < sourceLimit && myTarget < targetLimit)
     619             :     {
     620           0 :         i = 0;
     621           0 :         ch = 0;
     622             : morebytes:
                               /* assemble one 32-bit value, least significant byte first */
     623           0 :         while (i < sizeof(uint32_t))
     624             :         {
     625           0 :             if (mySource < sourceLimit)
     626             :             {
                                       /* NOTE(review): the uint8_t operand is promoted to int before the
                                        * shift, so for i == 3 a byte >= 0x80 shifts into the sign bit;
                                        * consider casting to uint32_t first. */
     627           0 :                 ch |= ((uint8_t)(*mySource)) << (i * 8);
     628           0 :                 toUBytes[i++] = (char) *(mySource++);
     629             :             }
     630             :             else
     631             :             {
     632             :                 /* input ended mid-sequence: save the partial value for the next call */
     633             :                 /* + 1 so that a partial value of 0 still yields a non-zero status */
     634           0 :                 args->converter->toUnicodeStatus = ch + 1;
     635           0 :                 args->converter->toULength = (int8_t) i;
     636           0 :                 goto donefornow;
     637             :             }
     638             :         }
     639             : 
     640           0 :         if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch))
     641             :         {
     642             :             /* All four bytes were read and the value is neither above MAXIMUM_UTF nor a surrogate */
     643           0 :             if (ch <= MAXIMUM_UCS2) 
     644             :             {
     645             :                 /* fits in 16 bits */
     646           0 :                 *(myTarget++) = (UChar) ch;
     647           0 :                 *(myOffsets++) = offsetNum;
     648             :             }
     649             :             else {
     650             :                 /* write out the surrogates; both code units get the same offset */
     651           0 :                 *(myTarget++) = U16_LEAD(ch);
     652           0 :                 *(myOffsets++) = offsetNum;
     653           0 :                 ch = U16_TRAIL(ch);
     654           0 :                 if (myTarget < targetLimit)
     655             :                 {
     656           0 :                     *(myTarget++) = (UChar)ch;
     657           0 :                     *(myOffsets++) = offsetNum;
     658             :                 }
     659             :                 else
     660             :                 {
     661             :                     /* Target is full: keep the trail surrogate in the converter's UCharErrorBuffer */
     662           0 :                     args->converter->UCharErrorBuffer[0] = (UChar) ch;
     663           0 :                     args->converter->UCharErrorBufferLength = 1;
     664           0 :                     *err = U_BUFFER_OVERFLOW_ERROR;
     665           0 :                     break;
     666             :                 }
     667             :             }
     668             :         }
     669             :         else
     670             :         {
                                   /* value above MAXIMUM_UTF or a surrogate: the offending bytes are in toUBytes */
     671           0 :             args->converter->toULength = (int8_t)i;
     672           0 :             *err = U_ILLEGAL_CHAR_FOUND;
     673           0 :             break;
     674             :         }
                               /* NOTE(review): i is the full sequence length here even when some of the
                                * bytes came from a previous buffer via the resume path; confirm that the
                                * following offsets are meant to advance by that much. */
     675           0 :         offsetNum += i;
     676             :     }
     677             : 
     678             : donefornow:
     679           0 :     if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
     680             :     {
     681             :         /* Input remains but the target is full */
     682           0 :         *err = U_BUFFER_OVERFLOW_ERROR;
     683             :     }
     684             : 
                           /* publish the advanced pointers back to the caller */
     685           0 :     args->target = myTarget;
     686           0 :     args->source = (const char *) mySource;
     687           0 :     args->offsets = myOffsets;
     688           0 : }
     689             : 
                       /*
                        * Converts UTF-16 code units to UTF-32LE bytes.
                        *
                        * args: source/sourceLimit bound the UChar input, target/targetLimit bound
                        *       the byte output; source and target are advanced on return.
                        * err:  set to U_ILLEGAL_CHAR_FOUND for an unpaired surrogate (the code
                        *       unit is left in converter->fromUChar32) and to
                        *       U_BUFFER_OVERFLOW_ERROR when output does not fit.
                        *
                        * Returns immediately when there is no input, before the BOM or any
                        * pending lead surrogate is looked at. Otherwise the bytes FF FE 00 00
                        * are written first if fromUnicodeStatus is UCNV_NEED_TO_WRITE_BOM.
                        * Output bytes that do not fit in the target are appended to
                        * converter->charErrorBuffer.
                        */
     690             : static void U_CALLCONV
     691           0 : T_UConverter_fromUnicode_UTF32_LE(UConverterFromUnicodeArgs * args,
     692             :                                   UErrorCode * err)
     693             : {
     694           0 :     const UChar *mySource = args->source;
     695             :     unsigned char *myTarget;
     696           0 :     const UChar *sourceLimit = args->sourceLimit;
     697           0 :     const unsigned char *targetLimit = (unsigned char *) args->targetLimit;
     698             :     UChar32 ch, ch2;
     699             :     unsigned int indexToWrite;
     700             :     unsigned char temp[sizeof(uint32_t)];
     701             : 
     702           0 :     if(mySource >= sourceLimit) {
     703             :         /* no input, nothing to do */
     704           0 :         return;
     705             :     }
     706             : 
     707             :     /* write the BOM if necessary */
     708           0 :     if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
     709             :         static const char bom[]={ (char)0xff, (char)0xfe, 0, 0 };
     710           0 :         ucnv_fromUWriteBytes(args->converter,
     711             :                              bom, 4,
     712             :                              &args->target, args->targetLimit,
     713             :                              &args->offsets, -1,
     714           0 :                              err);
     715           0 :         args->converter->fromUnicodeStatus=0;
     716             :     }
     717             : 
                           /* args->target is read only now because the BOM write above may have advanced it */
     718           0 :     myTarget = (unsigned char *) args->target;
                           /* the most significant output byte is always 0; it is set once and never rewritten */
     719           0 :     temp[3] = 0;
     720             : 
                           /* a code unit left in fromUChar32 by an earlier call is resumed as a pending lead surrogate */
     721           0 :     if (args->converter->fromUChar32)
     722             :     {
     723           0 :         ch = args->converter->fromUChar32;
     724           0 :         args->converter->fromUChar32 = 0;
     725           0 :         goto lowsurogate;
     726             :     }
     727             : 
     728           0 :     while (mySource < sourceLimit && myTarget < targetLimit)
     729             :     {
     730           0 :         ch = *(mySource++);
     731             : 
     732           0 :         if (U16_IS_SURROGATE(ch)) {
     733           0 :             if (U16_IS_LEAD(ch))
     734             :             {
     735             : lowsurogate:
     736           0 :                 if (mySource < sourceLimit)
     737             :                 {
     738           0 :                     ch2 = *mySource;
     739           0 :                     if (U16_IS_TRAIL(ch2)) {
                                               /* combine the surrogate pair into one supplementary code point */
     740           0 :                         ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE;
     741           0 :                         mySource++;
     742             :                     }
     743             :                     else {
     744             :                         /* this is an unmatched lead code unit (1st surrogate): the next unit is not a trail */
     745             :                         /* callback(illegal) */
     746           0 :                         args->converter->fromUChar32 = ch;
     747           0 :                         *err = U_ILLEGAL_CHAR_FOUND;
     748           0 :                         break;
     749             :                     }
     750             :                 }
     751             :                 else {
     752             :                     /* ran out of source: keep the lead surrogate for the next call */
     753           0 :                     args->converter->fromUChar32 = ch;
     754           0 :                     if (args->flush) {
     755             :                         /* no more input will follow, so this is an unmatched lead code unit (1st surrogate) */
     756             :                         /* callback(illegal) */
     757           0 :                         *err = U_ILLEGAL_CHAR_FOUND;
     758             :                     }
     759           0 :                     break;
     760             :                 }
     761             :             }
     762             :             else {
     763             :                 /* this is an unmatched trail code unit (2nd surrogate) */
     764             :                 /* callback(illegal) */
     765           0 :                 args->converter->fromUChar32 = ch;
     766           0 :                 *err = U_ILLEGAL_CHAR_FOUND;
     767           0 :                 break;
     768             :             }
     769             :         }
     770             : 
     771             :         /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
     772           0 :         temp[2] = (uint8_t) (ch >> 16 & 0x1F);
     773           0 :         temp[1] = (uint8_t) (ch >> 8);  /* unsigned cast implicitly does (ch & FF) */
     774           0 :         temp[0] = (uint8_t) (ch);       /* unsigned cast implicitly does (ch & FF) */
     775             : 
                               /* emit the four bytes lowest first; whatever does not fit is queued in charErrorBuffer */
     776           0 :         for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++)
     777             :         {
     778           0 :             if (myTarget < targetLimit)
     779             :             {
     780           0 :                 *(myTarget++) = temp[indexToWrite];
     781             :             }
     782             :             else
     783             :             {
     784           0 :                 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite];
     785           0 :                 *err = U_BUFFER_OVERFLOW_ERROR;
     786             :             }
     787             :         }
     788             :     }
     789             : 
                           /* input remains but the target is full and no other error was reported */
     790           0 :     if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
     791             :     {
     792           0 :         *err = U_BUFFER_OVERFLOW_ERROR;
     793             :     }
     794             : 
     795           0 :     args->target = (char *) myTarget;
     796           0 :     args->source = mySource;
     797             : }
     798             : 
                       /*
                        * Converts UTF-16 code units to UTF-32LE bytes and records, for every
                        * byte written to the target, the index of the source code unit that
                        * produced it (counted from the start of this call).
                        *
                        * args: source/sourceLimit bound the UChar input, target/targetLimit bound
                        *       the byte output, offsets receives one int32_t per byte written;
                        *       source, target and offsets are advanced on return.
                        * err:  set to U_ILLEGAL_CHAR_FOUND for an unpaired surrogate (the code
                        *       unit is left in converter->fromUChar32) and to
                        *       U_BUFFER_OVERFLOW_ERROR when output does not fit.
                        *
                        * Returns immediately when there is no input. Otherwise the bytes
                        * FF FE 00 00 are written first if fromUnicodeStatus is
                        * UCNV_NEED_TO_WRITE_BOM. Bytes that do not fit in the target go to
                        * converter->charErrorBuffer and get no offset entry.
                        */
     799             : static void U_CALLCONV
     800           0 : T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args,
     801             :                                                UErrorCode * err)
     802             : {
     803           0 :     const UChar *mySource = args->source;
     804             :     unsigned char *myTarget;
     805             :     int32_t *myOffsets;
     806           0 :     const UChar *sourceLimit = args->sourceLimit;
     807           0 :     const unsigned char *targetLimit = (unsigned char *) args->targetLimit;
     808             :     UChar32 ch, ch2;
     809             :     unsigned int indexToWrite;
     810             :     unsigned char temp[sizeof(uint32_t)];
     811           0 :     int32_t offsetNum = 0;
     812             : 
     813           0 :     if(mySource >= sourceLimit) {
     814             :         /* no input, nothing to do */
     815           0 :         return;
     816             :     }
     817             : 
     818             :     /* write the BOM if necessary */
     819           0 :     if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
     820             :         static const char bom[]={ (char)0xff, (char)0xfe, 0, 0 };
                               /* presumably the -1 argument is the source index recorded for the BOM bytes;
                                * ucnv_fromUWriteBytes is defined elsewhere, so confirm against its declaration */
     821           0 :         ucnv_fromUWriteBytes(args->converter,
     822             :                              bom, 4,
     823             :                              &args->target, args->targetLimit,
     824             :                              &args->offsets, -1,
     825           0 :                              err);
     826           0 :         args->converter->fromUnicodeStatus=0;
     827             :     }
     828             : 
                           /* target and offsets are read only now because the BOM write above may have advanced them */
     829           0 :     myTarget = (unsigned char *) args->target;
     830           0 :     myOffsets = args->offsets;
                           /* the most significant output byte is always 0; it is set once and never rewritten */
     831           0 :     temp[3] = 0;
     832             : 
                           /* a code unit left in fromUChar32 by an earlier call is resumed as a pending lead surrogate */
     833           0 :     if (args->converter->fromUChar32)
     834             :     {
     835           0 :         ch = args->converter->fromUChar32;
     836           0 :         args->converter->fromUChar32 = 0;
     837           0 :         goto lowsurogate;
     838             :     }
     839             : 
     840           0 :     while (mySource < sourceLimit && myTarget < targetLimit)
     841             :     {
     842           0 :         ch = *(mySource++);
     843             : 
     844           0 :         if (U16_IS_SURROGATE(ch)) {
     845           0 :             if (U16_IS_LEAD(ch))
     846             :             {
     847             : lowsurogate:
     848           0 :                 if (mySource < sourceLimit)
     849             :                 {
     850           0 :                     ch2 = *mySource;
     851           0 :                     if (U16_IS_TRAIL(ch2))
     852             :                     {
                                               /* combine the surrogate pair into one supplementary code point */
     853           0 :                         ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE;
     854           0 :                         mySource++;
     855             :                     }
     856             :                     else {
     857             :                         /* this is an unmatched lead code unit (1st surrogate): the next unit is not a trail */
     858             :                         /* callback(illegal) */
     859           0 :                         args->converter->fromUChar32 = ch;
     860           0 :                         *err = U_ILLEGAL_CHAR_FOUND;
     861           0 :                         break;
     862             :                     }
     863             :                 }
     864             :                 else {
     865             :                     /* ran out of source: keep the lead surrogate for the next call */
     866           0 :                     args->converter->fromUChar32 = ch;
     867           0 :                     if (args->flush) {
     868             :                         /* no more input will follow, so this is an unmatched lead code unit (1st surrogate) */
     869             :                         /* callback(illegal) */
     870           0 :                         *err = U_ILLEGAL_CHAR_FOUND;
     871             :                     }
     872           0 :                     break;
     873             :                 }
     874             :             }
     875             :             else {
     876             :                 /* this is an unmatched trail code unit (2nd surrogate) */
     877             :                 /* callback(illegal) */
     878           0 :                 args->converter->fromUChar32 = ch;
     879           0 :                 *err = U_ILLEGAL_CHAR_FOUND;
     880           0 :                 break;
     881             :             }
     882             :         }
     883             : 
     884             :         /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */
     885           0 :         temp[2] = (uint8_t) (ch >> 16 & 0x1F);
     886           0 :         temp[1] = (uint8_t) (ch >> 8);  /* unsigned cast implicitly does (ch & FF) */
     887           0 :         temp[0] = (uint8_t) (ch);       /* unsigned cast implicitly does (ch & FF) */
     888             : 
                               /* emit the four bytes lowest first; whatever does not fit is queued in charErrorBuffer */
     889           0 :         for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++)
     890             :         {
     891           0 :             if (myTarget < targetLimit)
     892             :             {
     893           0 :                 *(myTarget++) = temp[indexToWrite];
     894           0 :                 *(myOffsets++) = offsetNum;
     895             :             }
     896             :             else
     897             :             {
     898           0 :                 args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite];
     899           0 :                 *err = U_BUFFER_OVERFLOW_ERROR;
     900             :             }
     901             :         }
                               /* advance by one code unit, or by two when bits 16..20 are set (code point above 0xFFFF) */
     902           0 :         offsetNum = offsetNum + 1 + (temp[2] != 0);
     903             :     }
     904             : 
                           /* input remains but the target is full and no other error was reported */
     905           0 :     if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
     906             :     {
     907           0 :         *err = U_BUFFER_OVERFLOW_ERROR;
     908             :     }
     909             : 
     910           0 :     args->target = (char *) myTarget;
     911           0 :     args->source = mySource;
     912           0 :     args->offsets = myOffsets;
     913             : }
     914             : 
                       /*
                        * Reads one UTF-32LE code point from args->source and advances
                        * args->source past the bytes consumed.
                        *
                        * Returns the code point, or 0xffff with *err set to
                        *   U_INDEX_OUTOFBOUNDS_ERROR  when there is no input at all,
                        *   U_TRUNCATED_CHAR_FOUND     when fewer than 4 bytes remain (they are
                        *                              copied to converter->toUBytes),
                        *   U_ILLEGAL_CHAR_FOUND       when the value is above MAXIMUM_UTF or is
                        *                              a surrogate (the 4 bytes are copied to
                        *                              converter->toUBytes).
                        */
     915             : static UChar32 U_CALLCONV
     916           0 : T_UConverter_getNextUChar_UTF32_LE(UConverterToUnicodeArgs* args,
     917             :                                    UErrorCode* err)
     918             : {
     919             :     const uint8_t *mySource;
     920             :     UChar32 myUChar;
     921             :     int32_t length;
     922             : 
     923           0 :     mySource = (const uint8_t *)args->source;
     924           0 :     if (mySource >= (const uint8_t *)args->sourceLimit)
     925             :     {
     926             :         /* no input */
     927           0 :         *err = U_INDEX_OUTOFBOUNDS_ERROR;
     928           0 :         return 0xffff;
     929             :     }
     930             : 
     931           0 :     length = (int32_t)((const uint8_t *)args->sourceLimit - mySource);
     932           0 :     if (length < 4) 
     933             :     {
     934             :         /* got a partial character: hand the leftover bytes to the converter and consume them */
     935           0 :         uprv_memcpy(args->converter->toUBytes, mySource, length);
     936           0 :         args->converter->toULength = (int8_t)length;
     937           0 :         args->source = (const char *)(mySource + length);
     938           0 :         *err = U_TRUNCATED_CHAR_FOUND;
     939           0 :         return 0xffff;
     940             :     }
     941             : 
     942             :     /* Don't even try to do a direct cast because the value may be on an odd address. */
                           /* NOTE(review): if UChar32 is a signed 32-bit type, a top byte >= 0x80
                            * shifts into the sign bit here; the range check below casts to uint32_t. */
     943           0 :     myUChar = ((UChar32)mySource[3] << 24)
     944           0 :             | ((UChar32)mySource[2] << 16)
     945           0 :             | ((UChar32)mySource[1] << 8)
     946           0 :             | ((UChar32)mySource[0]);
     947             : 
                           /* the four bytes are consumed whether or not the value turns out to be valid */
     948           0 :     args->source = (const char *)(mySource + 4);
     949           0 :     if ((uint32_t)myUChar <= MAXIMUM_UTF && !U_IS_SURROGATE(myUChar)) {
     950           0 :         return myUChar;
     951             :     }
     952             : 
                           /* illegal value: expose the offending bytes through the converter */
     953           0 :     uprv_memcpy(args->converter->toUBytes, mySource, 4);
     954           0 :     args->converter->toULength = 4;
     955             : 
     956           0 :     *err = U_ILLEGAL_CHAR_FOUND;
     957           0 :     return 0xffff;
     958             : }
     959             : U_CDECL_END
     960             : static const UConverterImpl _UTF32LEImpl = {
     961             :     UCNV_UTF32_LittleEndian,
     962             : /* Function table for the UTF-32LE converter. The meaning of each slot is fixed by UConverterImpl, which is declared outside this listing; NULL leaves a slot unset. */
     963             :     NULL,
     964             :     NULL,
     965             : 
     966             :     NULL,
     967             :     NULL,
     968             :     NULL,
     969             : /* conversion entry points: toUnicode, toUnicode with offsets, fromUnicode, fromUnicode with offsets, getNextUChar */
     970             :     T_UConverter_toUnicode_UTF32_LE,
     971             :     T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC,
     972             :     T_UConverter_fromUnicode_UTF32_LE,
     973             :     T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC,
     974             :     T_UConverter_getNextUChar_UTF32_LE,
     975             : 
     976             :     NULL,
     977             :     NULL,
     978             :     NULL,
     979             :     NULL,
     980             :     ucnv_getNonSurrogateUnicodeSet,
     981             : 
     982             :     NULL,
     983             :     NULL
     984             : };
     985             : 
     986             : /* The 1232 CCSID refers to any version of Unicode with any endianness of UTF-32 */
                       /* NOTE(review): the value used below is 1234, not 1232; confirm which CCSID this comment should name for UTF-32LE. */
                       /* Presumably the two 4s are the minimum and maximum bytes per character and
                        * FD FF 00 00 (0x0000FFFD in little-endian byte order) with length 4 is the
                        * substitution sequence; the field layout is declared with UConverterStaticData,
                        * outside this listing, so verify there. */
     987             : static const UConverterStaticData _UTF32LEStaticData = {
     988             :     sizeof(UConverterStaticData),
     989             :     "UTF-32LE",
     990             :     1234,
     991             :     UCNV_IBM, UCNV_UTF32_LittleEndian, 4, 4,
     992             :     { 0xfd, 0xff, 0, 0 }, 4, FALSE, FALSE,
     993             :     0,
     994             :     0,
     995             :     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
     996             : };
     997             : 
     998             : 
                       /* Shared-data object for UTF-32LE, built from the static data and the function table defined above. */
     999             : const UConverterSharedData _UTF32LEData =
    1000             :         UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF32LEStaticData, &_UTF32LEImpl);
    1001             : 
    1002             : /* UTF-32 (Detect BOM) ------------------------------------------------------ */
    1003             : 
    1004             : /*
    1005             :  * Detect a BOM at the beginning of the stream and select UTF-32BE or UTF-32LE
    1006             :  * accordingly.
    1007             :  *
    1008             :  * State values:
    1009             :  * 0    initial state
    1010             :  * 1    saw 00
    1011             :  * 2    saw 00 00
    1012             :  * 3    saw 00 00 FE
    1013             :  * 4    -
    1014             :  * 5    saw FF
    1015             :  * 6    saw FF FE
    1016             :  * 7    saw FF FE 00
    1017             :  * 8    UTF-32BE mode
    1018             :  * 9    UTF-32LE mode
    1019             :  *
    1020             :  * During detection: state&3==number of matching bytes so far.
    1021             :  *
    1022             :  * On output, emit U+FEFF as the first code point.
    1023             :  */
    1024             : U_CDECL_BEGIN
    1025             : static void U_CALLCONV
    1026           0 : _UTF32Reset(UConverter *cnv, UConverterResetChoice choice) {
                           /*
                            * Resets the BOM-detecting UTF-32 converter for the direction(s) named by
                            * choice. Both branches run for any choice value that is not above
                            * UCNV_RESET_TO_UNICODE and not equal to it; presumably that is
                            * UCNV_RESET_BOTH, but the enum values are not visible here.
                            */
    1027           0 :     if(choice<=UCNV_RESET_TO_UNICODE) {
    1028             :         /* reset toUnicode: state=0 (initial BOM-detection state) */
    1029           0 :         cnv->mode=0;
    1030             :     }
    1031           0 :     if(choice!=UCNV_RESET_TO_UNICODE) {
    1032             :         /* reset fromUnicode: prepare to output the UTF-32PE BOM */
    1033           0 :         cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM;
    1034             :     }
    1035           0 : }
    1036             : 
                       /*
                        * Open hook for the BOM-detecting UTF-32 converter: puts both conversion
                        * directions into their initial state. pArgs and pErrorCode are not used.
                        */
    1037             : static void U_CALLCONV
    1038           0 : _UTF32Open(UConverter *cnv,
    1039             :            UConverterLoadArgs *pArgs,
    1040             :            UErrorCode *pErrorCode) {
    1041             :     (void)pArgs;
    1042             :     (void)pErrorCode;
    1043           0 :     _UTF32Reset(cnv, UCNV_RESET_BOTH);
    1044           0 : }
    1045             : 
                       /*
                        * Both byte order marks back to back: bytes 0..3 are 00 00 FE FF and
                        * bytes 4..7 are FF FE 00 00. The detection code indexes this array
                        * directly with its state number (utf32BOM[state]) and picks one half
                        * with utf32BOM+(state&4).
                        */
    1046             : static const char utf32BOM[8]={ 0, 0, (char)0xfe, (char)0xff,    (char)0xff, (char)0xfe, 0, 0 };
    1047             : 
    1048             : static void U_CALLCONV
    1049           0 : _UTF32ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
    1050             :                            UErrorCode *pErrorCode) {
    1051           0 :     UConverter *cnv=pArgs->converter;
    1052           0 :     const char *source=pArgs->source;
    1053           0 :     const char *sourceLimit=pArgs->sourceLimit;
    1054           0 :     int32_t *offsets=pArgs->offsets;
    1055             : 
    1056             :     int32_t state, offsetDelta;
    1057             :     char b;
    1058             : 
    1059           0 :     state=cnv->mode;
    1060             : 
    1061             :     /*
    1062             :      * If we detect a BOM in this buffer, then we must add the BOM size to the
    1063             :      * offsets because the actual converter function will not see and count the BOM.
    1064             :      * offsetDelta will have the number of the BOM bytes that are in the current buffer.
    1065             :      */
    1066           0 :     offsetDelta=0;
    1067             : 
    1068           0 :     while(source<sourceLimit && U_SUCCESS(*pErrorCode)) {
    1069           0 :         switch(state) {
    1070             :         case 0:
    1071           0 :             b=*source;
    1072           0 :             if(b==0) {
    1073           0 :                 state=1; /* could be 00 00 FE FF */
    1074           0 :             } else if(b==(char)0xff) {
    1075           0 :                 state=5; /* could be FF FE 00 00 */
    1076             :             } else {
    1077           0 :                 state=8; /* default to UTF-32BE */
    1078           0 :                 continue;
    1079             :             }
    1080           0 :             ++source;
    1081           0 :             break;
    1082             :         case 1:
    1083             :         case 2:
    1084             :         case 3:
    1085             :         case 5:
    1086             :         case 6:
    1087             :         case 7:
    1088           0 :             if(*source==utf32BOM[state]) {
    1089           0 :                 ++state;
    1090           0 :                 ++source;
    1091           0 :                 if(state==4) {
    1092           0 :                     state=8; /* detect UTF-32BE */
    1093           0 :                     offsetDelta=(int32_t)(source-pArgs->source);
    1094           0 :                 } else if(state==8) {
    1095           0 :                     state=9; /* detect UTF-32LE */
    1096           0 :                     offsetDelta=(int32_t)(source-pArgs->source);
    1097             :                 }
    1098             :             } else {
    1099             :                 /* switch to UTF-32BE and pass the previous bytes */
    1100           0 :                 int32_t count=(int32_t)(source-pArgs->source); /* number of bytes from this buffer */
    1101             : 
    1102             :                 /* reset the source */
    1103           0 :                 source=pArgs->source;
    1104             : 
    1105           0 :                 if(count==(state&3)) {
    1106             :                     /* simple: all in the same buffer, just reset source */
    1107             :                 } else {
    1108           0 :                     UBool oldFlush=pArgs->flush;
    1109             : 
    1110             :                     /* some of the bytes are from a previous buffer, replay those first */
    1111           0 :                     pArgs->source=utf32BOM+(state&4); /* select the correct BOM */
    1112           0 :                     pArgs->sourceLimit=pArgs->source+((state&3)-count); /* replay previous bytes */
    1113           0 :                     pArgs->flush=FALSE; /* this sourceLimit is not the real source stream limit */
    1114             : 
    1115             :                     /* no offsets: bytes from previous buffer, and not enough for output */
    1116           0 :                     T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode);
    1117             : 
    1118             :                     /* restore real pointers; pArgs->source will be set in case 8/9 */
    1119           0 :                     pArgs->sourceLimit=sourceLimit;
    1120           0 :                     pArgs->flush=oldFlush;
    1121             :                 }
    1122           0 :                 state=8;
    1123           0 :                 continue;
    1124             :             }
    1125           0 :             break;
    1126             :         case 8:
    1127             :             /* call UTF-32BE */
    1128           0 :             pArgs->source=source;
    1129           0 :             if(offsets==NULL) {
    1130           0 :                 T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode);
    1131             :             } else {
    1132           0 :                 T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(pArgs, pErrorCode);
    1133             :             }
    1134           0 :             source=pArgs->source;
    1135           0 :             break;
    1136             :         case 9:
    1137             :             /* call UTF-32LE */
    1138           0 :             pArgs->source=source;
    1139           0 :             if(offsets==NULL) {
    1140           0 :                 T_UConverter_toUnicode_UTF32_LE(pArgs, pErrorCode);
    1141             :             } else {
    1142           0 :                 T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(pArgs, pErrorCode);
    1143             :             }
    1144           0 :             source=pArgs->source;
    1145           0 :             break;
    1146             :         default:
    1147           0 :             break; /* does not occur */
    1148             :         }
    1149             :     }
    1150             : 
    1151             :     /* add BOM size to offsets - see comment at offsetDelta declaration */
    1152           0 :     if(offsets!=NULL && offsetDelta!=0) {
    1153           0 :         int32_t *offsetsLimit=pArgs->offsets;
    1154           0 :         while(offsets<offsetsLimit) {
    1155           0 :             *offsets++ += offsetDelta;
    1156             :         }
    1157             :     }
    1158             : 
    1159           0 :     pArgs->source=source;
    1160             : 
    1161           0 :     if(source==sourceLimit && pArgs->flush) {
    1162             :         /* handle truncated input */
    1163           0 :         switch(state) {
    1164             :         case 0:
    1165           0 :             break; /* no input at all, nothing to do */
    1166             :         case 8:
    1167           0 :             T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode);
    1168           0 :             break;
    1169             :         case 9:
    1170           0 :             T_UConverter_toUnicode_UTF32_LE(pArgs, pErrorCode);
    1171           0 :             break;
    1172             :         default:
    1173             :             /* handle 0<state<8: call UTF-32BE with too-short input */
    1174           0 :             pArgs->source=utf32BOM+(state&4); /* select the correct BOM */
    1175           0 :             pArgs->sourceLimit=pArgs->source+(state&3); /* replay bytes */
    1176             : 
    1177             :             /* no offsets: not enough for output */
    1178           0 :             T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode);
    1179           0 :             pArgs->source=source;
    1180           0 :             pArgs->sourceLimit=sourceLimit;
    1181           0 :             state=8;
    1182           0 :             break;
    1183             :         }
    1184             :     }
    1185             : 
    1186           0 :     cnv->mode=state;
    1187           0 : }
    1188             : 
    1189             : static UChar32 U_CALLCONV
    1190           0 : _UTF32GetNextUChar(UConverterToUnicodeArgs *pArgs,
    1191             :                    UErrorCode *pErrorCode) {
                           /*
                            * Dispatch a single-code-point read on the converter's current mode.
                            * The to-Unicode state machine earlier in this file saves its state
                            * in cnv->mode; there, state 8 is labelled "call UTF-32BE" and
                            * state 9 "call UTF-32LE". Any other mode is answered with
                            * UCNV_GET_NEXT_UCHAR_USE_TO_U.
                            * NOTE(review): presumably that sentinel makes the caller go through
                            * the to-Unicode function instead -- confirm against the sentinel's
                            * definition.
                            */
    1192           0 :     switch(pArgs->converter->mode) {
    1193             :     case 8:
                           /* mode 8: hand over to the UTF-32BE reader */
    1194           0 :         return T_UConverter_getNextUChar_UTF32_BE(pArgs, pErrorCode);
    1195             :     case 9:
                           /* mode 9: hand over to the UTF-32LE reader */
    1196           0 :         return T_UConverter_getNextUChar_UTF32_LE(pArgs, pErrorCode);
    1197             :     default:
                           /* every mode other than 8 or 9: no reader is called here */
    1198           0 :         return UCNV_GET_NEXT_UCHAR_USE_TO_U;
    1199             :     }
    1200             : }
    1201             : U_CDECL_END
    1202             : static const UConverterImpl _UTF32Impl = {
                           /*
                            * Function table for the "UTF-32" converter (type UCNV_UTF32).
                            * The initializer is positional, so the order of the entries and of
                            * the NULL placeholders must not change.
                            * NOTE(review): slot meanings come from the UConverterImpl
                            * declaration, which is not shown here -- confirm before editing.
                            */
    1203             :     UCNV_UTF32,
    1204             : 
    1205             :     NULL,
    1206             :     NULL,
    1207             : 
    1208             :     _UTF32Open,
    1209             :     NULL,
    1210             :     _UTF32Reset,
    1211             : 
                           /*
                            * The same to-Unicode function fills two consecutive slots
                            * (presumably the plain and the with-offsets entry -- confirm).
                            */
    1212             :     _UTF32ToUnicodeWithOffsets,
    1213             :     _UTF32ToUnicodeWithOffsets,
                           /*
                            * The from-Unicode pair follows the platform byte order:
                            * the BE functions when U_IS_BIG_ENDIAN, the LE functions otherwise.
                            */
    1214             : #if U_IS_BIG_ENDIAN
    1215             :     T_UConverter_fromUnicode_UTF32_BE,
    1216             :     T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC,
    1217             : #else
    1218             :     T_UConverter_fromUnicode_UTF32_LE,
    1219             :     T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC,
    1220             : #endif
    1221             :     _UTF32GetNextUChar,
    1222             : 
    1223             :     NULL, /* ### TODO implement getStarters for all Unicode encodings?! */
    1224             :     NULL,
    1225             :     NULL,
    1226             :     NULL,
    1227             :     ucnv_getNonSurrogateUnicodeSet,
    1228             : 
    1229             :     NULL,
    1230             :     NULL
    1231             : };
    1232             : 
    1233             : /* The 1236 CCSID refers to any version of Unicode with a BOM-sensitive endianness of UTF-32 */
    1234             : static const UConverterStaticData _UTF32StaticData = {
                           /*
                            * Static description of the "UTF-32" converter. The initializer is
                            * positional, so the order of the values must not change.
                            * NOTE(review): field meanings come from the UConverterStaticData
                            * declaration, which is not shown here -- confirm before editing.
                            */
    1235             :     sizeof(UConverterStaticData),
    1236             :     "UTF-32",
    1237             :     1236, /* CCSID */
    1238             :     UCNV_IBM, UCNV_UTF32, 4, 4,
                           /*
                            * Both branches hold the same four bytes, ordered by platform
                            * endianness: 00 00 FF FD when U_IS_BIG_ENDIAN, FD FF 00 00
                            * otherwise, i.e. the 32-bit value 0x0000FFFD, followed by 4.
                            * NOTE(review): presumably the substitution character and its
                            * byte length -- confirm.
                            */
    1239             : #if U_IS_BIG_ENDIAN
    1240             :     { 0, 0, 0xff, 0xfd }, 4,
    1241             : #else
    1242             :     { 0xfd, 0xff, 0, 0 }, 4,
    1243             : #endif
    1244             :     FALSE, FALSE,
    1245             :     0,
    1246             :     0,
    1247             :     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
    1248             : };
    1249             : 
    1250             : const UConverterSharedData _UTF32Data = 
                           /*
                            * Shared-data object for the "UTF-32" converter, built from the
                            * static data and the function table defined above. Unlike those
                            * two, it is not declared static.
                            */
    1251             :         UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF32StaticData, &_UTF32Impl);
    1252             : 
    1253             : #endif

Generated by: LCOV version 1.13