LCOV - code coverage report
Current view: top level - intl/icu/source/common - ucnv_u8.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 0 451 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 7 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*  
       4             : **********************************************************************
       5             : *   Copyright (C) 2002-2016, International Business Machines
       6             : *   Corporation and others.  All Rights Reserved.
       7             : **********************************************************************
       8             : *   file name:  ucnv_u8.c
       9             : *   encoding:   UTF-8
      10             : *   tab size:   8 (not used)
      11             : *   indentation:4
      12             : *
      13             : *   created on: 2002jul01
      14             : *   created by: Markus W. Scherer
      15             : *
      16             : *   UTF-8 converter implementation. Used to be in ucnv_utf.c.
      17             : *
      18             : *   Also, CESU-8 implementation, see UTR 26.
      19             : *   The CESU-8 converter uses all the same functions as the
      20             : *   UTF-8 converter, with a branch for converting supplementary code points.
      21             : */
      22             : 
      23             : #include "unicode/utypes.h"
      24             : 
      25             : #if !UCONFIG_NO_CONVERSION
      26             : 
      27             : #include "unicode/ucnv.h"
      28             : #include "unicode/utf.h"
      29             : #include "unicode/utf8.h"
      30             : #include "unicode/utf16.h"
      31             : #include "ucnv_bld.h"
      32             : #include "ucnv_cnv.h"
      33             : #include "cmemory.h"
      34             : 
      35             : /* Prototypes --------------------------------------------------------------- */
      36             : 
      37             : /* Keep these here to make finicky compilers happy */
      38             : 
      39             : U_CFUNC void ucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs *args,
      40             :                                            UErrorCode *err);
      41             : U_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs *args,
      42             :                                                         UErrorCode *err);
      43             : 
      44             : 
      45             : /* UTF-8 -------------------------------------------------------------------- */
      46             : 
      47             : /* UTF-8 Conversion DATA
      48             :  *   for more information see Unicode Standard 2.0, Transformation Formats Appendix A-9
      49             :  */
      50             : /*static const uint32_t REPLACEMENT_CHARACTER = 0x0000FFFD;*/
      51             : #define MAXIMUM_UCS2            0x0000FFFF
      52             : #define MAXIMUM_UTF             0x0010FFFF
      53             : #define MAXIMUM_UCS4            0x7FFFFFFF
      54             : #define HALF_SHIFT              10
      55             : #define HALF_BASE               0x0010000
      56             : #define HALF_MASK               0x3FF
      57             : #define SURROGATE_HIGH_START    0xD800
      58             : #define SURROGATE_HIGH_END      0xDBFF
      59             : #define SURROGATE_LOW_START     0xDC00
      60             : #define SURROGATE_LOW_END       0xDFFF
      61             : 
      62             : /* -SURROGATE_LOW_START + HALF_BASE */
      63             : #define SURROGATE_LOW_BASE      9216
      64             : 
      65             : static const uint32_t offsetsFromUTF8[7] = {0,
      66             :   (uint32_t) 0x00000000, (uint32_t) 0x00003080, (uint32_t) 0x000E2080,
      67             :   (uint32_t) 0x03C82080, (uint32_t) 0xFA082080, (uint32_t) 0x82082080
      68             : };
      69             : 
      70             : /* END OF UTF-8 Conversion DATA */
      71             : 
      72             : static const int8_t bytesFromUTF8[256] = {
      73             :   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
      74             :   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
      75             :   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
      76             :   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
      77             :   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      78             :   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      79             :   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
      80             :   3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0
      81             : };
      82             : 
      83             : /*
      84             :  * Starting with Unicode 3.0.1:
      85             :  * UTF-8 byte sequences of length N _must_ encode code points of or above utf8_minChar32[N];
      86             :  * byte sequences with more than 4 bytes are illegal in UTF-8,
      87             :  * which is tested with impossible values for them
      88             :  */
      89             : static const uint32_t
      90             : utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff };
      91             : 
      92           0 : static UBool hasCESU8Data(const UConverter *cnv)
      93             : {
      94             : #if UCONFIG_ONLY_HTML_CONVERSION
      95             :     return FALSE;
      96             : #else
      97           0 :     return (UBool)(cnv->sharedData == &_CESU8Data);
      98             : #endif
      99             : }
     100             : U_CDECL_BEGIN
     101           0 : static void  U_CALLCONV ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
     102             :                                   UErrorCode * err)
     103             : {
     104           0 :     UConverter *cnv = args->converter;
     105           0 :     const unsigned char *mySource = (unsigned char *) args->source;
     106           0 :     UChar *myTarget = args->target;
     107           0 :     const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
     108           0 :     const UChar *targetLimit = args->targetLimit;
     109           0 :     unsigned char *toUBytes = cnv->toUBytes;
     110           0 :     UBool isCESU8 = hasCESU8Data(cnv);
     111           0 :     uint32_t ch, ch2 = 0;
     112             :     int32_t i, inBytes;
     113             : 
     114             :     /* Restore size of current sequence */
     115           0 :     if (cnv->toUnicodeStatus && myTarget < targetLimit)
     116             :     {
     117           0 :         inBytes = cnv->mode;            /* restore # of bytes to consume */
     118           0 :         i = cnv->toULength;             /* restore # of bytes consumed */
     119           0 :         cnv->toULength = 0;
     120             : 
     121           0 :         ch = cnv->toUnicodeStatus;/*Stores the previously calculated ch from a previous call*/
     122           0 :         cnv->toUnicodeStatus = 0;
     123           0 :         goto morebytes;
     124             :     }
     125             : 
     126             : 
     127           0 :     while (mySource < sourceLimit && myTarget < targetLimit)
     128             :     {
     129           0 :         ch = *(mySource++);
     130           0 :         if (ch < 0x80)        /* Simple case */
     131             :         {
     132           0 :             *(myTarget++) = (UChar) ch;
     133             :         }
     134             :         else
     135             :         {
     136             :             /* store the first char */
     137           0 :             toUBytes[0] = (char)ch;
     138           0 :             inBytes = bytesFromUTF8[ch]; /* lookup current sequence length */
     139           0 :             i = 1;
     140             : 
     141             : morebytes:
     142           0 :             while (i < inBytes)
     143             :             {
     144           0 :                 if (mySource < sourceLimit)
     145             :                 {
     146           0 :                     toUBytes[i] = (char) (ch2 = *mySource);
     147           0 :                     if (!U8_IS_TRAIL(ch2))
     148             :                     {
     149           0 :                         break; /* i < inBytes */
     150             :                     }
     151           0 :                     ch = (ch << 6) + ch2;
     152           0 :                     ++mySource;
     153           0 :                     i++;
     154             :                 }
     155             :                 else
     156             :                 {
     157             :                     /* stores a partially calculated target*/
     158           0 :                     cnv->toUnicodeStatus = ch;
     159           0 :                     cnv->mode = inBytes;
     160           0 :                     cnv->toULength = (int8_t) i;
     161           0 :                     goto donefornow;
     162             :                 }
     163             :             }
     164             : 
     165             :             /* Remove the accumulated high bits */
     166           0 :             ch -= offsetsFromUTF8[inBytes];
     167             : 
     168             :             /*
     169             :              * Legal UTF-8 byte sequences in Unicode 3.0.1 and up:
     170             :              * - use only trail bytes after a lead byte (checked above)
     171             :              * - use the right number of trail bytes for a given lead byte
     172             :              * - encode a code point <= U+10ffff
     173             :              * - use the fewest possible number of bytes for their code points
     174             :              * - use at most 4 bytes (for i>=5 it is 0x10ffff<utf8_minChar32[])
     175             :              *
     176             :              * Starting with Unicode 3.2, surrogate code points must not be encoded in UTF-8.
     177             :              * There are no irregular sequences any more.
     178             :              * In CESU-8, only surrogates, not supplementary code points, are encoded directly.
     179             :              */
     180           0 :             if (i == inBytes && ch <= MAXIMUM_UTF && ch >= utf8_minChar32[i] &&
     181           0 :                 (isCESU8 ? i <= 3 : !U_IS_SURROGATE(ch)))
     182             :             {
     183             :                 /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
     184           0 :                 if (ch <= MAXIMUM_UCS2) 
     185             :                 {
     186             :                     /* fits in 16 bits */
     187           0 :                     *(myTarget++) = (UChar) ch;
     188             :                 }
     189             :                 else
     190             :                 {
     191             :                     /* write out the surrogates */
     192           0 :                     ch -= HALF_BASE;
     193           0 :                     *(myTarget++) = (UChar) ((ch >> HALF_SHIFT) + SURROGATE_HIGH_START);
     194           0 :                     ch = (ch & HALF_MASK) + SURROGATE_LOW_START;
     195           0 :                     if (myTarget < targetLimit)
     196             :                     {
     197           0 :                         *(myTarget++) = (UChar)ch;
     198             :                     }
     199             :                     else
     200             :                     {
     201             :                         /* Put in overflow buffer (not handled here) */
     202           0 :                         cnv->UCharErrorBuffer[0] = (UChar) ch;
     203           0 :                         cnv->UCharErrorBufferLength = 1;
     204           0 :                         *err = U_BUFFER_OVERFLOW_ERROR;
     205           0 :                         break;
     206             :                     }
     207             :                 }
     208             :             }
     209             :             else
     210             :             {
     211           0 :                 cnv->toULength = (int8_t)i;
     212           0 :                 *err = U_ILLEGAL_CHAR_FOUND;
     213           0 :                 break;
     214             :             }
     215             :         }
     216             :     }
     217             : 
     218             : donefornow:
     219           0 :     if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
     220             :     {
     221             :         /* End of target buffer */
     222           0 :         *err = U_BUFFER_OVERFLOW_ERROR;
     223             :     }
     224             : 
     225           0 :     args->target = myTarget;
     226           0 :     args->source = (const char *) mySource;
     227           0 : }
     228             : 
     229           0 : static void  U_CALLCONV ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeArgs * args,
     230             :                                                 UErrorCode * err)
     231             : {
     232           0 :     UConverter *cnv = args->converter;
     233           0 :     const unsigned char *mySource = (unsigned char *) args->source;
     234           0 :     UChar *myTarget = args->target;
     235           0 :     int32_t *myOffsets = args->offsets;
     236           0 :     int32_t offsetNum = 0;
     237           0 :     const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
     238           0 :     const UChar *targetLimit = args->targetLimit;
     239           0 :     unsigned char *toUBytes = cnv->toUBytes;
     240           0 :     UBool isCESU8 = hasCESU8Data(cnv);
     241           0 :     uint32_t ch, ch2 = 0;
     242             :     int32_t i, inBytes;
     243             : 
     244             :     /* Restore size of current sequence */
     245           0 :     if (cnv->toUnicodeStatus && myTarget < targetLimit)
     246             :     {
     247           0 :         inBytes = cnv->mode;            /* restore # of bytes to consume */
     248           0 :         i = cnv->toULength;             /* restore # of bytes consumed */
     249           0 :         cnv->toULength = 0;
     250             : 
     251           0 :         ch = cnv->toUnicodeStatus;/*Stores the previously calculated ch from a previous call*/
     252           0 :         cnv->toUnicodeStatus = 0;
     253           0 :         goto morebytes;
     254             :     }
     255             : 
     256           0 :     while (mySource < sourceLimit && myTarget < targetLimit)
     257             :     {
     258           0 :         ch = *(mySource++);
     259           0 :         if (ch < 0x80)        /* Simple case */
     260             :         {
     261           0 :             *(myTarget++) = (UChar) ch;
     262           0 :             *(myOffsets++) = offsetNum++;
     263             :         }
     264             :         else
     265             :         {
     266           0 :             toUBytes[0] = (char)ch;
     267           0 :             inBytes = bytesFromUTF8[ch];
     268           0 :             i = 1;
     269             : 
     270             : morebytes:
     271           0 :             while (i < inBytes)
     272             :             {
     273           0 :                 if (mySource < sourceLimit)
     274             :                 {
     275           0 :                     toUBytes[i] = (char) (ch2 = *mySource);
     276           0 :                     if (!U8_IS_TRAIL(ch2))
     277             :                     {
     278           0 :                         break; /* i < inBytes */
     279             :                     }
     280           0 :                     ch = (ch << 6) + ch2;
     281           0 :                     ++mySource;
     282           0 :                     i++;
     283             :                 }
     284             :                 else
     285             :                 {
     286           0 :                     cnv->toUnicodeStatus = ch;
     287           0 :                     cnv->mode = inBytes;
     288           0 :                     cnv->toULength = (int8_t)i;
     289           0 :                     goto donefornow;
     290             :                 }
     291             :             }
     292             : 
     293             :             /* Remove the accumulated high bits */
     294           0 :             ch -= offsetsFromUTF8[inBytes];
     295             : 
     296             :             /*
     297             :              * Legal UTF-8 byte sequences in Unicode 3.0.1 and up:
     298             :              * - use only trail bytes after a lead byte (checked above)
     299             :              * - use the right number of trail bytes for a given lead byte
     300             :              * - encode a code point <= U+10ffff
     301             :              * - use the fewest possible number of bytes for their code points
     302             :              * - use at most 4 bytes (for i>=5 it is 0x10ffff<utf8_minChar32[])
     303             :              *
     304             :              * Starting with Unicode 3.2, surrogate code points must not be encoded in UTF-8.
     305             :              * There are no irregular sequences any more.
     306             :              * In CESU-8, only surrogates, not supplementary code points, are encoded directly.
     307             :              */
     308           0 :             if (i == inBytes && ch <= MAXIMUM_UTF && ch >= utf8_minChar32[i] &&
     309           0 :                 (isCESU8 ? i <= 3 : !U_IS_SURROGATE(ch)))
     310             :             {
     311             :                 /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
     312           0 :                 if (ch <= MAXIMUM_UCS2) 
     313             :                 {
     314             :                     /* fits in 16 bits */
     315           0 :                     *(myTarget++) = (UChar) ch;
     316           0 :                     *(myOffsets++) = offsetNum;
     317             :                 }
     318             :                 else
     319             :                 {
     320             :                     /* write out the surrogates */
     321           0 :                     ch -= HALF_BASE;
     322           0 :                     *(myTarget++) = (UChar) ((ch >> HALF_SHIFT) + SURROGATE_HIGH_START);
     323           0 :                     *(myOffsets++) = offsetNum;
     324           0 :                     ch = (ch & HALF_MASK) + SURROGATE_LOW_START;
     325           0 :                     if (myTarget < targetLimit)
     326             :                     {
     327           0 :                         *(myTarget++) = (UChar)ch;
     328           0 :                         *(myOffsets++) = offsetNum;
     329             :                     }
     330             :                     else
     331             :                     {
     332           0 :                         cnv->UCharErrorBuffer[0] = (UChar) ch;
     333           0 :                         cnv->UCharErrorBufferLength = 1;
     334           0 :                         *err = U_BUFFER_OVERFLOW_ERROR;
     335             :                     }
     336             :                 }
     337           0 :                 offsetNum += i;
     338             :             }
     339             :             else
     340             :             {
     341           0 :                 cnv->toULength = (int8_t)i;
     342           0 :                 *err = U_ILLEGAL_CHAR_FOUND;
     343           0 :                 break;
     344             :             }
     345             :         }
     346             :     }
     347             : 
     348             : donefornow:
     349           0 :     if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
     350             :     {   /* End of target buffer */
     351           0 :         *err = U_BUFFER_OVERFLOW_ERROR;
     352             :     }
     353             : 
     354           0 :     args->target = myTarget;
     355           0 :     args->source = (const char *) mySource;
     356           0 :     args->offsets = myOffsets;
     357           0 : }
     358             : U_CDECL_END
     359             : 
     360           0 : U_CFUNC void  U_CALLCONV ucnv_fromUnicode_UTF8 (UConverterFromUnicodeArgs * args,
     361             :                                     UErrorCode * err)
     362             : {
     363           0 :     UConverter *cnv = args->converter;
     364           0 :     const UChar *mySource = args->source;
     365           0 :     const UChar *sourceLimit = args->sourceLimit;
     366           0 :     uint8_t *myTarget = (uint8_t *) args->target;
     367           0 :     const uint8_t *targetLimit = (uint8_t *) args->targetLimit;
     368             :     uint8_t *tempPtr;
     369             :     UChar32 ch;
     370             :     uint8_t tempBuf[4];
     371             :     int32_t indexToWrite;
     372           0 :     UBool isNotCESU8 = !hasCESU8Data(cnv);
     373             : 
     374           0 :     if (cnv->fromUChar32 && myTarget < targetLimit)
     375             :     {
     376           0 :         ch = cnv->fromUChar32;
     377           0 :         cnv->fromUChar32 = 0;
     378           0 :         goto lowsurrogate;
     379             :     }
     380             : 
     381           0 :     while (mySource < sourceLimit && myTarget < targetLimit)
     382             :     {
     383           0 :         ch = *(mySource++);
     384             : 
     385           0 :         if (ch < 0x80)        /* Single byte */
     386             :         {
     387           0 :             *(myTarget++) = (uint8_t) ch;
     388             :         }
     389           0 :         else if (ch < 0x800)  /* Double byte */
     390             :         {
     391           0 :             *(myTarget++) = (uint8_t) ((ch >> 6) | 0xc0);
     392           0 :             if (myTarget < targetLimit)
     393             :             {
     394           0 :                 *(myTarget++) = (uint8_t) ((ch & 0x3f) | 0x80);
     395             :             }
     396             :             else
     397             :             {
     398           0 :                 cnv->charErrorBuffer[0] = (uint8_t) ((ch & 0x3f) | 0x80);
     399           0 :                 cnv->charErrorBufferLength = 1;
     400           0 :                 *err = U_BUFFER_OVERFLOW_ERROR;
     401             :             }
     402             :         }
     403             :         else {
     404             :             /* Check for surrogates */
     405           0 :             if(U16_IS_SURROGATE(ch) && isNotCESU8) {
     406             : lowsurrogate:
     407           0 :                 if (mySource < sourceLimit) {
     408             :                     /* test both code units */
     409           0 :                     if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(*mySource)) {
     410             :                         /* convert and consume this supplementary code point */
     411           0 :                         ch=U16_GET_SUPPLEMENTARY(ch, *mySource);
     412           0 :                         ++mySource;
     413             :                         /* exit this condition tree */
     414             :                     }
     415             :                     else {
     416             :                         /* this is an unpaired trail or lead code unit */
     417             :                         /* callback(illegal) */
     418           0 :                         cnv->fromUChar32 = ch;
     419           0 :                         *err = U_ILLEGAL_CHAR_FOUND;
     420           0 :                         break;
     421             :                     }
     422             :                 }
     423             :                 else {
     424             :                     /* no more input */
     425           0 :                     cnv->fromUChar32 = ch;
     426           0 :                     break;
     427             :                 }
     428             :             }
     429             : 
     430             :             /* Do we write the buffer directly for speed,
     431             :             or do we have to be careful about target buffer space? */
     432           0 :             tempPtr = (((targetLimit - myTarget) >= 4) ? myTarget : tempBuf);
     433             : 
     434           0 :             if (ch <= MAXIMUM_UCS2) {
     435           0 :                 indexToWrite = 2;
     436           0 :                 tempPtr[0] = (uint8_t) ((ch >> 12) | 0xe0);
     437             :             }
     438             :             else {
     439           0 :                 indexToWrite = 3;
     440           0 :                 tempPtr[0] = (uint8_t) ((ch >> 18) | 0xf0);
     441           0 :                 tempPtr[1] = (uint8_t) (((ch >> 12) & 0x3f) | 0x80);
     442             :             }
     443           0 :             tempPtr[indexToWrite-1] = (uint8_t) (((ch >> 6) & 0x3f) | 0x80);
     444           0 :             tempPtr[indexToWrite] = (uint8_t) ((ch & 0x3f) | 0x80);
     445             : 
     446           0 :             if (tempPtr == myTarget) {
     447             :                 /* There was enough space to write the codepoint directly. */
     448           0 :                 myTarget += (indexToWrite + 1);
     449             :             }
     450             :             else {
     451             :                 /* We might run out of room soon. Write it slowly. */
     452           0 :                 for (; tempPtr <= (tempBuf + indexToWrite); tempPtr++) {
     453           0 :                     if (myTarget < targetLimit) {
     454           0 :                         *(myTarget++) = *tempPtr;
     455             :                     }
     456             :                     else {
     457           0 :                         cnv->charErrorBuffer[cnv->charErrorBufferLength++] = *tempPtr;
     458           0 :                         *err = U_BUFFER_OVERFLOW_ERROR;
     459             :                     }
     460             :                 }
     461             :             }
     462             :         }
     463             :     }
     464             : 
     465           0 :     if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
     466             :     {
     467           0 :         *err = U_BUFFER_OVERFLOW_ERROR;
     468             :     }
     469             : 
     470           0 :     args->target = (char *) myTarget;
     471           0 :     args->source = mySource;
     472           0 : }
     473             : 
     474           0 : U_CFUNC void  U_CALLCONV ucnv_fromUnicode_UTF8_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
     475             :                                                   UErrorCode * err)
     476             : {
     477           0 :     UConverter *cnv = args->converter;
     478           0 :     const UChar *mySource = args->source;
     479           0 :     int32_t *myOffsets = args->offsets;
     480           0 :     const UChar *sourceLimit = args->sourceLimit;
     481           0 :     uint8_t *myTarget = (uint8_t *) args->target;
     482           0 :     const uint8_t *targetLimit = (uint8_t *) args->targetLimit;
     483             :     uint8_t *tempPtr;
     484             :     UChar32 ch;
     485             :     int32_t offsetNum, nextSourceIndex;
     486             :     int32_t indexToWrite;
     487             :     uint8_t tempBuf[4];
     488           0 :     UBool isNotCESU8 = !hasCESU8Data(cnv);
     489             : 
     490           0 :     if (cnv->fromUChar32 && myTarget < targetLimit)
     491             :     {
     492           0 :         ch = cnv->fromUChar32;
     493           0 :         cnv->fromUChar32 = 0;
     494           0 :         offsetNum = -1;
     495           0 :         nextSourceIndex = 0;
     496           0 :         goto lowsurrogate;
     497             :     } else {
     498           0 :         offsetNum = 0;
     499             :     }
     500             : 
     501           0 :     while (mySource < sourceLimit && myTarget < targetLimit)
     502             :     {
     503           0 :         ch = *(mySource++);
     504             : 
     505           0 :         if (ch < 0x80)        /* Single byte */
     506             :         {
     507           0 :             *(myOffsets++) = offsetNum++;
     508           0 :             *(myTarget++) = (char) ch;
     509             :         }
     510           0 :         else if (ch < 0x800)  /* Double byte */
     511             :         {
     512           0 :             *(myOffsets++) = offsetNum;
     513           0 :             *(myTarget++) = (uint8_t) ((ch >> 6) | 0xc0);
     514           0 :             if (myTarget < targetLimit)
     515             :             {
     516           0 :                 *(myOffsets++) = offsetNum++;
     517           0 :                 *(myTarget++) = (uint8_t) ((ch & 0x3f) | 0x80);
     518             :             }
     519             :             else
     520             :             {
     521           0 :                 cnv->charErrorBuffer[0] = (uint8_t) ((ch & 0x3f) | 0x80);
     522           0 :                 cnv->charErrorBufferLength = 1;
     523           0 :                 *err = U_BUFFER_OVERFLOW_ERROR;
     524             :             }
     525             :         }
     526             :         else
     527             :         /* Check for surrogates */
     528             :         {
     529           0 :             nextSourceIndex = offsetNum + 1;
     530             : 
     531           0 :             if(U16_IS_SURROGATE(ch) && isNotCESU8) {
     532             : lowsurrogate:
     533           0 :                 if (mySource < sourceLimit) {
     534             :                     /* test both code units */
     535           0 :                     if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(*mySource)) {
     536             :                         /* convert and consume this supplementary code point */
     537           0 :                         ch=U16_GET_SUPPLEMENTARY(ch, *mySource);
     538           0 :                         ++mySource;
     539           0 :                         ++nextSourceIndex;
     540             :                         /* exit this condition tree */
     541             :                     }
     542             :                     else {
     543             :                         /* this is an unpaired trail or lead code unit */
     544             :                         /* callback(illegal) */
     545           0 :                         cnv->fromUChar32 = ch;
     546           0 :                         *err = U_ILLEGAL_CHAR_FOUND;
     547           0 :                         break;
     548             :                     }
     549             :                 }
     550             :                 else {
     551             :                     /* no more input */
     552           0 :                     cnv->fromUChar32 = ch;
     553           0 :                     break;
     554             :                 }
     555             :             }
     556             : 
     557             :             /* Do we write the buffer directly for speed,
     558             :             or do we have to be careful about target buffer space? */
     559           0 :             tempPtr = (((targetLimit - myTarget) >= 4) ? myTarget : tempBuf);
     560             : 
     561           0 :             if (ch <= MAXIMUM_UCS2) {
     562           0 :                 indexToWrite = 2;
     563           0 :                 tempPtr[0] = (uint8_t) ((ch >> 12) | 0xe0);
     564             :             }
     565             :             else {
     566           0 :                 indexToWrite = 3;
     567           0 :                 tempPtr[0] = (uint8_t) ((ch >> 18) | 0xf0);
     568           0 :                 tempPtr[1] = (uint8_t) (((ch >> 12) & 0x3f) | 0x80);
     569             :             }
     570           0 :             tempPtr[indexToWrite-1] = (uint8_t) (((ch >> 6) & 0x3f) | 0x80);
     571           0 :             tempPtr[indexToWrite] = (uint8_t) ((ch & 0x3f) | 0x80);
     572             : 
     573           0 :             if (tempPtr == myTarget) {
     574             :                 /* There was enough space to write the codepoint directly. */
     575           0 :                 myTarget += (indexToWrite + 1);
     576           0 :                 myOffsets[0] = offsetNum;
     577           0 :                 myOffsets[1] = offsetNum;
     578           0 :                 myOffsets[2] = offsetNum;
     579           0 :                 if (indexToWrite >= 3) {
     580           0 :                     myOffsets[3] = offsetNum;
     581             :                 }
     582           0 :                 myOffsets += (indexToWrite + 1);
     583             :             }
     584             :             else {
     585             :                 /* We might run out of room soon. Write it slowly. */
     586           0 :                 for (; tempPtr <= (tempBuf + indexToWrite); tempPtr++) {
     587           0 :                     if (myTarget < targetLimit)
     588             :                     {
     589           0 :                         *(myOffsets++) = offsetNum;
     590           0 :                         *(myTarget++) = *tempPtr;
     591             :                     }
     592             :                     else
     593             :                     {
     594           0 :                         cnv->charErrorBuffer[cnv->charErrorBufferLength++] = *tempPtr;
     595           0 :                         *err = U_BUFFER_OVERFLOW_ERROR;
     596             :                     }
     597             :                 }
     598             :             }
     599           0 :             offsetNum = nextSourceIndex;
     600             :         }
     601             :     }
     602             : 
     603           0 :     if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
     604             :     {
     605           0 :         *err = U_BUFFER_OVERFLOW_ERROR;
     606             :     }
     607             : 
     608           0 :     args->target = (char *) myTarget;
     609           0 :     args->source = mySource;
     610           0 :     args->offsets = myOffsets;
     611           0 : }
     612             : 
     613             : U_CDECL_BEGIN
                       /*
                        * Decode a single code point from the UTF-8 bytes at args->source.
                        *
                        * On success the code point is returned and args->source is advanced
                        * past the bytes that encode it.
                        * On failure 0xffff is returned and *err is set to one of:
                        * - U_INDEX_OUTOFBOUNDS_ERROR: args->source is already at args->sourceLimit
                        * - U_ILLEGAL_CHAR_FOUND: a byte that cannot start a sequence, a non-trail
                        *   byte inside a sequence, or a value rejected by the final legality check
                        * - U_TRUNCATED_CHAR_FOUND: the input ends inside a multi-byte sequence
                        * In the illegal and truncated cases the bytes consumed so far are copied
                        * to cnv->toUBytes and their count is stored in cnv->toULength.
                        */
     614           0 : static UChar32 U_CALLCONV ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args,
     615             :                                                UErrorCode *err) {
     616             :     UConverter *cnv;
     617             :     const uint8_t *sourceInitial;
     618             :     const uint8_t *source;
     619             :     uint16_t extraBytesToWrite;
     620             :     uint8_t myByte;
     621             :     UChar32 ch;
     622             :     int8_t i, isLegalSequence;
     623             : 
     624             :     /* UTF-8 only here, the framework handles CESU-8 to combine surrogate pairs */
     625             : 
     626           0 :     cnv = args->converter;
     627           0 :     sourceInitial = source = (const uint8_t *)args->source;
     628           0 :     if (source >= (const uint8_t *)args->sourceLimit)
     629             :     {
     630             :         /* no input */
     631           0 :         *err = U_INDEX_OUTOFBOUNDS_ERROR;
     632           0 :         return 0xffff;
     633             :     }
     634             : 
                           /* Fast path: a byte below 0x80 is returned as the code point itself. */
     635           0 :     myByte = (uint8_t)*(source++);
     636           0 :     if (myByte < 0x80)
     637             :     {
     638           0 :         args->source = (const char *)source;
     639           0 :         return (UChar32)myByte;
     640             :     }
     641             : 
                           /*
                            * Despite its name, extraBytesToWrite is used below as the total length
                            * of the sequence including the lead byte: case 2 of the switch reads
                            * exactly one trail byte. A table value of 0 is treated as "this byte
                            * cannot start a sequence"; only that one byte is consumed and reported.
                            */
     642           0 :     extraBytesToWrite = (uint16_t)bytesFromUTF8[myByte];
     643           0 :     if (extraBytesToWrite == 0) {
     644           0 :         cnv->toUBytes[0] = myByte;
     645           0 :         cnv->toULength = 1;
     646           0 :         *err = U_ILLEGAL_CHAR_FOUND;
     647           0 :         args->source = (const char *)source;
     648           0 :         return 0xffff;
     649             :     }
     650             : 
     651             :     /*The byte sequence is longer than the buffer area passed*/
     652           0 :     if (((const char *)source + extraBytesToWrite - 1) > args->sourceLimit)
     653             :     {
     654             :         /* check if all of the remaining bytes are trail bytes */
     655           0 :         cnv->toUBytes[0] = myByte;
     656           0 :         i = 1;
                               /* Assume truncation; downgraded to "illegal" if a non-trail byte turns up. */
     657           0 :         *err = U_TRUNCATED_CHAR_FOUND;
     658           0 :         while(source < (const uint8_t *)args->sourceLimit) {
     659           0 :             if(U8_IS_TRAIL(myByte = *source)) {
     660           0 :                 cnv->toUBytes[i++] = myByte;
     661           0 :                 ++source;
     662             :             } else {
     663             :                 /* error even before we run out of input */
     664           0 :                 *err = U_ILLEGAL_CHAR_FOUND;
     665           0 :                 break;
     666             :             }
     667             :         }
     668           0 :         cnv->toULength = i;
     669           0 :         args->source = (const char *)source;
     670           0 :         return 0xffff;
     671             :     }
     672             : 
                           /*
                            * Accumulate the lead byte and all trail bytes, six bits per step, without
                            * masking anything off; the surplus is removed in one subtraction of
                            * offsetsFromUTF8[] after the switch.
                            * A non-trail byte stops the accumulation before that byte is consumed.
                            *
                            * NOTE(review): if UChar32 is a signed 32-bit type and bytesFromUTF8[] can
                            * still yield 5 or 6, the repeated "ch <<= 6" shifts bits past bit 31 -
                            * confirm whether that is reachable and acceptable.
                            */
     673           0 :     isLegalSequence = 1;
     674           0 :     ch = myByte << 6;
     675           0 :     switch(extraBytesToWrite)
     676             :     {     
     677             :       /* Note: each case deliberately falls through to the next one. */ 
     678             :     case 6:
     679           0 :         ch += (myByte = *source);
     680           0 :         ch <<= 6;
     681           0 :         if (!U8_IS_TRAIL(myByte))
     682             :         {
     683           0 :             isLegalSequence = 0;
     684           0 :             break;
     685             :         }
     686           0 :         ++source;
     687             :         U_FALLTHROUGH;
     688             :     case 5:
     689           0 :         ch += (myByte = *source);
     690           0 :         ch <<= 6;
     691           0 :         if (!U8_IS_TRAIL(myByte))
     692             :         {
     693           0 :             isLegalSequence = 0;
     694           0 :             break;
     695             :         }
     696           0 :         ++source;
     697             :         U_FALLTHROUGH;
     698             :     case 4:
     699           0 :         ch += (myByte = *source);
     700           0 :         ch <<= 6;
     701           0 :         if (!U8_IS_TRAIL(myByte))
     702             :         {
     703           0 :             isLegalSequence = 0;
     704           0 :             break;
     705             :         }
     706           0 :         ++source;
     707             :         U_FALLTHROUGH;
     708             :     case 3:
     709           0 :         ch += (myByte = *source);
     710           0 :         ch <<= 6;
     711           0 :         if (!U8_IS_TRAIL(myByte))
     712             :         {
     713           0 :             isLegalSequence = 0;
     714           0 :             break;
     715             :         }
     716           0 :         ++source;
     717             :         U_FALLTHROUGH;
     718             :     case 2:
                               /* Last trail byte: added without a further shift. */
     719           0 :         ch += (myByte = *source);
     720           0 :         if (!U8_IS_TRAIL(myByte))
     721             :         {
     722           0 :             isLegalSequence = 0;
     723           0 :             break;
     724             :         }
     725           0 :         ++source;
     726             :     };
     727           0 :     ch -= offsetsFromUTF8[extraBytesToWrite];
                           /* The source position is committed before validation, so the bytes read
                              above stay consumed even when the sequence is rejected below. */
     728           0 :     args->source = (const char *)source;
     729             : 
     730             :     /*
     731             :      * Legal UTF-8 byte sequences in Unicode 3.0.1 and up:
     732             :      * - use only trail bytes after a lead byte (checked above)
     733             :      * - use the right number of trail bytes for a given lead byte
     734             :      * - encode a code point <= U+10ffff
     735             :      * - use the fewest possible number of bytes for their code points
     736             :      * - use at most 4 bytes (for i>=5 it is 0x10ffff<utf8_minChar32[])
     737             :      *
     738             :      * Starting with Unicode 3.2, surrogate code points must not be encoded in UTF-8.
     739             :      * There are no irregular sequences any more.
     740             :      */
     741           0 :     if (isLegalSequence &&
     742           0 :         (uint32_t)ch <= MAXIMUM_UTF &&
     743           0 :         (uint32_t)ch >= utf8_minChar32[extraBytesToWrite] &&
     744           0 :         !U_IS_SURROGATE(ch)
     745             :     ) {
     746           0 :         return ch; /* return the code point */
     747             :     }
     748             : 
                           /* Illegal sequence: report every byte consumed so far, lead byte included. */
     749           0 :     for(i = 0; sourceInitial < source; ++i) {
     750           0 :         cnv->toUBytes[i] = *sourceInitial++;
     751             :     }
     752           0 :     cnv->toULength = i;
     753           0 :     *err = U_ILLEGAL_CHAR_FOUND;
     754           0 :     return 0xffff;
     755             : } 
     756             : U_CDECL_END
     757             : 
     758             : /* UTF-8-from-UTF-8 conversion functions ------------------------------------ */
     759             : 
     760             : /* Minimum code point values for n-byte UTF-8 sequences, n=0..4. */
                       /* A decoded value below utf8_minLegal[n] was not encoded in the shortest form. */
     761             : static const UChar32
     762             : utf8_minLegal[5]={ 0, 0, 0x80, 0x800, 0x10000 };
     763             : 
     764             : /* Offsets for n-byte UTF-8 sequences that were calculated with ((lead<<6)+trail)<<6+trail... */
                       /*
                        * Each entry is what the lead-byte marker and the 0x80 trail-byte markers
                        * add up to when whole bytes are accumulated that way, e.g. for n=2:
                        * (0xc0<<6)+0x80 = 0x3080. Subtracting it leaves the code point.
                        * Only five initializers are given, so elements [5] and [6] are zero.
                        */
     765             : static const UChar32
     766             : utf8_offsets[7]={ 0, 0, 0x3080, 0xE2080, 0x3C82080 };
     767             : 
     768             : U_CDECL_BEGIN
     769             : /* "Convert" UTF-8 to UTF-8: Validate and copy. Modified from ucnv_DBCSFromUTF8(). */
                       /*
                        * Reads bytes from pToUArgs->source and writes each validated sequence
                        * unchanged to pFromUArgs->target; the updated pointers are written back.
                        *
                        * A sequence cut off by the end of the source is saved in the converter
                        * (pToUArgs->converter): the bytes in toUBytes/toULength, the accumulated
                        * value in toUnicodeStatus and the expected length in mode. The next call
                        * resumes it.
                        *
                        * *pErrorCode is set to
                        * - U_ILLEGAL_CHAR_FOUND for an illegal sequence (bytes left in toUBytes),
                        * - U_BUFFER_OVERFLOW_ERROR when the target is full while input remains,
                        * - U_USING_DEFAULT_WARNING to hand an edge case back to the standard
                        *   (pivoting) conversion path.
                        */
     770             : static void U_CALLCONV
     771           0 : ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
     772             :                   UConverterToUnicodeArgs *pToUArgs,
     773             :                   UErrorCode *pErrorCode) {
     774             :     UConverter *utf8;
     775             :     const uint8_t *source, *sourceLimit;
     776             :     uint8_t *target;
     777             :     int32_t targetCapacity;
     778             :     int32_t count;
     779             : 
     780             :     int8_t oldToULength, toULength, toULimit;
     781             : 
     782             :     UChar32 c;
     783             :     uint8_t b, t1, t2;
     784             : 
     785             :     /* set up the local pointers */
     786           0 :     utf8=pToUArgs->converter;
     787           0 :     source=(uint8_t *)pToUArgs->source;
     788           0 :     sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
     789           0 :     target=(uint8_t *)pFromUArgs->target;
     790           0 :     targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
     791             : 
     792             :     /* get the converter state from the UTF-8 UConverter */
                           /* A non-zero toUnicodeStatus means an earlier call stopped inside a sequence. */
     793           0 :     c=(UChar32)utf8->toUnicodeStatus;
     794           0 :     if(c!=0) {
     795           0 :         toULength=oldToULength=utf8->toULength;
     796           0 :         toULimit=(int8_t)utf8->mode;
     797             :     } else {
     798           0 :         toULength=oldToULength=toULimit=0;
     799             :     }
     800             : 
                           /* count = bytes available for the loop, including those saved from the previous buffer */
     801           0 :     count=(int32_t)(sourceLimit-source)+oldToULength;
     802           0 :     if(count<toULimit) {
     803             :         /*
     804             :          * Not enough input to complete the partial character.
     805             :          * Jump to moreBytes below - it will not output to target.
     806             :          */
     807           0 :     } else if(targetCapacity<toULimit) {
     808             :         /*
     809             :          * Not enough target capacity to output the partial character.
     810             :          * Let the standard converter handle this.
     811             :          */
     812           0 :         *pErrorCode=U_USING_DEFAULT_WARNING;
     813           0 :         return;
     814             :     } else {
     815             :         /*
     816             :          * Use a single counter for source and target, counting the minimum of
     817             :          * the source length and the target capacity.
     818             :          * As a result, the source length is checked only once per multi-byte
     819             :          * character instead of twice.
     820             :          *
     821             :          * Make sure that the last byte sequence is complete, or else
     822             :          * stop just before it.
     823             :          * (The longest legal byte sequence has 3 trail bytes.)
     824             :          * Count oldToULength (number of source bytes from a previous buffer)
     825             :          * into the source length but reduce the source index by toULimit
     826             :          * while going back over trail bytes in order to not go back into
     827             :          * the bytes that will be read for finishing a partial
     828             :          * sequence from the previous buffer.
     829             :          * Let the standard converter handle edge cases.
     830             :          */
     831             :         int32_t i;
     832             : 
     833           0 :         if(count>targetCapacity) {
     834           0 :             count=targetCapacity;
     835             :         }
     836             : 
                               /* Walk backwards from the last counted byte over at most 3 trail bytes. */
     837           0 :         i=0;
     838           0 :         while(i<3 && i<(count-toULimit)) {
     839           0 :             b=source[count-oldToULength-i-1];
     840           0 :             if(U8_IS_TRAIL(b)) {
     841           0 :                 ++i;
     842             :             } else {
     843           0 :                 if(i<U8_COUNT_TRAIL_BYTES(b)) {
     844             :                     /* stop converting before the lead byte if there are not enough trail bytes for it */
     845           0 :                     count-=i+1;
     846             :                 }
     847           0 :                 break;
     848             :             }
     849             :         }
     850             :     }
     851             : 
                           /* Resume a saved partial sequence: clear the saved state and enter the
                              loop body at moreBytes with c/toULength/toULimit as loaded above. */
     852           0 :     if(c!=0) {
     853           0 :         utf8->toUnicodeStatus=0;
     854           0 :         utf8->toULength=0;
     855           0 :         goto moreBytes;
     856             :         /* See note in ucnv_SBCSFromUTF8() about this goto. */
     857             :     }
     858             : 
     859             :     /* conversion loop */
     860           0 :     while(count>0) {
     861           0 :         b=*source++;
                               /* sign-bit test: true for bytes 0x00..0x7f */
     862           0 :         if((int8_t)b>=0) {
     863             :             /* convert ASCII */
     864           0 :             *target++=b;
     865           0 :             --count;
     866           0 :             continue;
     867             :         } else {
                                   /*
                                    * Inline fast paths for the common 2- and 3-byte forms.
                                    * They read source[0]/source[1] without a limit check; this
                                    * presumably relies on the count adjustment above having excluded
                                    * an incomplete final sequence - confirm before changing either.
                                    * Anything that fails a fast-path test falls through to the
                                    * general code below; that includes lead bytes 0xee and above.
                                    */
     868           0 :             if(b>0xe0) {
     869           0 :                 if( /* handle U+1000..U+D7FF inline */
     870           0 :                     (t1=source[0]) >= 0x80 && ((b<0xed && (t1 <= 0xbf)) ||
     871           0 :                                                (b==0xed && (t1 <= 0x9f))) &&
     872           0 :                     (t2=source[1]) >= 0x80 && t2 <= 0xbf
     873             :                 ) {
     874           0 :                     source+=2;
     875           0 :                     *target++=b;
     876           0 :                     *target++=t1;
     877           0 :                     *target++=t2;
     878           0 :                     count-=3;
     879           0 :                     continue;
     880             :                 }
     881           0 :             } else if(b<0xe0) {
     882           0 :                 if( /* handle U+0080..U+07FF inline */
     883           0 :                     b>=0xc2 &&
     884           0 :                     (t1=*source) >= 0x80 && t1 <= 0xbf
     885             :                 ) {
     886           0 :                     ++source;
     887           0 :                     *target++=b;
     888           0 :                     *target++=t1;
     889           0 :                     count-=2;
     890           0 :                     continue;
     891             :                 }
     892           0 :             } else if(b==0xe0) {
                                       /* first trail byte must be at least 0xa0 after lead byte 0xe0 */
     893           0 :                 if( /* handle U+0800..U+0FFF inline */
     894           0 :                     (t1=source[0]) >= 0xa0 && t1 <= 0xbf &&
     895           0 :                     (t2=source[1]) >= 0x80 && t2 <= 0xbf
     896             :                 ) {
     897           0 :                     source+=2;
     898           0 :                     *target++=b;
     899           0 :                     *target++=t1;
     900           0 :                     *target++=t2;
     901           0 :                     count-=3;
     902           0 :                     continue;
     903             :                 }
     904             :             }
     905             : 
     906             :             /* handle "complicated" and error cases, and continuing partial characters */
     907           0 :             oldToULength=0;
     908           0 :             toULength=1;
     909           0 :             toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
     910           0 :             c=b;
     911             : moreBytes:
                                   /* Collect trail bytes into c (markers included) until toULimit bytes are read. */
     912           0 :             while(toULength<toULimit) {
     913           0 :                 if(source<sourceLimit) {
     914           0 :                     b=*source;
     915           0 :                     if(U8_IS_TRAIL(b)) {
     916           0 :                         ++source;
     917           0 :                         ++toULength;
     918           0 :                         c=(c<<6)+b;
     919             :                     } else {
     920           0 :                         break; /* sequence too short, stop with toULength<toULimit */
     921             :                     }
     922             :                 } else {
     923             :                     /* store the partial UTF-8 character, compatible with the regular UTF-8 converter */
                                           /* Rewind to the bytes read from this buffer and append them
                                              after the oldToULength bytes already held in toUBytes. */
     924           0 :                     source-=(toULength-oldToULength);
     925           0 :                     while(oldToULength<toULength) {
     926           0 :                         utf8->toUBytes[oldToULength++]=*source++;
     927             :                     }
     928           0 :                     utf8->toUnicodeStatus=c;
     929           0 :                     utf8->toULength=toULength;
     930           0 :                     utf8->mode=toULimit;
     931           0 :                     pToUArgs->source=(char *)source;
     932           0 :                     pFromUArgs->target=(char *)target;
     933           0 :                     return;
     934             :                 }
     935             :             }
     936             : 
                                   /*
                                    * Validate. Note that both conditions modify c: the "c-=" inside
                                    * the test subtracts utf8_offsets[] as a side effect, and it is only
                                    * evaluated once the preceding length checks have passed.
                                    */
     937           0 :             if( toULength==toULimit &&      /* consumed all trail bytes */
     938           0 :                 (toULength==3 || toULength==2) &&             /* BMP */
     939           0 :                 (c-=utf8_offsets[toULength])>=utf8_minLegal[toULength] &&
     940           0 :                 (c<=0xd7ff || 0xe000<=c)    /* not a surrogate */
     941             :             ) {
     942             :                 /* legal byte sequence for BMP code point */
     943           0 :             } else if(
     944           0 :                 toULength==toULimit && toULength==4 &&
     945           0 :                 (0x10000<=(c-=utf8_offsets[4]) && c<=0x10ffff)
     946             :             ) {
     947             :                 /* legal byte sequence for supplementary code point */
     948             :             } else {
     949             :                 /* error handling: illegal UTF-8 byte sequence */
     950           0 :                 source-=(toULength-oldToULength);
     951           0 :                 while(oldToULength<toULength) {
     952           0 :                     utf8->toUBytes[oldToULength++]=*source++;
     953             :                 }
     954           0 :                 utf8->toULength=toULength;
     955           0 :                 pToUArgs->source=(char *)source;
     956           0 :                 pFromUArgs->target=(char *)target;
     957           0 :                 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
     958           0 :                 return;
     959             :             }
     960             : 
     961             :             /* copy the legal byte sequence to the target */
     962             :             {
     963             :                 int8_t i;
     964             : 
                                       /* first the bytes saved from the previous buffer ... */
     965           0 :                 for(i=0; i<oldToULength; ++i) {
     966           0 :                     *target++=utf8->toUBytes[i];
     967             :                 }
                                       /* ... then the bytes of this sequence that came from the current source */
     968           0 :                 source-=(toULength-oldToULength);
     969           0 :                 for(; i<toULength; ++i) {
     970           0 :                     *target++=*source++;
     971             :                 }
     972           0 :                 count-=toULength;
     973             :             }
     974             :         }
     975             :     }
     976             : 
                           /* The counted bytes are done; decide how to report any input that is left. */
     977           0 :     if(U_SUCCESS(*pErrorCode) && source<sourceLimit) {
     978           0 :         if(target==(const uint8_t *)pFromUArgs->targetLimit) {
     979           0 :             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     980             :         } else {
     981           0 :             b=*source;
     982           0 :             toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
     983           0 :             if(toULimit>(sourceLimit-source)) {
     984             :                 /* collect a truncated byte sequence */
     985           0 :                 toULength=0;
     986           0 :                 c=b;
     987             :                 for(;;) {
     988           0 :                     utf8->toUBytes[toULength++]=b;
     989           0 :                     if(++source==sourceLimit) {
     990             :                         /* partial byte sequence at end of source */
     991           0 :                         utf8->toUnicodeStatus=c;
     992           0 :                         utf8->toULength=toULength;
     993           0 :                         utf8->mode=toULimit;
     994           0 :                         break;
     995           0 :                     } else if(!U8_IS_TRAIL(b=*source)) {
     996             :                         /* lead byte in trail byte position */
     997           0 :                         utf8->toULength=toULength;
     998           0 :                         *pErrorCode=U_ILLEGAL_CHAR_FOUND;
     999           0 :                         break;
    1000             :                     }
    1001           0 :                     c=(c<<6)+b;
    1002             :                 }
    1003             :             } else {
    1004             :                 /* partial-sequence target overflow: fall back to the pivoting implementation */
    1005           0 :                 *pErrorCode=U_USING_DEFAULT_WARNING;
    1006             :             }
    1007             :         }
    1008             :     }
    1009             : 
    1010             :     /* write back the updated pointers */
    1011           0 :     pToUArgs->source=(char *)source;
    1012           0 :     pFromUArgs->target=(char *)target;
    1013             : }
    1014             : 
    1015             : U_CDECL_END
    1016             : 
    1017             : /* UTF-8 converter data ----------------------------------------------------- */
    1018             : 
                       /*
                        * Implementation table for the UTF-8 converter.
                        * The initializer is positional: the meaning of each slot is fixed by the
                        * UConverterImpl declaration, which is not part of this file (presumably
                        * ucnv_cnv.h, included above) - confirm against it before editing.
                        * The slots filled here are the converter type, the toUnicode and
                        * fromUnicode functions (each with an offsets-tracking variant), the
                        * getNextUChar function, a Unicode-set getter, and - in the last two
                        * slots - the UTF-8-to-UTF-8 fast path ucnv_UTF8FromUTF8.
                        * NULL slots are left to whatever default the framework applies.
                        */
    1019             : static const UConverterImpl _UTF8Impl={
    1020             :     UCNV_UTF8,
    1021             : 
    1022             :     NULL,
    1023             :     NULL,
    1024             : 
    1025             :     NULL,
    1026             :     NULL,
    1027             :     NULL,
    1028             : 
    1029             :     ucnv_toUnicode_UTF8,
    1030             :     ucnv_toUnicode_UTF8_OFFSETS_LOGIC,
    1031             :     ucnv_fromUnicode_UTF8,
    1032             :     ucnv_fromUnicode_UTF8_OFFSETS_LOGIC,
    1033             :     ucnv_getNextUChar_UTF8,
    1034             : 
    1035             :     NULL,
    1036             :     NULL,
    1037             :     NULL,
    1038             :     NULL,
    1039             :     ucnv_getNonSurrogateUnicodeSet,
    1040             : 
    1041             :     ucnv_UTF8FromUTF8, /* same function in both of the last two slots */
    1042             :     ucnv_UTF8FromUTF8
    1043             : };
    1044             : 
    1045             : /* CCSID 1208 refers to UTF-8 for any version of Unicode. */
                       /*
                        * Static description of the UTF-8 converter: name, CCSID, platform and
                        * converter type, then minimum and maximum bytes per UChar.
                        * The bytes 0xef 0xbf 0xbd with length 3 are the UTF-8 form of U+FFFD,
                        * presumably the substitution character. The initializer is positional;
                        * confirm field order against the UConverterStaticData declaration.
                        */
    1046             : static const UConverterStaticData _UTF8StaticData={
    1047             :     sizeof(UConverterStaticData),
    1048             :     "UTF-8",
    1049             :     1208, UCNV_IBM, UCNV_UTF8,
    1050             :     1, 3, /* max 3 bytes per UChar from UTF-8 (4 bytes from surrogate _pair_) */
    1051             :     { 0xef, 0xbf, 0xbd, 0 },3,FALSE,FALSE,
    1052             :     0,
    1053             :     0,
    1054             :     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
    1055             : };
    1056             : 
    1057             : 
                       /* Shared-data object (external linkage) that pairs the UTF-8 static data
                          with the UTF-8 implementation table. */
    1058             : const UConverterSharedData _UTF8Data=
    1059             :         UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF8StaticData, &_UTF8Impl);
    1060             : 
    1061             : /* CESU-8 converter data ---------------------------------------------------- */
    1062             : 
                       /*
                        * Implementation table for the CESU-8 converter.
                        * It reuses the four UTF-8 toUnicode/fromUnicode functions. Compared with
                        * the UTF-8 table in this file it leaves the getNextUChar slot and the last
                        * two (UTF-8 fast path) slots NULL and uses ucnv_getCompleteUnicodeSet.
                        * The initializer is positional; slot meanings come from the
                        * UConverterImpl declaration, which is not part of this file - confirm
                        * against it before editing.
                        */
    1063             : static const UConverterImpl _CESU8Impl={
    1064             :     UCNV_CESU8,
    1065             : 
    1066             :     NULL,
    1067             :     NULL,
    1068             : 
    1069             :     NULL,
    1070             :     NULL,
    1071             :     NULL,
    1072             : 
    1073             :     ucnv_toUnicode_UTF8,
    1074             :     ucnv_toUnicode_UTF8_OFFSETS_LOGIC,
    1075             :     ucnv_fromUnicode_UTF8,
    1076             :     ucnv_fromUnicode_UTF8_OFFSETS_LOGIC,
    1077             :     NULL, /* no dedicated getNextUChar function for CESU-8 */
    1078             : 
    1079             :     NULL,
    1080             :     NULL,
    1081             :     NULL,
    1082             :     NULL,
    1083             :     ucnv_getCompleteUnicodeSet,
    1084             : 
    1085             :     NULL,
    1086             :     NULL
    1087             : };
    1088             : 
                       /*
                        * Static description of the CESU-8 converter: name, CCSID, platform and
                        * converter type, then 1 and 3 as minimum and maximum bytes per UChar.
                        * The bytes 0xef 0xbf 0xbd with length 3 are the UTF-8 form of U+FFFD,
                        * presumably the substitution character. The initializer is positional;
                        * confirm field order against the UConverterStaticData declaration.
                        */
    1089             : static const UConverterStaticData _CESU8StaticData={
    1090             :     sizeof(UConverterStaticData),
    1091             :     "CESU-8",
    1092             :     9400, /* CCSID for CESU-8 */
    1093             :     UCNV_UNKNOWN, UCNV_CESU8, 1, 3,
    1094             :     { 0xef, 0xbf, 0xbd, 0 },3,FALSE,FALSE,
    1095             :     0,
    1096             :     0,
    1097             :     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
    1098             : };
    1099             : 
    1100             : 
                       /* Shared-data object (external linkage) that pairs the CESU-8 static data
                          with the CESU-8 implementation table. */
    1101             : const UConverterSharedData _CESU8Data=
    1102             :         UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_CESU8StaticData, &_CESU8Impl);
    1103             : 
    1104             : #endif

Generated by: LCOV version 1.13