LCOV - output.info - intl/icu/source/common/ucnv

LCOV - code coverage report

Current view:	top level - intl/icu/source/common - ucnv_u16.cpp (source / functions)		Hit	Total	Coverage
Test:	output.info	Lines:	0	713	0.0 %
Date:	2017-07-14 16:53:18	Functions:	0	17	0.0 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*  
       4             : **********************************************************************
       5             : *   Copyright (C) 2002-2015, International Business Machines
       6             : *   Corporation and others.  All Rights Reserved.
       7             : **********************************************************************
       8             : *   file name:  ucnv_u16.cpp
       9             : *   encoding:   UTF-8
      10             : *   tab size:   8 (not used)
      11             : *   indentation:4
      12             : *
      13             : *   created on: 2002jul01
      14             : *   created by: Markus W. Scherer
      15             : *
      16             : *   UTF-16 converter implementation. Used to be in ucnv_utf.c.
      17             : */
      18             : 
      19             : #include "unicode/utypes.h"
      20             : 
      21             : #if !UCONFIG_NO_CONVERSION
      22             : 
      23             : #include "unicode/ucnv.h"
      24             : #include "unicode/uversion.h"
      25             : #include "ucnv_bld.h"
      26             : #include "ucnv_cnv.h"
      27             : #include "cmemory.h"
      28             : 
      29             : enum {
                           /*
                            * Value of UConverter.fromUnicodeStatus meaning that the BOM
                            * still has to be written: _UTF16BEReset stores it for version 1,
                            * and _UTF16BEFromUnicodeWithOffsets emits FE FF and clears the
                            * status when it finds it.
                            */
      30             :     UCNV_NEED_TO_WRITE_BOM=1
      31             : };
      32             : 
      33             : U_CDECL_BEGIN
      34             : /*
      35             :  * The UTF-16 toUnicode implementation is also used for the Java-specific
      36             :  * "with BOM" variants of UTF-16BE and UTF-16LE.
      37             :  */
      38             : static void  U_CALLCONV
      39             : _UTF16ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
      40             :                            UErrorCode *pErrorCode);
      41             : 
      42             : /* UTF-16BE ----------------------------------------------------------------- */
      43             : 
      44             : #if U_IS_BIG_ENDIAN
                       /*
                        * _UTF16PEFromUnicodeWithOffsets is an alias for the big-endian or the
                        * little-endian fromUnicode function, chosen by U_IS_BIG_ENDIAN.
                        * NOTE(review): "PE" presumably stands for "platform endian" - confirm.
                        */
      45             : #   define _UTF16PEFromUnicodeWithOffsets   _UTF16BEFromUnicodeWithOffsets
      46             : #else
      47             : #   define _UTF16PEFromUnicodeWithOffsets   _UTF16LEFromUnicodeWithOffsets
      48             : #endif
      49             : 
      50             : 
      51             : static void  U_CALLCONV
      52           0 : _UTF16BEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
      53             :                                UErrorCode *pErrorCode) {
                           /*
                            * Converts the UChars between pArgs->source and pArgs->sourceLimit
                            * to UTF-16BE bytes (high byte first) in pArgs->target, and writes
                            * the advanced source, target and offsets pointers back into pArgs
                            * at the end of the function.
                            *
                            * - If cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM, the bytes
                            *   FE FF are emitted first and the status is cleared.
                            * - A lead surrogate left in cnv->fromUChar32 by an earlier call is
                            *   paired with a trail surrogate at the start of this buffer.
                            * - When pArgs->offsets is not NULL, the copy loops write one offset
                            *   per output byte: the index of the originating UChar in this
                            *   buffer, or -1 for the bytes of a pair whose lead surrogate was
                            *   carried over from the previous call.
                            * - *pErrorCode becomes U_BUFFER_OVERFLOW_ERROR when the target is
                            *   full while input remains, and U_ILLEGAL_CHAR_FOUND for an
                            *   unmatched surrogate (which is stored in cnv->fromUChar32).
                            */
      54             :     UConverter *cnv;
      55             :     const UChar *source;
      56             :     char *target;
      57             :     int32_t *offsets;
      58             : 
      59             :     uint32_t targetCapacity, length, sourceIndex;
      60             :     UChar c, trail;
      61             :     char overflow[4];
      62             : 
      63           0 :     source=pArgs->source;
                           /*
                            * NOTE(review): length is declared uint32_t, so the int32_t result
                            * of the cast is converted back to unsigned and the length<=0 test
                            * below can only be true for length==0 - confirm that
                            * sourceLimit<source never happens in callers.
                            */
      64           0 :     length=(int32_t)(pArgs->sourceLimit-source);
      65           0 :     if(length<=0) {
      66             :         /* no input, nothing to do */
      67           0 :         return;
      68             :     }
      69             : 
      70           0 :     cnv=pArgs->converter;
      71             : 
      72             :     /* write the BOM if necessary */
      73           0 :     if(cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
      74             :         static const char bom[]={ (char)0xfe, (char)0xff };
      75           0 :         ucnv_fromUWriteBytes(cnv,
      76             :                              bom, 2,
      77             :                              &pArgs->target, pArgs->targetLimit,
      78             :                              &pArgs->offsets, -1,
      79           0 :                              pErrorCode);
      80           0 :         cnv->fromUnicodeStatus=0;
      81             :     }
      82             : 
      83           0 :     target=pArgs->target;
      84           0 :     if(target >= pArgs->targetLimit) {
      85           0 :         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
      86           0 :         return;
      87             :     }
      88             : 
      89           0 :     targetCapacity=(uint32_t)(pArgs->targetLimit-target);
      90           0 :     offsets=pArgs->offsets;
      91           0 :     sourceIndex=0;
      92             : 
      93             :     /* c!=0 indicates in several places outside the main loops that a surrogate was found */
      94             : 
      95           0 :     if((c=(UChar)cnv->fromUChar32)!=0 && U16_IS_TRAIL(trail=*source) && targetCapacity>=4) {
      96             :         /* the last buffer ended with a lead surrogate, output the surrogate pair */
      97           0 :         ++source;
      98           0 :         --length;
      99           0 :         target[0]=(uint8_t)(c>>8);
     100           0 :         target[1]=(uint8_t)c;
     101           0 :         target[2]=(uint8_t)(trail>>8);
     102           0 :         target[3]=(uint8_t)trail;
     103           0 :         target+=4;
     104           0 :         targetCapacity-=4;
     105           0 :         if(offsets!=NULL) {
     106           0 :             *offsets++=-1;
     107           0 :             *offsets++=-1;
     108           0 :             *offsets++=-1;
     109           0 :             *offsets++=-1;
     110             :         }
     111           0 :         sourceIndex=1;
     112           0 :         cnv->fromUChar32=c=0;
     113             :     }
     114             : 
     115           0 :     if(c==0) {
     116             :         /* copy an even number of bytes for complete UChars */
     117           0 :         uint32_t count=2*length;
     118           0 :         if(count>targetCapacity) {
     119           0 :             count=targetCapacity&~1;
     120             :         }
     121             :         /* count is even */
     122           0 :         targetCapacity-=count;
                               /* from here on count is a number of UChars, no longer a number of bytes */
     123           0 :         count>>=1;
     124           0 :         length-=count;
     125             : 
                               /* two copies of the same loop: without and with offsets output */
     126           0 :         if(offsets==NULL) {
     127           0 :             while(count>0) {
     128           0 :                 c=*source++;
     129           0 :                 if(U16_IS_SINGLE(c)) {
     130           0 :                     target[0]=(uint8_t)(c>>8);
     131           0 :                     target[1]=(uint8_t)c;
     132           0 :                     target+=2;
     133           0 :                 } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) {
     134           0 :                     ++source;
     135           0 :                     --count;
     136           0 :                     target[0]=(uint8_t)(c>>8);
     137           0 :                     target[1]=(uint8_t)c;
     138           0 :                     target[2]=(uint8_t)(trail>>8);
     139           0 :                     target[3]=(uint8_t)trail;
     140           0 :                     target+=4;
     141             :                 } else {
                                           /* surrogate that cannot be paired here: leave with c!=0 and count>0 */
     142           0 :                     break;
     143             :                 }
     144           0 :                 --count;
     145             :             }
     146             :         } else {
     147           0 :             while(count>0) {
     148           0 :                 c=*source++;
     149           0 :                 if(U16_IS_SINGLE(c)) {
     150           0 :                     target[0]=(uint8_t)(c>>8);
     151           0 :                     target[1]=(uint8_t)c;
     152           0 :                     target+=2;
     153           0 :                     *offsets++=sourceIndex;
     154           0 :                     *offsets++=sourceIndex++;
     155           0 :                 } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) {
     156           0 :                     ++source;
     157           0 :                     --count;
     158           0 :                     target[0]=(uint8_t)(c>>8);
     159           0 :                     target[1]=(uint8_t)c;
     160           0 :                     target[2]=(uint8_t)(trail>>8);
     161           0 :                     target[3]=(uint8_t)trail;
     162           0 :                     target+=4;
     163           0 :                     *offsets++=sourceIndex;
     164           0 :                     *offsets++=sourceIndex;
     165           0 :                     *offsets++=sourceIndex;
     166           0 :                     *offsets++=sourceIndex;
     167           0 :                     sourceIndex+=2;
     168             :                 } else {
     169           0 :                     break;
     170             :                 }
     171           0 :                 --count;
     172             :             }
     173             :         }
     174             : 
     175           0 :         if(count==0) {
     176             :             /* done with the loop for complete UChars */
     177           0 :             if(length>0 && targetCapacity>0) {
     178             :                 /*
     179             :                  * there is more input and some target capacity -
     180             :                  * it must be targetCapacity==1 because otherwise
     181             :                  * the above would have copied more;
     182             :                  * prepare for overflow output
     183             :                  */
     184           0 :                 if(U16_IS_SINGLE(c=*source++)) {
     185           0 :                     overflow[0]=(char)(c>>8);
     186           0 :                     overflow[1]=(char)c;
     187           0 :                     length=2; /* 2 bytes to output */
     188           0 :                     c=0;
     189             :                 /* } else { keep c for surrogate handling, length will be set there */
     190             :                 }
     191             :             } else {
     192           0 :                 length=0;
     193           0 :                 c=0;
     194             :             }
     195             :         } else {
     196             :             /* keep c for surrogate handling, length will be set there */
                                   /* give back the capacity that was reserved for the count UChars not written */
     197           0 :             targetCapacity+=2*count;
     198             :         }
     199             :     } else {
     200           0 :         length=0; /* from here on, length counts the bytes in overflow[] */
     201             :     }
     202             :     
     203           0 :     if(c!=0) {
     204             :         /*
     205             :          * c is a surrogate, and
     206             :          * - source or target too short
     207             :          * - or the surrogate is unmatched
     208             :          */
     209           0 :         length=0;
     210           0 :         if(U16_IS_SURROGATE_LEAD(c)) {
     211           0 :             if(source<pArgs->sourceLimit) {
     212           0 :                 if(U16_IS_TRAIL(trail=*source)) {
     213             :                     /* output the surrogate pair, will overflow (see conditions comment above) */
     214           0 :                     ++source;
     215           0 :                     overflow[0]=(char)(c>>8);
     216           0 :                     overflow[1]=(char)c;
     217           0 :                     overflow[2]=(char)(trail>>8);
     218           0 :                     overflow[3]=(char)trail;
     219           0 :                     length=4; /* 4 bytes to output */
     220           0 :                     c=0;
     221             :                 } else {
     222             :                     /* unmatched lead surrogate */
     223           0 :                     *pErrorCode=U_ILLEGAL_CHAR_FOUND;
     224             :                 }
     225             :             } else {
     226             :                 /* see if the trail surrogate is in the next buffer */
     227             :             }
     228             :         } else {
     229             :             /* unmatched trail surrogate */
     230           0 :             *pErrorCode=U_ILLEGAL_CHAR_FOUND;
     231             :         }
                               /* stores 0 after a pair was staged, otherwise the pending or unmatched surrogate */
     232           0 :         cnv->fromUChar32=c;
     233             :     }
     234             : 
     235           0 :     if(length>0) {
     236             :         /* output length bytes with overflow (length>targetCapacity>0) */
     237           0 :         ucnv_fromUWriteBytes(cnv,
     238             :                              overflow, length,
     239             :                              (char **)&target, pArgs->targetLimit,
     240             :                              &offsets, sourceIndex,
     241           0 :                              pErrorCode);
     242           0 :         targetCapacity=(uint32_t)(pArgs->targetLimit-(char *)target);
     243             :     }
     244             : 
     245           0 :     if(U_SUCCESS(*pErrorCode) && source<pArgs->sourceLimit && targetCapacity==0) {
     246           0 :         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     247             :     }
     248             : 
     249             :     /* write back the updated pointers */
     250           0 :     pArgs->source=source;
     251           0 :     pArgs->target=(char *)target;
     252           0 :     pArgs->offsets=offsets;
     253             : }
     254             : 
     255             : static void  U_CALLCONV
     256           0 : _UTF16BEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
     257             :                              UErrorCode *pErrorCode) {
                           /*
                            * Converts the UTF-16BE bytes between pArgs->source and
                            * pArgs->sourceLimit to UChars in pArgs->target, and writes the
                            * advanced source, target and offsets pointers back into pArgs.
                            *
                            * - While converter->mode<8 the call is forwarded unchanged to
                            *   _UTF16ToUnicodeWithOffsets (_UTF16BEReset sets mode to 8 for
                            *   "no BOM handling").
                            * - Bytes of an incomplete UChar or surrogate pair are kept in
                            *   cnv->toUBytes[] / cnv->toULength between calls and completed
                            *   first.
                            * - When pArgs->offsets is not NULL, one offset is written per
                            *   output UChar: the byte index in this buffer where the code unit
                            *   (or the pair) starts, or -1 for output completed from bytes of
                            *   a previous call.
                            * - *pErrorCode becomes U_BUFFER_OVERFLOW_ERROR when the target is
                            *   full (a trail surrogate that does not fit is stored in
                            *   cnv->UCharErrorBuffer), and U_ILLEGAL_CHAR_FOUND for an
                            *   unmatched surrogate, whose two bytes are left in
                            *   cnv->toUBytes[].
                            */
     258             :     UConverter *cnv;
     259             :     const uint8_t *source;
     260             :     UChar *target;
     261             :     int32_t *offsets;
     262             : 
     263             :     uint32_t targetCapacity, length, count, sourceIndex;
     264             :     UChar c, trail;
     265             : 
     266           0 :     if(pArgs->converter->mode<8) {
     267           0 :         _UTF16ToUnicodeWithOffsets(pArgs, pErrorCode);
     268           0 :         return;
     269             :     }
     270             : 
     271           0 :     cnv=pArgs->converter;
     272           0 :     source=(const uint8_t *)pArgs->source;
                           /*
                            * NOTE(review): length is uint32_t, so length<=0 below can only be
                            * true for length==0 despite the int32_t cast - confirm.
                            */
     273           0 :     length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source);
     274           0 :     if(length<=0 && cnv->toUnicodeStatus==0) {
     275             :         /* no input, nothing to do */
     276           0 :         return;
     277             :     }
     278             : 
     279           0 :     target=pArgs->target;
     280           0 :     if(target >= pArgs->targetLimit) {
     281           0 :         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     282           0 :         return;
     283             :     }
     284             : 
     285           0 :     targetCapacity=(uint32_t)(pArgs->targetLimit-target);
     286           0 :     offsets=pArgs->offsets;
     287           0 :     sourceIndex=0;
     288           0 :     c=0;
     289             : 
     290             :     /* complete a partial UChar or pair from the last call */
     291           0 :     if(cnv->toUnicodeStatus!=0) {
     292             :         /*
     293             :          * special case: single byte from a previous buffer,
     294             :          * where the byte turned out not to belong to a trail surrogate
     295             :          * and the preceding, unmatched lead surrogate was put into toUBytes[]
     296             :          * for error handling
     297             :          */
     298           0 :         cnv->toUBytes[0]=(uint8_t)cnv->toUnicodeStatus;
     299           0 :         cnv->toULength=1;
     300           0 :         cnv->toUnicodeStatus=0;
     301             :     }
     302           0 :     if((count=cnv->toULength)!=0) {
     303           0 :         uint8_t *p=cnv->toUBytes;
                               /* here count is the number of bytes collected in toUBytes[] */
     304           0 :         do {
     305           0 :             p[count++]=*source++;
     306           0 :             ++sourceIndex;
     307           0 :             --length;
     308           0 :             if(count==2) {
     309           0 :                 c=((UChar)p[0]<<8)|p[1];
     310           0 :                 if(U16_IS_SINGLE(c)) {
     311             :                     /* output the BMP code point */
     312           0 :                     *target++=c;
     313           0 :                     if(offsets!=NULL) {
     314           0 :                         *offsets++=-1;
     315             :                     }
     316           0 :                     --targetCapacity;
     317           0 :                     count=0;
     318           0 :                     c=0;
     319           0 :                     break;
     320           0 :                 } else if(U16_IS_SURROGATE_LEAD(c)) {
     321             :                     /* continue collecting bytes for the trail surrogate */
     322           0 :                     c=0; /* avoid unnecessary surrogate handling below */
     323             :                 } else {
     324             :                     /* fall through to error handling for an unmatched trail surrogate */
     325           0 :                     break;
     326             :                 }
     327           0 :             } else if(count==4) {
     328           0 :                 c=((UChar)p[0]<<8)|p[1];
     329           0 :                 trail=((UChar)p[2]<<8)|p[3];
     330           0 :                 if(U16_IS_TRAIL(trail)) {
     331             :                     /* output the surrogate pair */
     332           0 :                     *target++=c;
     333           0 :                     if(targetCapacity>=2) {
     334           0 :                         *target++=trail;
     335           0 :                         if(offsets!=NULL) {
     336           0 :                             *offsets++=-1;
     337           0 :                             *offsets++=-1;
     338             :                         }
     339           0 :                         targetCapacity-=2;
     340             :                     } else /* targetCapacity==1 */ {
     341           0 :                         targetCapacity=0;
     342           0 :                         cnv->UCharErrorBuffer[0]=trail;
     343           0 :                         cnv->UCharErrorBufferLength=1;
     344           0 :                         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     345             :                     }
     346           0 :                     count=0;
     347           0 :                     c=0;
     348           0 :                     break;
     349             :                 } else {
     350             :                     /* unmatched lead surrogate, handle here for consistent toUBytes[] */
     351           0 :                     *pErrorCode=U_ILLEGAL_CHAR_FOUND;
     352             : 
     353             :                     /* back out reading the code unit after it */
                                           /*
                                            * NOTE(review): source starts at pArgs->source and has only
                                            * been incremented since, so pArgs->source-source cannot be
                                            * positive and this >=2 branch looks unreachable; the else
                                            * branch (stash one byte in toUnicodeStatus, back up one
                                            * byte) is always taken. The operands look reversed -
                                            * confirm before changing.
                                            */
     354           0 :                     if(((const uint8_t *)pArgs->source-source)>=2) {
     355           0 :                         source-=2;
     356             :                     } else {
     357             :                         /*
     358             :                          * if the trail unit's first byte was in a previous buffer, then
     359             :                          * we need to put it into a special place because toUBytes[] will be
     360             :                          * used for the lead unit's bytes
     361             :                          */
     362           0 :                         cnv->toUnicodeStatus=0x100|p[2];
     363           0 :                         --source;
     364             :                     }
     365           0 :                     cnv->toULength=2;
     366             : 
     367             :                     /* write back the updated pointers */
     368           0 :                     pArgs->source=(const char *)source;
     369           0 :                     pArgs->target=target;
     370           0 :                     pArgs->offsets=offsets;
     371           0 :                     return;
     372             :                 }
     373             :             }
     374           0 :         } while(length>0);
     375           0 :         cnv->toULength=(int8_t)count;
     376             :     }
     377             : 
     378             :     /* copy an even number of bytes for complete UChars */
                           /* count is a byte count here; it is halved to a UChar count inside the if below */
     379           0 :     count=2*targetCapacity;
     380           0 :     if(count>length) {
     381           0 :         count=length&~1;
     382             :     }
     383           0 :     if(c==0 && count>0) {
     384           0 :         length-=count;
     385           0 :         count>>=1;
     386           0 :         targetCapacity-=count;
                               /* two copies of the same loop: without and with offsets output */
     387           0 :         if(offsets==NULL) {
     388           0 :             do {
     389           0 :                 c=((UChar)source[0]<<8)|source[1];
     390           0 :                 source+=2;
     391           0 :                 if(U16_IS_SINGLE(c)) {
     392           0 :                     *target++=c;
     393           0 :                 } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 &&
     394           0 :                           U16_IS_TRAIL(trail=((UChar)source[0]<<8)|source[1])
     395             :                 ) {
     396           0 :                     source+=2;
     397           0 :                     --count;
     398           0 :                     *target++=c;
     399           0 :                     *target++=trail;
     400             :                 } else {
                                           /* surrogate that cannot be paired here: leave with c!=0 and count>0 */
     401           0 :                     break;
     402             :                 }
     403             :             } while(--count>0);
     404             :         } else {
     405           0 :             do {
     406           0 :                 c=((UChar)source[0]<<8)|source[1];
     407           0 :                 source+=2;
     408           0 :                 if(U16_IS_SINGLE(c)) {
     409           0 :                     *target++=c;
     410           0 :                     *offsets++=sourceIndex;
     411           0 :                     sourceIndex+=2;
     412           0 :                 } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 &&
     413           0 :                           U16_IS_TRAIL(trail=((UChar)source[0]<<8)|source[1])
     414             :                 ) {
     415           0 :                     source+=2;
     416           0 :                     --count;
     417           0 :                     *target++=c;
     418           0 :                     *target++=trail;
     419           0 :                     *offsets++=sourceIndex;
     420           0 :                     *offsets++=sourceIndex;
     421           0 :                     sourceIndex+=4;
     422             :                 } else {
     423           0 :                     break;
     424             :                 }
     425             :             } while(--count>0);
     426             :         }
     427             : 
     428           0 :         if(count==0) {
     429             :             /* done with the loop for complete UChars */
     430           0 :             c=0;
     431             :         } else {
     432             :             /* keep c for surrogate handling, trail will be set there */
     433           0 :             length+=2*(count-1); /* one more byte pair was consumed than count decremented */
                                   /* give back the capacity that was reserved for the count UChars not written */
     434           0 :             targetCapacity+=count;
     435             :         }
     436             :     }
     437             : 
     438           0 :     if(c!=0) {
     439             :         /*
     440             :          * c is a surrogate, and
     441             :          * - source or target too short
     442             :          * - or the surrogate is unmatched
     443             :          */
     444           0 :         cnv->toUBytes[0]=(uint8_t)(c>>8);
     445           0 :         cnv->toUBytes[1]=(uint8_t)c;
     446           0 :         cnv->toULength=2;
     447             : 
     448           0 :         if(U16_IS_SURROGATE_LEAD(c)) {
     449           0 :             if(length>=2) {
     450           0 :                 if(U16_IS_TRAIL(trail=((UChar)source[0]<<8)|source[1])) {
     451             :                     /* output the surrogate pair, will overflow (see conditions comment above) */
     452           0 :                     source+=2;
     453           0 :                     length-=2;
     454           0 :                     *target++=c;
     455           0 :                     if(offsets!=NULL) {
     456           0 :                         *offsets++=sourceIndex;
     457             :                     }
     458           0 :                     cnv->UCharErrorBuffer[0]=trail;
     459           0 :                     cnv->UCharErrorBufferLength=1;
     460           0 :                     cnv->toULength=0;
     461           0 :                     *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     462             :                 } else {
     463             :                     /* unmatched lead surrogate */
     464           0 :                     *pErrorCode=U_ILLEGAL_CHAR_FOUND;
     465             :                 }
     466             :             } else {
     467             :                 /* see if the trail surrogate is in the next buffer */
     468             :             }
     469             :         } else {
     470             :             /* unmatched trail surrogate */
     471           0 :             *pErrorCode=U_ILLEGAL_CHAR_FOUND;
     472             :         }
     473             :     }
     474             : 
     475           0 :     if(U_SUCCESS(*pErrorCode)) {
     476             :         /* check for a remaining source byte */
     477           0 :         if(length>0) {
     478           0 :             if(targetCapacity==0) {
     479           0 :                 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     480             :             } else {
     481             :                 /* it must be length==1 because otherwise the above would have copied more */
     482           0 :                 cnv->toUBytes[cnv->toULength++]=*source++;
     483             :             }
     484             :         }
     485             :     }
     486             : 
     487             :     /* write back the updated pointers */
     488           0 :     pArgs->source=(const char *)source;
     489           0 :     pArgs->target=target;
     490           0 :     pArgs->offsets=offsets;
     491             : }
     492             : 
     493             : static UChar32  U_CALLCONV
     494           0 : _UTF16BEGetNextUChar(UConverterToUnicodeArgs *pArgs, UErrorCode *err) {
                           /*
                            * Reads one code point from the UTF-16BE bytes at pArgs->source and,
                            * except when there is no input at all, advances pArgs->source past
                            * the bytes it consumed.
                            *
                            * Returns UCNV_GET_NEXT_UCHAR_USE_TO_U while converter->mode<8.
                            * Otherwise returns the code point, or 0xffff with *err set to
                            * - U_INDEX_OUTOFBOUNDS_ERROR: no input
                            * - U_TRUNCATED_CHAR_FOUND: the input ends inside a UChar or a
                            *   surrogate pair; the remaining bytes are copied into
                            *   converter->toUBytes[] and consumed
                            * - U_ILLEGAL_CHAR_FOUND: unmatched surrogate; its two bytes are
                            *   copied into converter->toUBytes[]
                            */
     495             :     const uint8_t *s, *sourceLimit;
     496             :     UChar32 c;
     497             : 
     498           0 :     if(pArgs->converter->mode<8) {
     499           0 :         return UCNV_GET_NEXT_UCHAR_USE_TO_U;
     500             :     }
     501             : 
     502           0 :     s=(const uint8_t *)pArgs->source;
     503           0 :     sourceLimit=(const uint8_t *)pArgs->sourceLimit;
     504             : 
     505           0 :     if(s>=sourceLimit) {
     506             :         /* no input */
     507           0 :         *err=U_INDEX_OUTOFBOUNDS_ERROR;
     508           0 :         return 0xffff;
     509             :     }
     510             : 
     511           0 :     if(s+2>sourceLimit) {
     512             :         /* only one byte: truncated UChar */
     513           0 :         pArgs->converter->toUBytes[0]=*s++;
     514           0 :         pArgs->converter->toULength=1;
     515           0 :         pArgs->source=(const char *)s;
     516           0 :         *err = U_TRUNCATED_CHAR_FOUND;
     517           0 :         return 0xffff;
     518             :     }
     519             : 
     520             :     /* get one UChar */
     521           0 :     c=((UChar32)*s<<8)|s[1];
     522           0 :     s+=2;
     523             : 
     524             :     /* check for a surrogate pair */
     525           0 :     if(U_IS_SURROGATE(c)) {
     526           0 :         if(U16_IS_SURROGATE_LEAD(c)) {
     527           0 :             if(s+2<=sourceLimit) {
     528             :                 UChar trail;
     529             : 
     530             :                 /* get a second UChar and see if it is a trail surrogate */
     531           0 :                 trail=((UChar)*s<<8)|s[1];
     532           0 :                 if(U16_IS_TRAIL(trail)) {
     533           0 :                     c=U16_GET_SUPPLEMENTARY(c, trail);
     534           0 :                     s+=2;
     535             :                 } else {
     536             :                     /* unmatched lead surrogate */
                                           /* a negative c is the marker tested by the c<0 block below; s still points just past the lead */
     537           0 :                     c=-2;
     538             :                 }
     539             :             } else {
     540             :                 /* too few (2 or 3) bytes for a surrogate pair: truncated code point */
     541           0 :                 uint8_t *bytes=pArgs->converter->toUBytes;
     542           0 :                 s-=2;
     543           0 :                 pArgs->converter->toULength=(int8_t)(sourceLimit-s);
     544           0 :                 do {
     545           0 :                     *bytes++=*s++;
     546           0 :                 } while(s<sourceLimit);
     547             : 
     548           0 :                 c=0xffff;
     549           0 :                 *err=U_TRUNCATED_CHAR_FOUND;
     550             :             }
     551             :         } else {
     552             :             /* unmatched trail surrogate */
     553           0 :             c=-2;
     554             :         }
     555             : 
     556           0 :         if(c<0) {
     557             :             /* write the unmatched surrogate */
                                   /* s points just past the unmatched surrogate, so s-2 and s-1 are its two bytes */
     558           0 :             uint8_t *bytes=pArgs->converter->toUBytes;
     559           0 :             pArgs->converter->toULength=2;
     560           0 :             *bytes=*(s-2);
     561           0 :             bytes[1]=*(s-1);
     562             : 
     563           0 :             c=0xffff;
     564           0 :             *err=U_ILLEGAL_CHAR_FOUND;
     565             :         }
     566             :     }
     567             : 
     568           0 :     pArgs->source=(const char *)s;
     569           0 :     return c;
     570             : } 
     571             : 
     572             : static void  U_CALLCONV
     573           0 : _UTF16BEReset(UConverter *cnv, UConverterResetChoice choice) {
                           /*
                            * Resets converter state depending on choice:
                            * - choice<=UCNV_RESET_TO_UNICODE: cnv->mode becomes 8 for version 0
                            *   and 0 for any other version
                            * - choice!=UCNV_RESET_TO_UNICODE and version 1:
                            *   cnv->fromUnicodeStatus is set so that the next fromUnicode call
                            *   writes the BOM first
                            */
     574           0 :     if(choice<=UCNV_RESET_TO_UNICODE) {
     575             :         /* reset toUnicode state */
     576           0 :         if(UCNV_GET_VERSION(cnv)==0) {
     577           0 :             cnv->mode=8; /* no BOM handling */
     578             :         } else {
     579           0 :             cnv->mode=0; /* Java-specific "UnicodeBig" requires BE BOM or no BOM */
     580             :         }
     581             :     }
     582           0 :     if(choice!=UCNV_RESET_TO_UNICODE && UCNV_GET_VERSION(cnv)==1) {
     583             :         /* reset fromUnicode for "UnicodeBig": prepare to output the UTF-16BE BOM */
     584           0 :         cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM;
     585             :     }
     586           0 : }
     587             : 
     588             : static void  U_CALLCONV
     589           0 : _UTF16BEOpen(UConverter *cnv,
     590             :              UConverterLoadArgs *pArgs,
     591             :              UErrorCode *pErrorCode) {
                           /*
                            * Open hook for the UTF-16BE converter.
                            * Converter versions 0 and 1 are accepted and get a full reset of both
                            * conversion directions; any higher version is rejected by setting
                            * *pErrorCode to U_ILLEGAL_ARGUMENT_ERROR.
                            * pArgs is not used.
                            */
     592             :     (void)pArgs;
     593           0 :     if(UCNV_GET_VERSION(cnv)<=1) {
     594           0 :         _UTF16BEReset(cnv, UCNV_RESET_BOTH);
     595             :     } else {
     596           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
     597             :     }
     598           0 : }
     599             : 
     600             : static const char *  U_CALLCONV
     601           0 : _UTF16BEGetName(const UConverter *cnv) {
                           /*
                            * Return the converter name as a string literal:
                            * "UTF-16BE" for version 0, "UTF-16BE,version=1" for every other version.
                            */
     602           0 :     if(UCNV_GET_VERSION(cnv)==0) {
     603           0 :         return "UTF-16BE";
     604             :     } else {
     605           0 :         return "UTF-16BE,version=1";
     606             :     }
     607             : }
     608             : U_CDECL_END
     609             : 
     610             : static const UConverterImpl _UTF16BEImpl={
                           /*
                            * Function table for the UTF-16BE converter.
                            * The initializers are positional; the meaning of each slot comes from
                            * the UConverterImpl declaration, which is not part of this file
                            * (presumably ucnv_cnv.h - confirm). The per-slot remarks below are
                            * inferred from the function names only.
                            */
     611             :     UCNV_UTF16_BigEndian,
     612             : 
     613             :     NULL,
     614             :     NULL,
     615             : 
                           /* open / (unset) / reset hooks - presumably, going by the names */
     616             :     _UTF16BEOpen,
     617             :     NULL,
     618             :     _UTF16BEReset,
     619             : 
                           /*
                            * the same to-Unicode and from-Unicode functions are each registered
                            * twice - presumably for the plain and the with-offsets slots; confirm
                            */
     620             :     _UTF16BEToUnicodeWithOffsets,
     621             :     _UTF16BEToUnicodeWithOffsets,
     622             :     _UTF16BEFromUnicodeWithOffsets,
     623             :     _UTF16BEFromUnicodeWithOffsets,
     624             :     _UTF16BEGetNextUChar,
     625             : 
     626             :     NULL,
     627             :     _UTF16BEGetName,
     628             :     NULL,
     629             :     NULL,
     630             :     ucnv_getNonSurrogateUnicodeSet,
     631             : 
     632             :     NULL,
     633             :     NULL
     634             : };
     635             : 
     636             : static const UConverterStaticData _UTF16BEStaticData={
                           /*
                            * Static description of the UTF-16BE converter.
                            * Initializers are positional; field meanings come from the
                            * UConverterStaticData declaration, which is outside this file.
                            * NOTE(review): { 0xff, 0xfd, 0, 0 } followed by 2 looks like a
                            * 2-byte substitution sequence (U+FFFD, high byte first), and the
                            * "2, 2" pair like min/max bytes per code unit - confirm against the
                            * struct declaration.
                            */
     637             :     sizeof(UConverterStaticData),
     638             :     "UTF-16BE",
     639             :     1200, UCNV_IBM, UCNV_UTF16_BigEndian, 2, 2,
     640             :     { 0xff, 0xfd, 0, 0 },2,FALSE,FALSE,
     641             :     0,
     642             :     0,
     643             :     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
     644             : };
     645             : 
     646             : 
     647             : const UConverterSharedData _UTF16BEData=
                               /* non-static: ties the UTF-16BE static data to its function table */
     648             :         UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF16BEStaticData, &_UTF16BEImpl);
     649             : 
     650             : /* UTF-16LE ----------------------------------------------------------------- */
     651             : U_CDECL_BEGIN
     652             : static void  U_CALLCONV
     653           0 : _UTF16LEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
     654             :                                UErrorCode *pErrorCode) {
                           /*
                            * Convert UTF-16 code units from pArgs->source into UTF-16LE bytes
                            * (low byte first, then high byte) at pArgs->target; when
                            * pArgs->offsets is not NULL, one source index is recorded per
                            * output byte.
                            *
                            * State carried between calls:
                            * - cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM: write FF FE first
                            * - cnv->fromUChar32: a lead surrogate seen at the end of the
                            *   previous buffer (or the surrogate that caused an error)
                            *
                            * Error codes set here: U_BUFFER_OVERFLOW_ERROR when input remains
                            * but the target is full, U_ILLEGAL_CHAR_FOUND for an unmatched
                            * surrogate. Bytes that do not fit into the target are handed to
                            * ucnv_fromUWriteBytes() from the local overflow[] buffer.
                            */
     655             :     UConverter *cnv;
     656             :     const UChar *source;
     657             :     char *target;
     658             :     int32_t *offsets;
     659             : 
     660             :     uint32_t targetCapacity, length, sourceIndex;
     661             :     UChar c, trail;
     662             :     char overflow[4];
     663             : 
     664           0 :     source=pArgs->source;
     665           0 :     length=(int32_t)(pArgs->sourceLimit-source);
                           /*
                            * NOTE(review): length is declared uint32_t, so the (int32_t) cast is
                            * converted back to unsigned and "length<=0" is effectively
                            * "length==0"; presumably sourceLimit>=source always holds - confirm.
                            */
     666           0 :     if(length<=0) {
     667             :         /* no input, nothing to do */
     668           0 :         return;
     669             :     }
     670             : 
     671           0 :     cnv=pArgs->converter;
     672             : 
     673             :     /* write the BOM if necessary */
     674           0 :     if(cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
     675             :         static const char bom[]={ (char)0xff, (char)0xfe };
     676           0 :         ucnv_fromUWriteBytes(cnv,
     677             :                              bom, 2,
     678             :                              &pArgs->target, pArgs->targetLimit,
     679             :                              &pArgs->offsets, -1,
     680           0 :                              pErrorCode);
     681           0 :         cnv->fromUnicodeStatus=0;
     682             :     }
     683             : 
                           /* read target only now: the BOM write above received &pArgs->target */
     684           0 :     target=pArgs->target;
     685           0 :     if(target >= pArgs->targetLimit) {
     686           0 :         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     687           0 :         return;
     688             :     }
     689             : 
     690           0 :     targetCapacity=(uint32_t)(pArgs->targetLimit-pArgs->target);
     691           0 :     offsets=pArgs->offsets;
     692           0 :     sourceIndex=0;
     693             : 
     694             :     /* c!=0 indicates in several places outside the main loops that a surrogate was found */
     695             : 
     696           0 :     if((c=(UChar)cnv->fromUChar32)!=0 && U16_IS_TRAIL(trail=*source) && targetCapacity>=4) {
     697             :         /* the last buffer ended with a lead surrogate, output the surrogate pair */
     698           0 :         ++source;
     699           0 :         --length;
     700           0 :         target[0]=(uint8_t)c;
     701           0 :         target[1]=(uint8_t)(c>>8);
     702           0 :         target[2]=(uint8_t)trail;
     703           0 :         target[3]=(uint8_t)(trail>>8);
     704           0 :         target+=4;
     705           0 :         targetCapacity-=4;
     706           0 :         if(offsets!=NULL) {
                               /* all four bytes get offset -1, the same value passed for the BOM */
     707           0 :             *offsets++=-1;
     708           0 :             *offsets++=-1;
     709           0 :             *offsets++=-1;
     710           0 :             *offsets++=-1;
     711             :         }
                           /* the trail surrogate just consumed was source index 0 */
     712           0 :         sourceIndex=1;
     713           0 :         cnv->fromUChar32=c=0;
     714             :     }
     715             : 
                           /*
                            * c is still nonzero here if a pending lead surrogate could not be
                            * completed above (next unit is not a trail, or fewer than 4 bytes of
                            * capacity); that case skips the copy loops and is handled further down
                            */
     716           0 :     if(c==0) {
     717             :         /* copy an even number of bytes for complete UChars */
     718           0 :         uint32_t count=2*length;
     719           0 :         if(count>targetCapacity) {
     720           0 :             count=targetCapacity&~1;
     721             :         }
     722             :         /* count is even */
                               /*
                                * from here on count is a number of UChars (not bytes) and length is
                                * the number of input UChars beyond those the loop is allowed to copy
                                */
     723           0 :         targetCapacity-=count;
     724           0 :         count>>=1;
     725           0 :         length-=count;
     726             : 
     727           0 :         if(offsets==NULL) {
                                   /* fast path without offsets; stops early on a surrogate it cannot pair */
     728           0 :             while(count>0) {
     729           0 :                 c=*source++;
     730           0 :                 if(U16_IS_SINGLE(c)) {
     731           0 :                     target[0]=(uint8_t)c;
     732           0 :                     target[1]=(uint8_t)(c>>8);
     733           0 :                     target+=2;
     734           0 :                 } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) {
     735           0 :                     ++source;
     736           0 :                     --count;
     737           0 :                     target[0]=(uint8_t)c;
     738           0 :                     target[1]=(uint8_t)(c>>8);
     739           0 :                     target[2]=(uint8_t)trail;
     740           0 :                     target[3]=(uint8_t)(trail>>8);
     741           0 :                     target+=4;
     742             :                 } else {
     743           0 :                     break;
     744             :                 }
     745           0 :                 --count;
     746             :             }
     747             :         } else {
                                   /* same loop, additionally writing one source index per output byte */
     748           0 :             while(count>0) {
     749           0 :                 c=*source++;
     750           0 :                 if(U16_IS_SINGLE(c)) {
     751           0 :                     target[0]=(uint8_t)c;
     752           0 :                     target[1]=(uint8_t)(c>>8);
     753           0 :                     target+=2;
     754           0 :                     *offsets++=sourceIndex;
     755           0 :                     *offsets++=sourceIndex++;
     756           0 :                 } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) {
     757           0 :                     ++source;
     758           0 :                     --count;
     759           0 :                     target[0]=(uint8_t)c;
     760           0 :                     target[1]=(uint8_t)(c>>8);
     761           0 :                     target[2]=(uint8_t)trail;
     762           0 :                     target[3]=(uint8_t)(trail>>8);
     763           0 :                     target+=4;
     764           0 :                     *offsets++=sourceIndex;
     765           0 :                     *offsets++=sourceIndex;
     766           0 :                     *offsets++=sourceIndex;
     767           0 :                     *offsets++=sourceIndex;
     768           0 :                     sourceIndex+=2;
     769             :                 } else {
     770           0 :                     break;
     771             :                 }
     772           0 :                 --count;
     773             :             }
     774             :         }
     775             : 
     776           0 :         if(count==0) {
     777             :             /* done with the loop for complete UChars */
     778           0 :             if(length>0 && targetCapacity>0) {
     779             :                 /*
     780             :                  * there is more input and some target capacity -
     781             :                  * it must be targetCapacity==1 because otherwise
     782             :                  * the above would have copied more;
     783             :                  * prepare for overflow output
     784             :                  */
     785           0 :                 if(U16_IS_SINGLE(c=*source++)) {
     786           0 :                     overflow[0]=(char)c;
     787           0 :                     overflow[1]=(char)(c>>8);
     788           0 :                     length=2; /* 2 bytes to output */
     789           0 :                     c=0;
     790             :                 /* } else { keep c for surrogate handling, length will be set there */
     791             :                 }
     792             :             } else {
     793           0 :                 length=0;
     794           0 :                 c=0;
     795             :             }
     796             :         } else {
     797             :             /* keep c for surrogate handling, length will be set there */
                                   /* the loop broke early: give back the capacity reserved for the uncopied UChars */
     798           0 :             targetCapacity+=2*count;
     799             :         }
     800             :     } else {
     801           0 :         length=0; /* from here on, length counts the bytes in overflow[] */
     802             :     }
     803             :     
     804           0 :     if(c!=0) {
     805             :         /*
     806             :          * c is a surrogate, and
     807             :          * - source or target too short
     808             :          * - or the surrogate is unmatched
     809             :          */
     810           0 :         length=0;
     811           0 :         if(U16_IS_SURROGATE_LEAD(c)) {
     812           0 :             if(source<pArgs->sourceLimit) {
     813           0 :                 if(U16_IS_TRAIL(trail=*source)) {
     814             :                     /* output the surrogate pair, will overflow (see conditions comment above) */
     815           0 :                     ++source;
     816           0 :                     overflow[0]=(char)c;
     817           0 :                     overflow[1]=(char)(c>>8);
     818           0 :                     overflow[2]=(char)trail;
     819           0 :                     overflow[3]=(char)(trail>>8);
     820           0 :                     length=4; /* 4 bytes to output */
     821           0 :                     c=0;
     822             :                 } else {
     823             :                     /* unmatched lead surrogate */
     824           0 :                     *pErrorCode=U_ILLEGAL_CHAR_FOUND;
     825             :                 }
     826             :             } else {
     827             :                 /* see if the trail surrogate is in the next buffer */
     828             :             }
     829             :         } else {
     830             :             /* unmatched trail surrogate */
     831           0 :             *pErrorCode=U_ILLEGAL_CHAR_FOUND;
     832             :         }
                               /* stores 0 after a completed pair, otherwise the pending or offending surrogate */
     833           0 :         cnv->fromUChar32=c;
     834             :     }
     835             : 
     836           0 :     if(length>0) {
     837             :         /* output length bytes with overflow (length>targetCapacity>0) */
     838           0 :         ucnv_fromUWriteBytes(cnv,
     839             :                              overflow, length,
     840             :                              &target, pArgs->targetLimit,
     841             :                              &offsets, sourceIndex,
     842           0 :                              pErrorCode);
     843           0 :         targetCapacity=(uint32_t)(pArgs->targetLimit-(char *)target);
     844             :     }
     845             : 
                           /* report overflow only if no other error is already set */
     846           0 :     if(U_SUCCESS(*pErrorCode) && source<pArgs->sourceLimit && targetCapacity==0) {
     847           0 :         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     848             :     }
     849             : 
     850             :     /* write back the updated pointers */
     851           0 :     pArgs->source=source;
     852           0 :     pArgs->target=target;
     853           0 :     pArgs->offsets=offsets;
     854             : }
     855             : 
     856             : static void  U_CALLCONV
     857           0 : _UTF16LEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
     858             :                              UErrorCode *pErrorCode) {
                           /*
                            * Convert UTF-16LE bytes (low byte first) from pArgs->source into
                            * UChars at pArgs->target; when pArgs->offsets is not NULL, one source
                            * byte index is recorded per output UChar.
                            *
                            * While cnv->mode<8 the whole call is delegated to
                            * _UTF16ToUnicodeWithOffsets().
                            *
                            * State carried between calls:
                            * - cnv->toUBytes[] / cnv->toULength: bytes of an incomplete UChar or
                            *   surrogate pair, or the bytes of an offending surrogate
                            * - cnv->toUnicodeStatus: 0x100 | one byte that followed an unmatched
                            *   lead surrogate (see the special case below)
                            * - cnv->UCharErrorBuffer[]: a trail surrogate that did not fit
                            *
                            * Error codes set here: U_BUFFER_OVERFLOW_ERROR and
                            * U_ILLEGAL_CHAR_FOUND.
                            */
     859             :     UConverter *cnv;
     860             :     const uint8_t *source;
     861             :     UChar *target;
     862             :     int32_t *offsets;
     863             : 
     864             :     uint32_t targetCapacity, length, count, sourceIndex;
     865             :     UChar c, trail;
     866             : 
     867           0 :     if(pArgs->converter->mode<8) {
     868           0 :         _UTF16ToUnicodeWithOffsets(pArgs, pErrorCode);
     869           0 :         return;
     870             :     }
     871             : 
     872           0 :     cnv=pArgs->converter;
     873           0 :     source=(const uint8_t *)pArgs->source;
     874           0 :     length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source);
                           /*
                            * NOTE(review): length is declared uint32_t, so "length<=0" is
                            * effectively "length==0" despite the (int32_t) cast.
                            */
     875           0 :     if(length<=0 && cnv->toUnicodeStatus==0) {
     876             :         /* no input, nothing to do */
     877           0 :         return;
     878             :     }
     879             : 
     880           0 :     target=pArgs->target;
     881           0 :     if(target >= pArgs->targetLimit) {
     882           0 :         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     883           0 :         return;
     884             :     }
     885             : 
     886           0 :     targetCapacity=(uint32_t)(pArgs->targetLimit-pArgs->target);
     887           0 :     offsets=pArgs->offsets;
     888           0 :     sourceIndex=0;
     889           0 :     c=0;
     890             : 
     891             :     /* complete a partial UChar or pair from the last call */
     892           0 :     if(cnv->toUnicodeStatus!=0) {
     893             :         /*
     894             :          * special case: single byte from a previous buffer,
     895             :          * where the byte turned out not to belong to a trail surrogate
     896             :          * and the preceding, unmatched lead surrogate was put into toUBytes[]
     897             :          * for error handling
     898             :          */
     899           0 :         cnv->toUBytes[0]=(uint8_t)cnv->toUnicodeStatus;
     900           0 :         cnv->toULength=1;
     901           0 :         cnv->toUnicodeStatus=0;
     902             :     }
     903           0 :     if((count=cnv->toULength)!=0) {
     904           0 :         uint8_t *p=cnv->toUBytes;
                               /*
                                * NOTE(review): the first iteration reads *source without checking
                                * length>0. The early return above is skipped when
                                * toUnicodeStatus!=0, so an empty input with a pending status byte
                                * would read at sourceLimit and wrap length; presumably callers
                                * never do that - confirm.
                                */
     905           0 :         do {
     906           0 :             p[count++]=*source++;
     907           0 :             ++sourceIndex;
     908           0 :             --length;
     909           0 :             if(count==2) {
     910           0 :                 c=((UChar)p[1]<<8)|p[0];
     911           0 :                 if(U16_IS_SINGLE(c)) {
     912             :                     /* output the BMP code point */
     913           0 :                     *target++=c;
     914           0 :                     if(offsets!=NULL) {
     915           0 :                         *offsets++=-1;
     916             :                     }
     917           0 :                     --targetCapacity;
     918           0 :                     count=0;
     919           0 :                     c=0;
     920           0 :                     break;
     921           0 :                 } else if(U16_IS_SURROGATE_LEAD(c)) {
     922             :                     /* continue collecting bytes for the trail surrogate */
     923           0 :                     c=0; /* avoid unnecessary surrogate handling below */
     924             :                 } else {
     925             :                     /* fall through to error handling for an unmatched trail surrogate */
     926           0 :                     break;
     927             :                 }
     928           0 :             } else if(count==4) {
     929           0 :                 c=((UChar)p[1]<<8)|p[0];
     930           0 :                 trail=((UChar)p[3]<<8)|p[2];
     931           0 :                 if(U16_IS_TRAIL(trail)) {
     932             :                     /* output the surrogate pair */
     933           0 :                     *target++=c;
     934           0 :                     if(targetCapacity>=2) {
     935           0 :                         *target++=trail;
     936           0 :                         if(offsets!=NULL) {
     937           0 :                             *offsets++=-1;
     938           0 :                             *offsets++=-1;
     939             :                         }
     940           0 :                         targetCapacity-=2;
     941             :                     } else /* targetCapacity==1 */ {
                                               /* no offset is written for the lead surrogate on this path */
     942           0 :                         targetCapacity=0;
     943           0 :                         cnv->UCharErrorBuffer[0]=trail;
     944           0 :                         cnv->UCharErrorBufferLength=1;
     945           0 :                         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     946             :                     }
     947           0 :                     count=0;
     948           0 :                     c=0;
     949           0 :                     break;
     950             :                 } else {
     951             :                     /* unmatched lead surrogate, handle here for consistent toUBytes[] */
     952           0 :                     *pErrorCode=U_ILLEGAL_CHAR_FOUND;
     953             : 
     954             :                     /* back out reading the code unit after it */
                                           /*
                                            * NOTE(review): source starts at pArgs->source and is only
                                            * incremented before this point, so this difference is never
                                            * positive and the ">=2" branch cannot be taken; the operands
                                            * look reversed (source minus pArgs->source) - confirm against
                                            * upstream. The else branch still backs out correctly via
                                            * toUnicodeStatus, so the effect appears benign.
                                            */
     955           0 :                     if(((const uint8_t *)pArgs->source-source)>=2) {
     956           0 :                         source-=2;
     957             :                     } else {
     958             :                         /*
     959             :                          * if the trail unit's first byte was in a previous buffer, then
     960             :                          * we need to put it into a special place because toUBytes[] will be
     961             :                          * used for the lead unit's bytes
     962             :                          */
     963           0 :                         cnv->toUnicodeStatus=0x100|p[2];
     964           0 :                         --source;
     965             :                     }
     966           0 :                     cnv->toULength=2;
     967             : 
     968             :                     /* write back the updated pointers */
     969           0 :                     pArgs->source=(const char *)source;
     970           0 :                     pArgs->target=target;
     971           0 :                     pArgs->offsets=offsets;
     972           0 :                     return;
     973             :                 }
     974             :             }
     975           0 :         } while(length>0);
     976           0 :         cnv->toULength=(int8_t)count;
     977             :     }
     978             : 
     979             :     /* copy an even number of bytes for complete UChars */
     980           0 :     count=2*targetCapacity;
     981           0 :     if(count>length) {
     982           0 :         count=length&~1;
     983             :     }
     984           0 :     if(c==0 && count>0) {
                               /* count switches from bytes to UChars; length keeps the bytes not covered by the loop */
     985           0 :         length-=count;
     986           0 :         count>>=1;
     987           0 :         targetCapacity-=count;
     988           0 :         if(offsets==NULL) {
     989           0 :             do {
     990           0 :                 c=((UChar)source[1]<<8)|source[0];
     991           0 :                 source+=2;
     992           0 :                 if(U16_IS_SINGLE(c)) {
     993           0 :                     *target++=c;
     994           0 :                 } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 &&
     995           0 :                           U16_IS_TRAIL(trail=((UChar)source[1]<<8)|source[0])
     996             :                 ) {
     997           0 :                     source+=2;
     998           0 :                     --count;
     999           0 :                     *target++=c;
    1000           0 :                     *target++=trail;
    1001             :                 } else {
    1002           0 :                     break;
    1003             :                 }
    1004             :             } while(--count>0);
    1005             :         } else {
                                   /* same loop, additionally recording the byte index of each UChar's source */
    1006           0 :             do {
    1007           0 :                 c=((UChar)source[1]<<8)|source[0];
    1008           0 :                 source+=2;
    1009           0 :                 if(U16_IS_SINGLE(c)) {
    1010           0 :                     *target++=c;
    1011           0 :                     *offsets++=sourceIndex;
    1012           0 :                     sourceIndex+=2;
    1013           0 :                 } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 &&
    1014           0 :                           U16_IS_TRAIL(trail=((UChar)source[1]<<8)|source[0])
    1015             :                 ) {
    1016           0 :                     source+=2;
    1017           0 :                     --count;
    1018           0 :                     *target++=c;
    1019           0 :                     *target++=trail;
    1020           0 :                     *offsets++=sourceIndex;
    1021           0 :                     *offsets++=sourceIndex;
    1022           0 :                     sourceIndex+=4;
    1023             :                 } else {
    1024           0 :                     break;
    1025             :                 }
    1026             :             } while(--count>0);
    1027             :         }
    1028             : 
    1029           0 :         if(count==0) {
    1030             :             /* done with the loop for complete UChars */
    1031           0 :             c=0;
    1032             :         } else {
    1033             :             /* keep c for surrogate handling, trail will be set there */
    1034           0 :             length+=2*(count-1); /* one more byte pair was consumed than count decremented */
    1035           0 :             targetCapacity+=count;
    1036             :         }
    1037             :     }
    1038             : 
    1039           0 :     if(c!=0) {
    1040             :         /*
    1041             :          * c is a surrogate, and
    1042             :          * - source or target too short
    1043             :          * - or the surrogate is unmatched
    1044             :          */
                               /* park the surrogate's bytes in toUBytes[] in little-endian order */
    1045           0 :         cnv->toUBytes[0]=(uint8_t)c;
    1046           0 :         cnv->toUBytes[1]=(uint8_t)(c>>8);
    1047           0 :         cnv->toULength=2;
    1048             : 
    1049           0 :         if(U16_IS_SURROGATE_LEAD(c)) {
    1050           0 :             if(length>=2) {
    1051           0 :                 if(U16_IS_TRAIL(trail=((UChar)source[1]<<8)|source[0])) {
    1052             :                     /* output the surrogate pair, will overflow (see conditions comment above) */
    1053           0 :                     source+=2;
    1054           0 :                     length-=2;
    1055           0 :                     *target++=c;
    1056           0 :                     if(offsets!=NULL) {
    1057           0 :                         *offsets++=sourceIndex;
    1058             :                     }
    1059           0 :                     cnv->UCharErrorBuffer[0]=trail;
    1060           0 :                     cnv->UCharErrorBufferLength=1;
    1061           0 :                     cnv->toULength=0;
    1062           0 :                     *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    1063             :                 } else {
    1064             :                     /* unmatched lead surrogate */
    1065           0 :                     *pErrorCode=U_ILLEGAL_CHAR_FOUND;
    1066             :                 }
    1067             :             } else {
    1068             :                 /* see if the trail surrogate is in the next buffer */
    1069             :             }
    1070             :         } else {
    1071             :             /* unmatched trail surrogate */
    1072           0 :             *pErrorCode=U_ILLEGAL_CHAR_FOUND;
    1073             :         }
    1074             :     }
    1075             : 
    1076           0 :     if(U_SUCCESS(*pErrorCode)) {
    1077             :         /* check for a remaining source byte */
    1078           0 :         if(length>0) {
    1079           0 :             if(targetCapacity==0) {
    1080           0 :                 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    1081             :             } else {
    1082             :                 /* it must be length==1 because otherwise the above would have copied more */
                                       /* append it after any bytes already parked in toUBytes[] */
    1083           0 :                 cnv->toUBytes[cnv->toULength++]=*source++;
    1084             :             }
    1085             :         }
    1086             :     }
    1087             : 
    1088             :     /* write back the updated pointers */
    1089           0 :     pArgs->source=(const char *)source;
    1090           0 :     pArgs->target=target;
    1091           0 :     pArgs->offsets=offsets;
    1092             : }
    1093             : 
    1094             : static UChar32  U_CALLCONV
    1095           0 : _UTF16LEGetNextUChar(UConverterToUnicodeArgs *pArgs, UErrorCode *err) {
                           /*
                            * Read one code point from the UTF-16LE bytes at pArgs->source.
                            *
                            * Returns UCNV_GET_NEXT_UCHAR_USE_TO_U while the converter's mode is
                            * below 8. Otherwise returns the code point (a surrogate pair is
                            * combined into a supplementary code point), or 0xffff with *err set:
                            * - U_INDEX_OUTOFBOUNDS_ERROR: no input (source is not advanced)
                            * - U_TRUNCATED_CHAR_FOUND: 1 byte only, or a lead surrogate followed
                            *   by fewer than 2 bytes; the available bytes go to toUBytes[]
                            * - U_ILLEGAL_CHAR_FOUND: unmatched surrogate; its 2 bytes go to
                            *   toUBytes[]
                            * pArgs->source is advanced past the bytes consumed.
                            */
    1096             :     const uint8_t *s, *sourceLimit;
    1097             :     UChar32 c;
    1098             : 
    1099           0 :     if(pArgs->converter->mode<8) {
    1100           0 :         return UCNV_GET_NEXT_UCHAR_USE_TO_U;
    1101             :     }
    1102             : 
    1103           0 :     s=(const uint8_t *)pArgs->source;
    1104           0 :     sourceLimit=(const uint8_t *)pArgs->sourceLimit;
    1105             : 
    1106           0 :     if(s>=sourceLimit) {
    1107             :         /* no input */
    1108           0 :         *err=U_INDEX_OUTOFBOUNDS_ERROR;
    1109           0 :         return 0xffff;
    1110             :     }
    1111             : 
    1112           0 :     if(s+2>sourceLimit) {
    1113             :         /* only one byte: truncated UChar */
    1114           0 :         pArgs->converter->toUBytes[0]=*s++;
    1115           0 :         pArgs->converter->toULength=1;
    1116           0 :         pArgs->source=(const char *)s;
    1117           0 :         *err = U_TRUNCATED_CHAR_FOUND;
    1118           0 :         return 0xffff;
    1119             :     }
    1120             : 
    1121             :     /* get one UChar */
    1122           0 :     c=((UChar32)s[1]<<8)|*s;
    1123           0 :     s+=2;
    1124             : 
    1125             :     /* check for a surrogate pair */
    1126           0 :     if(U_IS_SURROGATE(c)) {
    1127           0 :         if(U16_IS_SURROGATE_LEAD(c)) {
    1128           0 :             if(s+2<=sourceLimit) {
    1129             :                 UChar trail;
    1130             : 
    1131             :                 /* get a second UChar and see if it is a trail surrogate */
    1132           0 :                 trail=((UChar)s[1]<<8)|*s;
    1133           0 :                 if(U16_IS_TRAIL(trail)) {
    1134           0 :                     c=U16_GET_SUPPLEMENTARY(c, trail);
    1135           0 :                     s+=2;
    1136             :                 } else {
    1137             :                     /* unmatched lead surrogate */
                                           /* -2 is a negative marker picked up by the c<0 check below; s is not advanced */
    1138           0 :                     c=-2;
    1139             :                 }
    1140             :             } else {
    1141             :                 /* too few (2 or 3) bytes for a surrogate pair: truncated code point */
    1142           0 :                 uint8_t *bytes=pArgs->converter->toUBytes;
                                       /* rewind to the lead surrogate so that all remaining bytes are copied */
    1143           0 :                 s-=2;
    1144           0 :                 pArgs->converter->toULength=(int8_t)(sourceLimit-s);
    1145           0 :                 do {
    1146           0 :                     *bytes++=*s++;
    1147           0 :                 } while(s<sourceLimit);
    1148             : 
    1149           0 :                 c=0xffff;
    1150           0 :                 *err=U_TRUNCATED_CHAR_FOUND;
    1151             :             }
    1152             :         } else {
    1153             :             /* unmatched trail surrogate */
    1154           0 :             c=-2;
    1155             :         }
    1156             : 
    1157           0 :         if(c<0) {
    1158             :             /* write the unmatched surrogate */
                                   /* in both unmatched cases s points just past the offending unit, so s-2 and s-1 are its bytes */
    1159           0 :             uint8_t *bytes=pArgs->converter->toUBytes;
    1160           0 :             pArgs->converter->toULength=2;
    1161           0 :             *bytes=*(s-2);
    1162           0 :             bytes[1]=*(s-1);
    1163             : 
    1164           0 :             c=0xffff;
    1165           0 :             *err=U_ILLEGAL_CHAR_FOUND;
    1166             :         }
    1167             :     }
    1168             : 
    1169           0 :     pArgs->source=(const char *)s;
    1170           0 :     return c;
    1171             : } 
    1172             : 
    1173             : static void  U_CALLCONV /*
                     :  * Reset hook of the UTF-16LE converter.
                     :  *
                     :  * cnv    - converter whose state is re-initialised in place.
                     :  * choice - selects the direction(s) to reset. The tests below treat every
                     :  *          value <= UCNV_RESET_TO_UNICODE as "includes toUnicode" and
                     :  *          every value != UCNV_RESET_TO_UNICODE as "includes
                     :  *          fromUnicode". This presumably relies on UCNV_RESET_BOTH
                     :  *          sorting before UCNV_RESET_TO_UNICODE - TODO confirm against
                     :  *          the enum declaration.
                     :  *
                     :  * The fromUnicode side is only touched for version 1; for version 0
                     :  * nothing is written to fromUnicodeStatus here.
                     :  */
    1174           0 : _UTF16LEReset(UConverter *cnv, UConverterResetChoice choice) {
    1175           0 :     if(choice<=UCNV_RESET_TO_UNICODE) {
    1176             :         /* reset toUnicode state */
    1177           0 :         if(UCNV_GET_VERSION(cnv)==0) {
    1178           0 :             cnv->mode=8; /* no BOM handling; NOTE(review): presumably any mode>=8 means "BOM detection finished" for this converter - confirm in _UTF16LEToUnicodeWithOffsets */
    1179             :         } else {
    1180           0 :             cnv->mode=0; /* Java-specific "UnicodeLittle" requires LE BOM or no BOM; 0 is the initial BOM-detection state */
    1181             :         }
    1182             :     }
    1183           0 :     if(choice!=UCNV_RESET_TO_UNICODE && UCNV_GET_VERSION(cnv)==1) {
    1184             :         /* reset fromUnicode for "UnicodeLittle": prepare to output the UTF-16LE BOM */
    1185           0 :         cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM;
    1186             :     }
    1187           0 : }
    1188             : 
    1189             : static void  U_CALLCONV /*
                     :  * Open hook of the UTF-16LE converter.
                     :  *
                     :  * cnv        - converter being opened.
                     :  * pArgs      - not used by this implementation.
                     :  * pErrorCode - set to U_ILLEGAL_ARGUMENT_ERROR when the requested
                     :  *              version is greater than 1; not written otherwise.
                     :  *
                     :  * For versions up to 1 both conversion directions are reset.
                     :  */
    1190           0 : _UTF16LEOpen(UConverter *cnv,
    1191             :              UConverterLoadArgs *pArgs,
    1192             :              UErrorCode *pErrorCode) {
    1193             :     (void)pArgs; /* explicitly mark the parameter as unused */
    1194           0 :     if(UCNV_GET_VERSION(cnv)<=1) {
    1195           0 :         _UTF16LEReset(cnv, UCNV_RESET_BOTH);
    1196             :     } else {
    1197           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; /* only versions 0 and 1 exist for UTF-16LE */
    1198             :     }
    1199           0 : }
    1200             : 
    1201             : static const char *  U_CALLCONV /*
                     :  * Name hook of the UTF-16LE converter.
                     :  *
                     :  * Returns a pointer to a string literal: "UTF-16LE" for version 0 and
                     :  * "UTF-16LE,version=1" for every other version value.
                     :  */
    1202           0 : _UTF16LEGetName(const UConverter *cnv) {
    1203           0 :     if(UCNV_GET_VERSION(cnv)==0) {
    1204           0 :         return "UTF-16LE";
    1205             :     } else {
    1206           0 :         return "UTF-16LE,version=1";
    1207             :     }
    1208             : }
    1209             : U_CDECL_END
    1210             : 
    1211             : static const UConverterImpl _UTF16LEImpl={ /*
                     :  * Implementation table of the UTF-16LE converter.
                     :  *
                     :  * The initializer is positional, so the meaning of each slot is fixed
                     :  * by the declaration of UConverterImpl, which is not part of this
                     :  * file. Slot descriptions that are not backed by the entry's own name
                     :  * are marked as presumptions.
                     :  */
    1212             :     UCNV_UTF16_LittleEndian, /* converter type constant */
    1213             : 
    1214             :     NULL, /* no hook supplied */
    1215             :     NULL, /* no hook supplied */
    1216             : 
    1217             :     _UTF16LEOpen, /* open hook */
    1218             :     NULL, /* no hook supplied */
    1219             :     _UTF16LEReset, /* reset hook */
    1220             : 
    1221             :     _UTF16LEToUnicodeWithOffsets, /* the same routine fills two adjacent slots - presumably the plain */
    1222             :     _UTF16LEToUnicodeWithOffsets, /* and the offsets-producing toUnicode entries; TODO confirm */
    1223             :     _UTF16LEFromUnicodeWithOffsets, /* likewise for fromUnicode - presumably plain */
    1224             :     _UTF16LEFromUnicodeWithOffsets, /* and offsets-producing; TODO confirm */
    1225             :     _UTF16LEGetNextUChar, /* single-code-point reader */
    1226             : 
    1227             :     NULL, /* the sibling table _UTF16Impl labels this position as getStarters */
    1228             :     _UTF16LEGetName, /* name hook */
    1229             :     NULL, /* no hook supplied */
    1230             :     NULL, /* no hook supplied */
    1231             :     ucnv_getNonSurrogateUnicodeSet, /* presumably reports the convertible code points (by its name: all but surrogates) - confirm */
    1232             : 
    1233             :     NULL, /* no hook supplied */
    1234             :     NULL /* no hook supplied */
    1235             : };
    1236             : 
    1237             : 
    1238             : static const UConverterStaticData _UTF16LEStaticData={ /*
                     :  * Static description of the UTF-16LE converter.
                     :  *
                     :  * Positional initializer: the field meanings come from the declaration
                     :  * of UConverterStaticData, which is not part of this file, so the
                     :  * notes below are hedged where the value alone does not settle it.
                     :  */
    1239             :     sizeof(UConverterStaticData), /* size of this structure */
    1240             :     "UTF-16LE", /* converter name */
    1241             :     1202, UCNV_IBM, UCNV_UTF16_LittleEndian, 2, 2, /* 1202: presumably the CCSID (the sibling _UTF16StaticData labels this position so); 2, 2: presumably min/max bytes per char - confirm */
    1242             :     { 0xfd, 0xff, 0, 0 },2,FALSE,FALSE, /* FD FF is U+FFFD in little-endian byte order, length 2; presumably the substitution character (subChar) - confirm */
    1243             :     0,
    1244             :     0,
    1245             :     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
    1246             : };
    1247             : 
    1248             : 
    1249             : const UConverterSharedData _UTF16LEData= /* shared-data object of UTF-16LE; not static, so it has external linkage; IS_UTF16LE() compares against its address */
    1250             :         UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF16LEStaticData, &_UTF16LEImpl); /* ties the static description to the implementation table */
    1251             : 
    1252             : /* UTF-16 (Detect BOM) ------------------------------------------------------ */
    1253             : 
    1254             : /*
    1255             :  * Detect a BOM at the beginning of the stream and select UTF-16BE or UTF-16LE
    1256             :  * accordingly.
    1257             :  * This is a simpler version of the UTF-32 converter, with
    1258             :  * fewer states for shorter BOMs.
    1259             :  *
    1260             :  * State values:
    1261             :  * 0    initial state
    1262             :  * 1    saw first byte
    1263             :  * 2..5 (unused)
    1264             :  * 6..7 see _UTF16ToUnicodeWithOffsets() comments in state 1
    1265             :  * 8    UTF-16BE mode
    1266             :  * 9    UTF-16LE mode
    1267             :  *
    1268             :  * During detection: state==number of initial bytes seen so far.
    1269             :  *
    1270             :  * On output, emit U+FEFF as the first code point.
    1271             :  *
    1272             :  * Variants:
    1273             :  * - UTF-16,version=1 (Java "Unicode" encoding) treats a missing BOM as an error.
    1274             :  * - UTF-16BE,version=1 (Java "UnicodeBig" encoding) and
    1275             :  *   UTF-16LE,version=1 (Java "UnicodeLittle" encoding) treat a reverse BOM as an error.
    1276             :  */
    1277             : U_CDECL_BEGIN
    1278             : static void  U_CALLCONV /*
                     :  * Reset hook of the BOM-detecting UTF-16 converter.
                     :  *
                     :  * cnv    - converter whose state is re-initialised in place.
                     :  * choice - selects the direction(s) to reset; the comparisons presumably
                     :  *          rely on UCNV_RESET_BOTH sorting before
                     :  *          UCNV_RESET_TO_UNICODE - TODO confirm against the enum.
                     :  *
                     :  * Unlike _UTF16LEReset, neither branch depends on the converter
                     :  * version.
                     :  */
    1279           0 : _UTF16Reset(UConverter *cnv, UConverterResetChoice choice) {
    1280           0 :     if(choice<=UCNV_RESET_TO_UNICODE) {
    1281             :         /* reset toUnicode: state=0 */
    1282           0 :         cnv->mode=0; /* BOM detection starts over with the next input byte */
    1283             :     }
    1284           0 :     if(choice!=UCNV_RESET_TO_UNICODE) {
    1285             :         /* reset fromUnicode: prepare to output the UTF-16PE BOM ("PE" presumably means platform endian - confirm) */
    1286           0 :         cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM;
    1287             :     }
    1288           0 : }
    1289             : U_CDECL_END
    1290             : extern const UConverterSharedData _UTF16v2Data; /* forward declaration: _UTF16Open takes its address, but the definition comes later in this file */
    1291             : U_CDECL_BEGIN
    1292             : static void U_CALLCONV /*
                     :  * Open hook of the BOM-detecting UTF-16 converter.
                     :  *
                     :  * cnv        - converter being opened.
                     :  * pArgs      - load arguments; only onlyTestIsLoadable is read, and only
                     :  *              when the version is 2.
                     :  * pErrorCode - set to U_ILLEGAL_ARGUMENT_ERROR when the requested
                     :  *              version is greater than 2; not written otherwise.
                     :  *
                     :  * For version 2 (unless this is a loadability test) the converter is
                     :  * re-pointed at the version-2 shared data and receives its substitution
                     :  * bytes. Every accepted version ends with a reset of both directions.
                     :  */
    1293           0 : _UTF16Open(UConverter *cnv,
    1294             :            UConverterLoadArgs *pArgs,
    1295             :            UErrorCode *pErrorCode) {
    1296           0 :     if(UCNV_GET_VERSION(cnv)<=2) {
    1297           0 :         if(UCNV_GET_VERSION(cnv)==2 && !pArgs->onlyTestIsLoadable) {
    1298             :             /*
    1299             :              * Switch implementation, and switch the staticData that's different
    1300             :              * and was copied into the UConverter.
    1301             :              * (See ucnv_createConverterFromSharedData() in ucnv_bld.c.)
    1302             :              * UTF-16,version=2 fromUnicode() always writes a big-endian byte stream.
    1303             :              */
    1304           0 :             cnv->sharedData=(UConverterSharedData*)&_UTF16v2Data; /* the cast drops the const qualifier of _UTF16v2Data */
    1305           0 :             uprv_memcpy(cnv->subChars, _UTF16v2Data.staticData->subChar, UCNV_MAX_SUBCHAR_LEN); /* overwrite the per-converter copy of the substitution bytes */
    1306             :         }
    1307           0 :         _UTF16Reset(cnv, UCNV_RESET_BOTH);
    1308             :     } else {
    1309           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; /* only versions 0, 1 and 2 exist for UTF-16 */
    1310             :     }
    1311           0 : }
    1312             : 
    1313             : static const char *  U_CALLCONV /*
                     :  * Name hook of the BOM-detecting UTF-16 converter.
                     :  *
                     :  * Returns a pointer to a string literal: "UTF-16" for version 0,
                     :  * "UTF-16,version=1" for version 1 and "UTF-16,version=2" for every
                     :  * other version value.
                     :  */
    1314           0 : _UTF16GetName(const UConverter *cnv) {
    1315           0 :     if(UCNV_GET_VERSION(cnv)==0) {
    1316           0 :         return "UTF-16";
    1317           0 :     } else if(UCNV_GET_VERSION(cnv)==1) {
    1318           0 :         return "UTF-16,version=1";
    1319             :     } else {
    1320           0 :         return "UTF-16,version=2";
    1321             :     }
    1322             : }
    1323             : U_CDECL_END
    1324             : extern const UConverterSharedData _UTF16Data; /* forward declaration: IS_UTF16() below needs its address before the definition later in this file */
    1325             : 
    1326             : #define IS_UTF16BE(cnv) ((cnv)->sharedData==&_UTF16BEData) /* true when cnv uses the UTF-16BE shared data (pointer identity, not a name comparison) */
    1327             : #define IS_UTF16LE(cnv) ((cnv)->sharedData==&_UTF16LEData) /* true when cnv uses the UTF-16LE shared data */
    1328             : #define IS_UTF16(cnv) ((cnv)->sharedData==&_UTF16Data || (cnv)->sharedData==&_UTF16v2Data) /* true for either BOM-detecting UTF-16 flavour; evaluates cnv twice */
    1329             : 
    1330             : U_CDECL_BEGIN
    1331             : static void U_CALLCONV /*
                     :  * toUnicode entry point that performs BOM detection and then delegates
                     :  * to the UTF-16BE or UTF-16LE conversion routine.
                     :  *
                     :  * The detection state is loaded from cnv->mode on entry and stored back
                     :  * on every exit path, so detection can span several calls. The
                     :  * IS_UTF16LE and IS_UTF16BE tests below show that this routine is also
                     :  * expected to run for converters whose sharedData is the LE or BE
                     :  * object (the Java "UnicodeLittle" / "UnicodeBig" variants named in the
                     :  * comments).
                     :  *
                     :  * pArgs      - conversion arguments; source is advanced past the
                     :  *              consumed input, and the offsets written during this call
                     :  *              (when offsets is non-NULL) are shifted by the size of a
                     :  *              BOM consumed in this call.
                     :  * pErrorCode - in/out error code; set to U_ILLEGAL_ESCAPE_SEQUENCE when
                     :  *              a required BOM is missing or a reverse BOM is seen. The
                     :  *              delegated routines receive the same pointer.
                     :  */
    1332           0 : _UTF16ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
    1333             :                            UErrorCode *pErrorCode) {
    1334           0 :     UConverter *cnv=pArgs->converter;
    1335           0 :     const char *source=pArgs->source;
    1336           0 :     const char *sourceLimit=pArgs->sourceLimit;
    1337           0 :     int32_t *offsets=pArgs->offsets; /* entry value, kept for the offset fix-up after the loop */
    1338             : 
    1339             :     int32_t state, offsetDelta;
    1340             :     uint8_t b;
    1341             : 
    1342           0 :     state=cnv->mode;
    1343             : 
    1344             :     /*
    1345             :      * If we detect a BOM in this buffer, then we must add the BOM size to the
    1346             :      * offsets because the actual converter function will not see and count the BOM.
    1347             :      * offsetDelta will have the number of the BOM bytes that are in the current buffer.
    1348             :      */
    1349           0 :     offsetDelta=0;
    1350             : 
    1351           0 :     while(source<sourceLimit && U_SUCCESS(*pErrorCode)) {
    1352           0 :         switch(state) {
    1353             :         case 0:
    1354           0 :             cnv->toUBytes[0]=(uint8_t)*source++; /* stash the first byte in the converter so case 1 can test it, even in a later call */
    1355           0 :             cnv->toULength=1;
    1356           0 :             state=1;
    1357           0 :             break;
    1358             :         case 1:
    1359             :             /*
    1360             :              * Only inside this switch case can the state variable
    1361             :              * temporarily take two additional values:
    1362             :              * 6: BOM error, continue with BE
    1363             :              * 7: BOM error, continue with LE
    1364             :              */
    1365           0 :             b=*source; /* peek at the second byte; it is not consumed yet */
    1366           0 :             if(cnv->toUBytes[0]==0xfe && b==0xff) {
    1367           0 :                 if(IS_UTF16LE(cnv)) {
    1368           0 :                     state=7; /* illegal reverse BOM for Java "UnicodeLittle" */
    1369             :                 } else {
    1370           0 :                     state=8; /* detect UTF-16BE */
    1371             :                 }
    1372           0 :             } else if(cnv->toUBytes[0]==0xff && b==0xfe) {
    1373           0 :                 if(IS_UTF16BE(cnv)) {
    1374           0 :                     state=6; /* illegal reverse BOM for Java "UnicodeBig" */
    1375             :                 } else {
    1376           0 :                     state=9; /* detect UTF-16LE */
    1377             :                 }
    1378           0 :             } else if((IS_UTF16(cnv) && UCNV_GET_VERSION(cnv)==1)) {
    1379           0 :                 state=6; /* illegal missing BOM for Java "Unicode" */
    1380             :             }
    1381           0 :             if(state>=8) {
    1382             :                 /* BOM detected, consume it */
    1383           0 :                 ++source;
    1384           0 :                 cnv->toULength=0;
    1385           0 :                 offsetDelta=(int32_t)(source-pArgs->source); /* counts only the BOM bytes taken from this buffer */
    1386           0 :             } else if(state<6) {
    1387             :                 /* ok: no BOM, and not a reverse BOM */
    1388           0 :                 if(source!=pArgs->source) {
    1389             :                     /* reset the source for a correct first offset */
    1390           0 :                     source=pArgs->source; /* the first byte came from this buffer: rewind so the delegate reads it again */
    1391           0 :                     cnv->toULength=0; /* and drop the stashed copy; otherwise it stays in toUBytes - presumably the delegate picks it up from there, confirm */
    1392             :                 }
    1393           0 :                 if(IS_UTF16LE(cnv)) {
    1394             :                     /* Make Java "UnicodeLittle" default to LE. */
    1395           0 :                     state=9;
    1396             :                 } else {
    1397             :                     /* Make standard UTF-16 and Java "UnicodeBig" default to BE. */
    1398           0 :                     state=8;
    1399             :                 }
    1400             :             } else {
    1401             :                 /*
    1402             :                  * error: missing BOM, or reverse BOM
    1403             :                  * UTF-16,version=1: Java-specific "Unicode" requires a BOM.
    1404             :                  * UTF-16BE,version=1: Java-specific "UnicodeBig" requires a BE BOM or no BOM.
    1405             :                  * UTF-16LE,version=1: Java-specific "UnicodeLittle" requires an LE BOM or no BOM.
    1406             :                  */
    1407             :                 /* report the non-BOM or reverse BOM as an illegal sequence */
    1408           0 :                 cnv->toUBytes[1]=b;
    1409           0 :                 cnv->toULength=2;
    1410           0 :                 pArgs->source=source+1; /* both offending bytes count as consumed */
    1411             :                 /* continue with conversion if the callback resets the error */
    1412             :                 /*
    1413             :                  * Make Java "Unicode" default to BE like standard UTF-16.
    1414             :                  * Make Java "UnicodeBig" and "UnicodeLittle" default
    1415             :                  * to their normal endiannesses.
    1416             :                  */
    1417           0 :                 cnv->mode=state+2; /* 6 -> 8 (BE), 7 -> 9 (LE) */
    1418           0 :                 *pErrorCode=U_ILLEGAL_ESCAPE_SEQUENCE;
    1419           0 :                 return;
    1420             :             }
    1421             :             /* convert the rest of the stream */
    1422           0 :             cnv->mode=state;
    1423           0 :             continue; /* back to the loop test; the next iteration dispatches on the new state (8 or 9) */
    1424             :         case 8:
    1425             :             /* call UTF-16BE */
    1426           0 :             pArgs->source=source; /* publish the local cursor before delegating */
    1427           0 :             _UTF16BEToUnicodeWithOffsets(pArgs, pErrorCode);
    1428           0 :             source=pArgs->source; /* pick up how far the delegate got */
    1429           0 :             break;
    1430             :         case 9:
    1431             :             /* call UTF-16LE */
    1432           0 :             pArgs->source=source; /* publish the local cursor before delegating */
    1433           0 :             _UTF16LEToUnicodeWithOffsets(pArgs, pErrorCode);
    1434           0 :             source=pArgs->source; /* pick up how far the delegate got */
    1435           0 :             break;
    1436             :         default:
    1437           0 :             break; /* does not occur */
    1438             :         }
    1439             :     }
    1440             : 
    1441             :     /* add BOM size to offsets - see comment at offsetDelta declaration */
    1442           0 :     if(offsets!=NULL && offsetDelta!=0) {
    1443           0 :         int32_t *offsetsLimit=pArgs->offsets; /* presumably advanced by the delegates as they wrote offsets, so it marks the end of this call's entries - confirm */
    1444           0 :         while(offsets<offsetsLimit) {
    1445           0 :             *offsets++ += offsetDelta;
    1446             :         }
    1447             :     }
    1448             : 
    1449           0 :     pArgs->source=source;
    1450             : 
    1451           0 :     if(source==sourceLimit && pArgs->flush) {
    1452             :         /* handle truncated input */
    1453           0 :         switch(state) {
    1454             :         case 0:
    1455           0 :             break; /* no input at all, nothing to do */
    1456             :         case 8:
    1457           0 :             _UTF16BEToUnicodeWithOffsets(pArgs, pErrorCode); /* called with no input left; presumably lets the delegate deal with a pending partial character - confirm */
    1458           0 :             break;
    1459             :         case 9:
    1460           0 :             _UTF16LEToUnicodeWithOffsets(pArgs, pErrorCode); /* same as case 8, for the LE delegate */
    1461           0 :             break;
    1462             :         default:
    1463             :             /* 0<state<8: framework will report truncation, nothing to do here */
    1464           0 :             break;
    1465             :         }
    1466             :     }
    1467             : 
    1468           0 :     cnv->mode=state; /* persist the detection state for the next call */
    1469             : }
    1470             : 
    1471             : static UChar32 U_CALLCONV /*
                     :  * getNextUChar hook of the BOM-detecting UTF-16 converter.
                     :  *
                     :  * Once the byte order is known (mode 8 = BE, mode 9 = LE) the call is
                     :  * forwarded to the matching single-code-point reader and its result is
                     :  * returned unchanged. In every other mode the constant
                     :  * UCNV_GET_NEXT_UCHAR_USE_TO_U is returned without touching pArgs or
                     :  * pErrorCode; by its name it presumably asks the caller to use the
                     :  * toUnicode path instead - confirm against its definition.
                     :  */
    1472           0 : _UTF16GetNextUChar(UConverterToUnicodeArgs *pArgs,
    1473             :                    UErrorCode *pErrorCode) {
    1474           0 :     switch(pArgs->converter->mode) {
    1475             :     case 8:
    1476           0 :         return _UTF16BEGetNextUChar(pArgs, pErrorCode);
    1477             :     case 9:
    1478           0 :         return _UTF16LEGetNextUChar(pArgs, pErrorCode);
    1479             :     default:
    1480           0 :         return UCNV_GET_NEXT_UCHAR_USE_TO_U; /* byte order not determined yet */
    1481             :     }
    1482             : }
    1483             : U_CDECL_END
    1484             : 
    1485             : static const UConverterImpl _UTF16Impl = { /*
                     :  * Implementation table of the BOM-detecting UTF-16 converter
                     :  * (the one referenced by _UTF16Data).
                     :  *
                     :  * The initializer is positional, so the meaning of each slot is fixed
                     :  * by the declaration of UConverterImpl, which is not part of this
                     :  * file. Slot descriptions that are not backed by the entry's own name
                     :  * are marked as presumptions.
                     :  */
    1486             :     UCNV_UTF16, /* converter type constant */
    1487             : 
    1488             :     NULL, /* no hook supplied */
    1489             :     NULL, /* no hook supplied */
    1490             : 
    1491             :     _UTF16Open, /* open hook */
    1492             :     NULL, /* no hook supplied */
    1493             :     _UTF16Reset, /* reset hook */
    1494             : 
    1495             :     _UTF16ToUnicodeWithOffsets, /* the same routine fills two adjacent slots - presumably the plain */
    1496             :     _UTF16ToUnicodeWithOffsets, /* and the offsets-producing toUnicode entries; TODO confirm */
    1497             :     _UTF16PEFromUnicodeWithOffsets, /* "PE" presumably stands for platform endian - confirm at the definition */
    1498             :     _UTF16PEFromUnicodeWithOffsets,
    1499             :     _UTF16GetNextUChar, /* single-code-point reader */
    1500             : 
    1501             :     NULL, /* ### TODO implement getStarters for all Unicode encodings?! */
    1502             :     _UTF16GetName, /* name hook */
    1503             :     NULL, /* no hook supplied */
    1504             :     NULL, /* no hook supplied */
    1505             :     ucnv_getNonSurrogateUnicodeSet, /* presumably reports the convertible code points (by its name: all but surrogates) - confirm */
    1506             : 
    1507             :     NULL, /* no hook supplied */
    1508             :     NULL /* no hook supplied */
    1509             : };
    1510             : 
    1511             : static const UConverterStaticData _UTF16StaticData = { /*
                     :  * Static description of the BOM-detecting UTF-16 converter.
                     :  *
                     :  * Positional initializer: the field meanings come from the declaration
                     :  * of UConverterStaticData, which is not part of this file, so the
                     :  * notes below are hedged where the value alone does not settle it.
                     :  */
    1512             :     sizeof(UConverterStaticData), /* size of this structure */
    1513             :     "UTF-16", /* converter name */
    1514             :     1204, /* CCSID for BOM sensitive UTF-16 */
    1515             :     UCNV_IBM, UCNV_UTF16, 2, 2, /* 2, 2: presumably min/max bytes per char - confirm */
    1516             : #if U_IS_BIG_ENDIAN /* the two branches hold the bytes of U+FFFD in big-endian and little-endian order respectively */
    1517             :     { 0xff, 0xfd, 0, 0 }, 2,
    1518             : #else
    1519             :     { 0xfd, 0xff, 0, 0 }, 2, /* presumably the substitution character (subChar) and its length - confirm */
    1520             : #endif
    1521             :     FALSE, FALSE,
    1522             :     0,
    1523             :     0,
    1524             :     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
    1525             : };
    1526             : 
    1527             : const UConverterSharedData _UTF16Data = /* shared-data object of the BOM-detecting UTF-16 converter; not static, so it has external linkage; IS_UTF16() compares against its address */
    1528             :         UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF16StaticData, &_UTF16Impl); /* ties the static description to the implementation table */
    1529             : 
    1530             : static const UConverterImpl _UTF16v2Impl = { /*
                     :  * Implementation table of "UTF-16,version=2" (the one referenced by
                     :  * _UTF16v2Data, which _UTF16Open installs for version 2).
                     :  *
                     :  * It differs from _UTF16Impl only in the two fromUnicode entries,
                     :  * which use the big-endian writer.
                     :  *
                     :  * The initializer is positional, so the meaning of each slot is fixed
                     :  * by the declaration of UConverterImpl, which is not part of this
                     :  * file. Slot descriptions that are not backed by the entry's own name
                     :  * are marked as presumptions.
                     :  */
    1531             :     UCNV_UTF16, /* converter type constant */
    1532             : 
    1533             :     NULL, /* no hook supplied */
    1534             :     NULL, /* no hook supplied */
    1535             : 
    1536             :     _UTF16Open, /* open hook */
    1537             :     NULL, /* no hook supplied */
    1538             :     _UTF16Reset, /* reset hook */
    1539             : 
    1540             :     _UTF16ToUnicodeWithOffsets, /* the same routine fills two adjacent slots - presumably the plain */
    1541             :     _UTF16ToUnicodeWithOffsets, /* and the offsets-producing toUnicode entries; TODO confirm */
    1542             :     _UTF16BEFromUnicodeWithOffsets, /* big-endian writer in both fromUnicode positions */
    1543             :     _UTF16BEFromUnicodeWithOffsets,
    1544             :     _UTF16GetNextUChar, /* single-code-point reader */
    1545             : 
    1546             :     NULL, /* ### TODO implement getStarters for all Unicode encodings?! */
    1547             :     _UTF16GetName, /* name hook */
    1548             :     NULL, /* no hook supplied */
    1549             :     NULL, /* no hook supplied */
    1550             :     ucnv_getNonSurrogateUnicodeSet, /* presumably reports the convertible code points (by its name: all but surrogates) - confirm */
    1551             : 
    1552             :     NULL, /* no hook supplied */
    1553             :     NULL /* no hook supplied */
    1554             : };
    1555             : 
    1556             : static const UConverterStaticData _UTF16v2StaticData = { /*
                     :  * Static description of "UTF-16,version=2".
                     :  *
                     :  * Positional initializer: the field meanings come from the declaration
                     :  * of UConverterStaticData, which is not part of this file, so the
                     :  * notes below are hedged where the value alone does not settle it.
                     :  */
    1557             :     sizeof(UConverterStaticData), /* size of this structure */
    1558             :     "UTF-16,version=2", /* converter name */
    1559             :     1204, /* CCSID for BOM sensitive UTF-16 */
    1560             :     UCNV_IBM, UCNV_UTF16, 2, 2, /* 2, 2: presumably min/max bytes per char - confirm */
    1561             :     { 0xff, 0xfd, 0, 0 }, 2, /* FF FD is U+FFFD in big-endian byte order on every platform; _UTF16Open copies this subChar array into the converter */
    1562             :     FALSE, FALSE,
    1563             :     0,
    1564             :     0,
    1565             :     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
    1566             : };
    1567             : 
    1568             : const UConverterSharedData _UTF16v2Data = /* shared-data object of "UTF-16,version=2"; _UTF16Open installs it into the converter and IS_UTF16() compares against its address */
    1569             :         UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF16v2StaticData, &_UTF16v2Impl); /* ties the static description to the implementation table */
    1570             : 
    1571             : #endif

Generated by: LCOV version 1.13