LCOV - code coverage report
Current view: top level - intl/icu/source/common - ustrtrns.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 0 853 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 14 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : ******************************************************************************
       5             : *
       6             : *   Copyright (C) 2001-2016, International Business Machines
       7             : *   Corporation and others.  All Rights Reserved.
       8             : *
       9             : ******************************************************************************
      10             : *
      11             : * File ustrtrns.cpp
      12             : *
      13             : * Modification History:
      14             : *
      15             : *   Date        Name        Description
      16             : *   9/10/2001    Ram    Creation.
      17             : ******************************************************************************
      18             : */
      19             : 
      20             : /*******************************************************************************
      21             :  *
      22             :  * u_strTo* and u_strFrom* APIs
      23             :  * WCS functions moved to ustr_wcs.c for better modularization
      24             :  *
      25             :  *******************************************************************************
      26             :  */
      27             : 
      28             : 
      29             : #include "unicode/putil.h"
      30             : #include "unicode/ustring.h"
      31             : #include "unicode/utf.h"
      32             : #include "unicode/utf8.h"
      33             : #include "unicode/utf16.h"
      34             : #include "cstring.h"
      35             : #include "cmemory.h"
      36             : #include "ustr_imp.h"
      37             : #include "uassert.h"
      38             : 
      39             : U_CAPI UChar* U_EXPORT2 
      40           0 : u_strFromUTF32WithSub(UChar *dest,
      41             :                int32_t destCapacity,
      42             :                int32_t *pDestLength,
      43             :                const UChar32 *src,
      44             :                int32_t srcLength,
      45             :                UChar32 subchar, int32_t *pNumSubstitutions,
      46             :                UErrorCode *pErrorCode) {
      47             :     const UChar32 *srcLimit;
      48             :     UChar32 ch;
      49             :     UChar *destLimit;
      50             :     UChar *pDest;
      51             :     int32_t reqLength;
      52             :     int32_t numSubstitutions;
      53             : 
      54             :     /* args check */
      55           0 :     if(U_FAILURE(*pErrorCode)){
      56           0 :         return NULL;
      57             :     }
      58           0 :     if( (src==NULL && srcLength!=0) || srcLength < -1 ||
      59           0 :         (destCapacity<0) || (dest == NULL && destCapacity > 0) ||
      60           0 :         subchar > 0x10ffff || U_IS_SURROGATE(subchar)
      61             :     ) {
      62           0 :         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
      63           0 :         return NULL;
      64             :     }
      65             : 
      66           0 :     if(pNumSubstitutions != NULL) {
      67           0 :         *pNumSubstitutions = 0;
      68             :     }
      69             : 
      70           0 :     pDest = dest;
      71           0 :     destLimit = (dest!=NULL)?(dest + destCapacity):NULL;
      72           0 :     reqLength = 0;
      73           0 :     numSubstitutions = 0;
      74             : 
      75           0 :     if(srcLength < 0) {
      76             :         /* simple loop for conversion of a NUL-terminated BMP string */
      77           0 :         while((ch=*src) != 0 &&
      78           0 :               ((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff))) {
      79           0 :             ++src;
      80           0 :             if(pDest < destLimit) {
      81           0 :                 *pDest++ = (UChar)ch;
      82             :             } else {
      83           0 :                 ++reqLength;
      84             :             }
      85             :         }
      86           0 :         srcLimit = src;
      87           0 :         if(ch != 0) {
      88             :             /* "complicated" case, find the end of the remaining string */
      89           0 :             while(*++srcLimit != 0) {}
      90             :         }
      91             :     } else {
      92           0 :       srcLimit = (src!=NULL)?(src + srcLength):NULL;
      93             :     }
      94             : 
      95             :     /* convert with length */
      96           0 :     while(src < srcLimit) {
      97           0 :         ch = *src++;
      98             :         do {
      99             :             /* usually "loops" once; twice only for writing subchar */
     100           0 :             if((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff)) {
     101           0 :                 if(pDest < destLimit) {
     102           0 :                     *pDest++ = (UChar)ch;
     103             :                 } else {
     104           0 :                     ++reqLength;
     105             :                 }
     106           0 :                 break;
     107           0 :             } else if(0x10000 <= ch && ch <= 0x10ffff) {
     108           0 :                 if(pDest!=NULL && ((pDest + 2) <= destLimit)) {
     109           0 :                     *pDest++ = U16_LEAD(ch);
     110           0 :                     *pDest++ = U16_TRAIL(ch);
     111             :                 } else {
     112           0 :                     reqLength += 2;
     113             :                 }
     114           0 :                 break;
     115           0 :             } else if((ch = subchar) < 0) {
     116             :                 /* surrogate code point, or not a Unicode code point at all */
     117           0 :                 *pErrorCode = U_INVALID_CHAR_FOUND;
     118           0 :                 return NULL;
     119             :             } else {
     120           0 :                 ++numSubstitutions;
     121             :             }
     122             :         } while(TRUE);
     123             :     }
     124             : 
     125           0 :     reqLength += (int32_t)(pDest - dest);
     126           0 :     if(pDestLength) {
     127           0 :         *pDestLength = reqLength;
     128             :     }
     129           0 :     if(pNumSubstitutions != NULL) {
     130           0 :         *pNumSubstitutions = numSubstitutions;
     131             :     }
     132             : 
     133             :     /* Terminate the buffer */
     134           0 :     u_terminateUChars(dest, destCapacity, reqLength, pErrorCode);
     135             :     
     136           0 :     return dest;
     137             : }
     138             : 
     139             : U_CAPI UChar* U_EXPORT2 
     140           0 : u_strFromUTF32(UChar *dest,
     141             :                int32_t destCapacity, 
     142             :                int32_t *pDestLength,
     143             :                const UChar32 *src,
     144             :                int32_t srcLength,
     145             :                UErrorCode *pErrorCode) {
     146             :     return u_strFromUTF32WithSub(
     147             :             dest, destCapacity, pDestLength,
     148             :             src, srcLength,
     149             :             U_SENTINEL, NULL,
     150           0 :             pErrorCode);
     151             : }
     152             : 
     153             : U_CAPI UChar32* U_EXPORT2 
     154           0 : u_strToUTF32WithSub(UChar32 *dest,
     155             :              int32_t destCapacity,
     156             :              int32_t *pDestLength,
     157             :              const UChar *src,
     158             :              int32_t srcLength,
     159             :              UChar32 subchar, int32_t *pNumSubstitutions,
     160             :              UErrorCode *pErrorCode) {
     161             :     const UChar *srcLimit;
     162             :     UChar32 ch;
     163             :     UChar ch2;
     164             :     UChar32 *destLimit;
     165             :     UChar32 *pDest;
     166             :     int32_t reqLength;
     167             :     int32_t numSubstitutions;
     168             : 
     169             :     /* args check */
     170           0 :     if(U_FAILURE(*pErrorCode)){
     171           0 :         return NULL;
     172             :     }
     173           0 :     if( (src==NULL && srcLength!=0) || srcLength < -1 ||
     174           0 :         (destCapacity<0) || (dest == NULL && destCapacity > 0) ||
     175           0 :         subchar > 0x10ffff || U_IS_SURROGATE(subchar)
     176             :     ) {
     177           0 :         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
     178           0 :         return NULL;
     179             :     }
     180             : 
     181           0 :     if(pNumSubstitutions != NULL) {
     182           0 :         *pNumSubstitutions = 0;
     183             :     }
     184             : 
     185           0 :     pDest = dest;
     186           0 :     destLimit = (dest!=NULL)?(dest + destCapacity):NULL;
     187           0 :     reqLength = 0;
     188           0 :     numSubstitutions = 0;
     189             : 
     190           0 :     if(srcLength < 0) {
     191             :         /* simple loop for conversion of a NUL-terminated BMP string */
     192           0 :         while((ch=*src) != 0 && !U16_IS_SURROGATE(ch)) {
     193           0 :             ++src;
     194           0 :             if(pDest < destLimit) {
     195           0 :                 *pDest++ = ch;
     196             :             } else {
     197           0 :                 ++reqLength;
     198             :             }
     199             :         }
     200           0 :         srcLimit = src;
     201           0 :         if(ch != 0) {
     202             :             /* "complicated" case, find the end of the remaining string */
     203           0 :             while(*++srcLimit != 0) {}
     204             :         }
     205             :     } else {
     206           0 :         srcLimit = (src!=NULL)?(src + srcLength):NULL;
     207             :     }
     208             : 
     209             :     /* convert with length */
     210           0 :     while(src < srcLimit) {
     211           0 :         ch = *src++;
     212           0 :         if(!U16_IS_SURROGATE(ch)) {
     213             :             /* write or count ch below */
     214           0 :         } else if(U16_IS_SURROGATE_LEAD(ch) && src < srcLimit && U16_IS_TRAIL(ch2 = *src)) {
     215           0 :             ++src;
     216           0 :             ch = U16_GET_SUPPLEMENTARY(ch, ch2);
     217           0 :         } else if((ch = subchar) < 0) {
     218             :             /* unpaired surrogate */
     219           0 :             *pErrorCode = U_INVALID_CHAR_FOUND;
     220           0 :             return NULL;
     221             :         } else {
     222           0 :             ++numSubstitutions;
     223             :         }
     224           0 :         if(pDest < destLimit) {
     225           0 :             *pDest++ = ch;
     226             :         } else {
     227           0 :             ++reqLength;
     228             :         }
     229             :     }
     230             : 
     231           0 :     reqLength += (int32_t)(pDest - dest);
     232           0 :     if(pDestLength) {
     233           0 :         *pDestLength = reqLength;
     234             :     }
     235           0 :     if(pNumSubstitutions != NULL) {
     236           0 :         *pNumSubstitutions = numSubstitutions;
     237             :     }
     238             : 
     239             :     /* Terminate the buffer */
     240           0 :     u_terminateUChar32s(dest, destCapacity, reqLength, pErrorCode);
     241             : 
     242           0 :     return dest;
     243             : }
     244             : 
     245             : U_CAPI UChar32* U_EXPORT2 
     246           0 : u_strToUTF32(UChar32 *dest, 
     247             :              int32_t destCapacity,
     248             :              int32_t *pDestLength,
     249             :              const UChar *src, 
     250             :              int32_t srcLength,
     251             :              UErrorCode *pErrorCode) {
     252             :     return u_strToUTF32WithSub(
     253             :             dest, destCapacity, pDestLength,
     254             :             src, srcLength,
     255             :             U_SENTINEL, NULL,
     256           0 :             pErrorCode);
     257             : }
     258             : 
     259             : /* for utf8_nextCharSafeBodyTerminated() */
     260             : static const UChar32
     261             : utf8_minLegal[4]={ 0, 0x80, 0x800, 0x10000 };
     262             : 
     263             : /*
     264             :  * Version of utf8_nextCharSafeBody() with the following differences:
     265             :  * - checks for NUL termination instead of length
     266             :  * - works with pointers instead of indexes
     267             :  * - always strict (strict==-1)
     268             :  *
     269             :  * *ps points to after the lead byte and will be moved to after the last trail byte.
     270             :  * c is the lead byte.
     271             :  * @return the code point, or U_SENTINEL
     272             :  */
     273             : static UChar32
     274           0 : utf8_nextCharSafeBodyTerminated(const uint8_t **ps, UChar32 c) {
     275           0 :     const uint8_t *s=*ps;
     276           0 :     uint8_t trail, illegal=0;
     277           0 :     uint8_t count=U8_COUNT_TRAIL_BYTES(c);
     278           0 :     U_ASSERT(count<6);
     279           0 :     U8_MASK_LEAD_BYTE((c), count);
     280             :     /* count==0 for illegally leading trail bytes and the illegal bytes 0xfe and 0xff */
     281           0 :     switch(count) {
     282             :     /* each branch falls through to the next one */
     283             :     case 5:
     284             :     case 4:
     285             :         /* count>=4 is always illegal: no more than 3 trail bytes in Unicode's UTF-8 */
     286           0 :         illegal=1;
     287           0 :         break;
     288             :     case 3:
     289           0 :         trail=(uint8_t)(*s++ - 0x80);
     290           0 :         c=(c<<6)|trail;
     291           0 :         if(trail>0x3f || c>=0x110) {
     292             :             /* not a trail byte, or code point>0x10ffff (outside Unicode) */
     293           0 :             illegal=1;
     294           0 :             break;
     295             :         }
     296             :         U_FALLTHROUGH;
     297             :     case 2:
     298           0 :         trail=(uint8_t)(*s++ - 0x80);
     299           0 :         if(trail>0x3f) {
     300             :             /* not a trail byte */
     301           0 :             illegal=1;
     302           0 :             break;
     303             :         }
     304           0 :         c=(c<<6)|trail;
     305             :         U_FALLTHROUGH;
     306             :     case 1:
     307           0 :         trail=(uint8_t)(*s++ - 0x80);
     308           0 :         if(trail>0x3f) {
     309             :             /* not a trail byte */
     310           0 :             illegal=1;
     311             :         }
     312           0 :         c=(c<<6)|trail;
     313           0 :         break;
     314             :     case 0:
     315           0 :         return U_SENTINEL;
     316             :     /* no default branch to optimize switch()  - all values are covered */
     317             :     }
     318             : 
     319             :     /* correct sequence - all trail bytes have (b7..b6)==(10)? */
     320             :     /* illegal is also set if count>=4 */
     321           0 :     if(illegal || c<utf8_minLegal[count] || U_IS_SURROGATE(c)) {
     322             :         /* error handling */
     323             :         /* don't go beyond this sequence */
     324           0 :         s=*ps;
     325           0 :         while(count>0 && U8_IS_TRAIL(*s)) {
     326           0 :             ++s;
     327           0 :             --count;
     328             :         }
     329           0 :         c=U_SENTINEL;
     330             :     }
     331           0 :     *ps=s;
     332           0 :     return c;
     333             : }
     334             : 
     335             : /*
     336             :  * Version of utf8_nextCharSafeBody() with the following differences:
     337             :  * - works with pointers instead of indexes
     338             :  * - always strict (strict==-1)
     339             :  *
     340             :  * *ps points to after the lead byte and will be moved to after the last trail byte.
     341             :  * c is the lead byte.
     342             :  * @return the code point, or U_SENTINEL
     343             :  */
     344             : static UChar32
     345           0 : utf8_nextCharSafeBodyPointer(const uint8_t **ps, const uint8_t *limit, UChar32 c) {
     346           0 :     const uint8_t *s=*ps;
     347           0 :     uint8_t trail, illegal=0;
     348           0 :     uint8_t count=U8_COUNT_TRAIL_BYTES(c);
     349           0 :     if((limit-s)>=count) {
     350           0 :         U8_MASK_LEAD_BYTE((c), count);
     351             :         /* count==0 for illegally leading trail bytes and the illegal bytes 0xfe and 0xff */
     352           0 :         switch(count) {
     353             :         /* each branch falls through to the next one */
     354             :         case 5:
     355             :         case 4:
     356             :             /* count>=4 is always illegal: no more than 3 trail bytes in Unicode's UTF-8 */
     357           0 :             illegal=1;
     358           0 :             break;
     359             :         case 3:
     360           0 :             trail=*s++;
     361           0 :             c=(c<<6)|(trail&0x3f);
     362           0 :             if(c<0x110) {
     363           0 :                 illegal|=(trail&0xc0)^0x80;
     364             :             } else {
     365             :                 /* code point>0x10ffff, outside Unicode */
     366           0 :                 illegal=1;
     367           0 :                 break;
     368             :             }
     369             :             U_FALLTHROUGH;
     370             :         case 2:
     371           0 :             trail=*s++;
     372           0 :             c=(c<<6)|(trail&0x3f);
     373           0 :             illegal|=(trail&0xc0)^0x80;
     374             :             U_FALLTHROUGH;
     375             :         case 1:
     376           0 :             trail=*s++;
     377           0 :             c=(c<<6)|(trail&0x3f);
     378           0 :             illegal|=(trail&0xc0)^0x80;
     379           0 :             break;
     380             :         case 0:
     381           0 :             return U_SENTINEL;
     382             :         /* no default branch to optimize switch()  - all values are covered */
     383             :         }
     384             :     } else {
     385           0 :         illegal=1; /* too few bytes left */
     386             :     }
     387             : 
     388             :     /* correct sequence - all trail bytes have (b7..b6)==(10)? */
     389             :     /* illegal is also set if count>=4 */
     390           0 :     U_ASSERT(illegal || count<UPRV_LENGTHOF(utf8_minLegal));
     391           0 :     if(illegal || c<utf8_minLegal[count] || U_IS_SURROGATE(c)) {
     392             :         /* error handling */
     393             :         /* don't go beyond this sequence */
     394           0 :         s=*ps;
     395           0 :         while(count>0 && s<limit && U8_IS_TRAIL(*s)) {
     396           0 :             ++s;
     397           0 :             --count;
     398             :         }
     399           0 :         c=U_SENTINEL;
     400             :     }
     401           0 :     *ps=s;
     402           0 :     return c;
     403             : }
     404             : 
     405             : U_CAPI UChar* U_EXPORT2
     406           0 : u_strFromUTF8WithSub(UChar *dest,
     407             :               int32_t destCapacity,
     408             :               int32_t *pDestLength,
     409             :               const char* src,
     410             :               int32_t srcLength,
     411             :               UChar32 subchar, int32_t *pNumSubstitutions,
     412             :               UErrorCode *pErrorCode){
     413           0 :     UChar *pDest = dest;
     414           0 :     UChar *pDestLimit = dest+destCapacity;
     415             :     UChar32 ch;
     416           0 :     int32_t reqLength = 0;
     417           0 :     const uint8_t* pSrc = (const uint8_t*) src;
     418             :     uint8_t t1, t2; /* trail bytes */
     419             :     int32_t numSubstitutions;
     420             : 
     421             :     /* args check */
     422           0 :     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
     423           0 :         return NULL;
     424             :     }
     425             :         
     426           0 :     if( (src==NULL && srcLength!=0) || srcLength < -1 ||
     427           0 :         (destCapacity<0) || (dest == NULL && destCapacity > 0) ||
     428           0 :         subchar > 0x10ffff || U_IS_SURROGATE(subchar)
     429             :     ) {
     430           0 :         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
     431           0 :         return NULL;
     432             :     }
     433             : 
     434           0 :     if(pNumSubstitutions!=NULL) {
     435           0 :         *pNumSubstitutions=0;
     436             :     }
     437           0 :     numSubstitutions=0;
     438             : 
     439             :     /*
     440             :      * Inline processing of UTF-8 byte sequences:
     441             :      *
     442             :      * Byte sequences for the most common characters are handled inline in
     443             :      * the conversion loops. In order to reduce the path lengths for those
     444             :      * characters, the tests are arranged in a kind of binary search.
     445             :      * ASCII (<=0x7f) is checked first, followed by the dividing point
     446             :      * between 2- and 3-byte sequences (0xe0).
     447             :      * The 3-byte branch is tested first to speed up CJK text.
     448             :      * The compiler should combine the subtractions for the two tests for 0xe0.
     449             :      * Each branch then tests for the other end of its range.
     450             :      */
     451             : 
     452           0 :     if(srcLength < 0){
     453             :         /*
     454             :          * Transform a NUL-terminated string.
     455             :          * The code explicitly checks for NULs only in the lead byte position.
     456             :          * A NUL byte in the trail byte position fails the trail byte range check anyway.
     457             :          */
     458           0 :         while(((ch = *pSrc) != 0) && (pDest < pDestLimit)) {
     459           0 :             if(ch <= 0x7f){
     460           0 :                 *pDest++=(UChar)ch;
     461           0 :                 ++pSrc;
     462             :             } else {
     463           0 :                 if(ch > 0xe0) {
     464           0 :                     if( /* handle U+1000..U+CFFF inline */
     465           0 :                         ch <= 0xec &&
     466           0 :                         (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f &&
     467           0 :                         (t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f
     468             :                     ) {
     469             :                         /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
     470           0 :                         *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
     471           0 :                         pSrc += 3;
     472           0 :                         continue;
     473             :                     }
     474           0 :                 } else if(ch < 0xe0) {
     475           0 :                     if( /* handle U+0080..U+07FF inline */
     476           0 :                         ch >= 0xc2 &&
     477           0 :                         (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f
     478             :                     ) {
     479           0 :                         *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
     480           0 :                         pSrc += 2;
     481           0 :                         continue;
     482             :                     }
     483             :                 }
     484             : 
     485             :                 /* function call for "complicated" and error cases */
     486           0 :                 ++pSrc; /* continue after the lead byte */
     487           0 :                 ch=utf8_nextCharSafeBodyTerminated(&pSrc, ch);
     488           0 :                 if(ch<0 && (++numSubstitutions, ch = subchar) < 0) {
     489           0 :                     *pErrorCode = U_INVALID_CHAR_FOUND;
     490           0 :                     return NULL;
     491           0 :                 } else if(ch<=0xFFFF) {
     492           0 :                     *(pDest++)=(UChar)ch;
     493             :                 } else {
     494           0 :                     *(pDest++)=U16_LEAD(ch);
     495           0 :                     if(pDest<pDestLimit) {
     496           0 :                         *(pDest++)=U16_TRAIL(ch);
     497             :                     } else {
     498           0 :                         reqLength++;
     499           0 :                         break;
     500             :                     }
     501             :                 }
     502             :             }
     503             :         }
     504             : 
     505             :         /* Pre-flight the rest of the string. */
     506           0 :         while((ch = *pSrc) != 0) {
     507           0 :             if(ch <= 0x7f){
     508           0 :                 ++reqLength;
     509           0 :                 ++pSrc;
     510             :             } else {
     511           0 :                 if(ch > 0xe0) {
     512           0 :                     if( /* handle U+1000..U+CFFF inline */
     513           0 :                         ch <= 0xec &&
     514           0 :                         (uint8_t)(pSrc[1] - 0x80) <= 0x3f &&
     515           0 :                         (uint8_t)(pSrc[2] - 0x80) <= 0x3f
     516             :                     ) {
     517           0 :                         ++reqLength;
     518           0 :                         pSrc += 3;
     519           0 :                         continue;
     520             :                     }
     521           0 :                 } else if(ch < 0xe0) {
     522           0 :                     if( /* handle U+0080..U+07FF inline */
     523           0 :                         ch >= 0xc2 &&
     524           0 :                         (uint8_t)(pSrc[1] - 0x80) <= 0x3f
     525             :                     ) {
     526           0 :                         ++reqLength;
     527           0 :                         pSrc += 2;
     528           0 :                         continue;
     529             :                     }
     530             :                 }
     531             : 
     532             :                 /* function call for "complicated" and error cases */
     533           0 :                 ++pSrc; /* continue after the lead byte */
     534           0 :                 ch=utf8_nextCharSafeBodyTerminated(&pSrc, ch);
     535           0 :                 if(ch<0 && (++numSubstitutions, ch = subchar) < 0) {
     536           0 :                     *pErrorCode = U_INVALID_CHAR_FOUND;
     537           0 :                     return NULL;
     538             :                 }
     539           0 :                 reqLength += U16_LENGTH(ch);
     540             :             }
     541             :         }
     542             :     } else /* srcLength >= 0 */ {
     543           0 :         const uint8_t *pSrcLimit = pSrc + srcLength;
     544             :         int32_t count;
     545             : 
     546             :         /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */
     547             :         for(;;) {
     548             :             /*
     549             :              * Each iteration of the inner loop progresses by at most 3 UTF-8
     550             :              * bytes and one UChar, for most characters.
     551             :              * For supplementary code points (4 & 2), which are rare,
     552             :              * there is an additional adjustment.
     553             :              */
     554           0 :             count = (int32_t)(pDestLimit - pDest);
     555           0 :             srcLength = (int32_t)((pSrcLimit - pSrc) / 3);
     556           0 :             if(count > srcLength) {
     557           0 :                 count = srcLength; /* min(remaining dest, remaining src/3) */
     558             :             }
     559           0 :             if(count < 3) {
     560             :                 /*
     561             :                  * Too much overhead if we get near the end of the string,
     562             :                  * continue with the next loop.
     563             :                  */
     564           0 :                 break;
     565             :             }
     566             : 
     567           0 :             do {
     568           0 :                 ch = *pSrc;
     569           0 :                 if(ch <= 0x7f){
     570           0 :                     *pDest++=(UChar)ch;
     571           0 :                     ++pSrc;
     572             :                 } else {
     573           0 :                     if(ch > 0xe0) {
     574           0 :                         if( /* handle U+1000..U+CFFF inline */
     575           0 :                             ch <= 0xec &&
     576           0 :                             (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f &&
     577           0 :                             (t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f
     578             :                         ) {
     579             :                             /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
     580           0 :                             *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
     581           0 :                             pSrc += 3;
     582           0 :                             continue;
     583             :                         }
     584           0 :                     } else if(ch < 0xe0) {
     585           0 :                         if( /* handle U+0080..U+07FF inline */
     586           0 :                             ch >= 0xc2 &&
     587           0 :                             (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f
     588             :                         ) {
     589           0 :                             *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
     590           0 :                             pSrc += 2;
     591           0 :                             continue;
     592             :                         }
     593             :                     }
     594             : 
     595           0 :                     if(ch >= 0xf0 || subchar > 0xffff) {
     596             :                         /*
     597             :                          * We may read up to six bytes and write up to two UChars,
     598             :                          * which we didn't account for with computing count,
     599             :                          * so we adjust it here.
     600             :                          */
     601           0 :                         if(--count == 0) {
     602           0 :                             break;
     603             :                         }
     604             :                     }
     605             : 
     606             :                     /* function call for "complicated" and error cases */
     607           0 :                     ++pSrc; /* continue after the lead byte */
     608           0 :                     ch=utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch);
     609           0 :                     if(ch<0 && (++numSubstitutions, ch = subchar) < 0){
     610           0 :                         *pErrorCode = U_INVALID_CHAR_FOUND;
     611           0 :                         return NULL;
     612           0 :                     }else if(ch<=0xFFFF){
     613           0 :                         *(pDest++)=(UChar)ch;
     614             :                     }else{
     615           0 :                         *(pDest++)=U16_LEAD(ch);
     616           0 :                         *(pDest++)=U16_TRAIL(ch);
     617             :                     }
     618             :                 }
     619             :             } while(--count > 0);
     620             :         }
     621             : 
     622           0 :         while((pSrc<pSrcLimit) && (pDest<pDestLimit)) {
     623           0 :             ch = *pSrc;
     624           0 :             if(ch <= 0x7f){
     625           0 :                 *pDest++=(UChar)ch;
     626           0 :                 ++pSrc;
     627             :             } else {
     628           0 :                 if(ch > 0xe0) {
     629           0 :                     if( /* handle U+1000..U+CFFF inline */
     630           0 :                         ch <= 0xec &&
     631           0 :                         ((pSrcLimit - pSrc) >= 3) &&
     632           0 :                         (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f &&
     633           0 :                         (t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f
     634             :                     ) {
     635             :                         /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
     636           0 :                         *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
     637           0 :                         pSrc += 3;
     638           0 :                         continue;
     639             :                     }
     640           0 :                 } else if(ch < 0xe0) {
     641           0 :                     if( /* handle U+0080..U+07FF inline */
     642           0 :                         ch >= 0xc2 &&
     643           0 :                         ((pSrcLimit - pSrc) >= 2) &&
     644           0 :                         (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f
     645             :                     ) {
     646           0 :                         *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
     647           0 :                         pSrc += 2;
     648           0 :                         continue;
     649             :                     }
     650             :                 }
     651             : 
     652             :                 /* function call for "complicated" and error cases */
     653           0 :                 ++pSrc; /* continue after the lead byte */
     654           0 :                 ch=utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch);
     655           0 :                 if(ch<0 && (++numSubstitutions, ch = subchar) < 0){
     656           0 :                     *pErrorCode = U_INVALID_CHAR_FOUND;
     657           0 :                     return NULL;
     658           0 :                 }else if(ch<=0xFFFF){
     659           0 :                     *(pDest++)=(UChar)ch;
     660             :                 }else{
     661           0 :                     *(pDest++)=U16_LEAD(ch);
     662           0 :                     if(pDest<pDestLimit){
     663           0 :                         *(pDest++)=U16_TRAIL(ch);
     664             :                     }else{
     665           0 :                         reqLength++;
     666           0 :                         break;
     667             :                     }
     668             :                 }
     669             :             }
     670             :         }
     671             :         /* do not fill the dest buffer just count the UChars needed */
     672           0 :         while(pSrc < pSrcLimit){
     673           0 :             ch = *pSrc;
     674           0 :             if(ch <= 0x7f){
     675           0 :                 reqLength++;
     676           0 :                 ++pSrc;
     677             :             } else {
     678           0 :                 if(ch > 0xe0) {
     679           0 :                     if( /* handle U+1000..U+CFFF inline */
     680           0 :                         ch <= 0xec &&
     681           0 :                         ((pSrcLimit - pSrc) >= 3) &&
     682           0 :                         (uint8_t)(pSrc[1] - 0x80) <= 0x3f &&
     683           0 :                         (uint8_t)(pSrc[2] - 0x80) <= 0x3f
     684             :                     ) {
     685           0 :                         reqLength++;
     686           0 :                         pSrc += 3;
     687           0 :                         continue;
     688             :                     }
     689           0 :                 } else if(ch < 0xe0) {
     690           0 :                     if( /* handle U+0080..U+07FF inline */
     691           0 :                         ch >= 0xc2 &&
     692           0 :                         ((pSrcLimit - pSrc) >= 2) &&
     693           0 :                         (uint8_t)(pSrc[1] - 0x80) <= 0x3f
     694             :                     ) {
     695           0 :                         reqLength++;
     696           0 :                         pSrc += 2;
     697           0 :                         continue;
     698             :                     }
     699             :                 }
     700             : 
     701             :                 /* function call for "complicated" and error cases */
     702           0 :                 ++pSrc; /* continue after the lead byte */
     703           0 :                 ch=utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch);
     704           0 :                 if(ch<0 && (++numSubstitutions, ch = subchar) < 0){
     705           0 :                     *pErrorCode = U_INVALID_CHAR_FOUND;
     706           0 :                     return NULL;
     707             :                 }
     708           0 :                 reqLength+=U16_LENGTH(ch);
     709             :             }
     710             :         }
     711             :     }
     712             : 
     713           0 :     reqLength+=(int32_t)(pDest - dest);
     714             : 
     715           0 :     if(pNumSubstitutions!=NULL) {
     716           0 :         *pNumSubstitutions=numSubstitutions;
     717             :     }
     718             : 
     719           0 :     if(pDestLength){
     720           0 :         *pDestLength = reqLength;
     721             :     }
     722             : 
     723             :     /* Terminate the buffer */
     724           0 :     u_terminateUChars(dest,destCapacity,reqLength,pErrorCode);
     725             : 
     726           0 :     return dest;
     727             : }
     728             : 
     729             : U_CAPI UChar* U_EXPORT2
     730           0 : u_strFromUTF8(UChar *dest,
     731             :               int32_t destCapacity,
     732             :               int32_t *pDestLength,
     733             :               const char* src,
     734             :               int32_t srcLength,
     735             :               UErrorCode *pErrorCode){
     736             :     return u_strFromUTF8WithSub(
     737             :             dest, destCapacity, pDestLength,
     738             :             src, srcLength,
     739             :             U_SENTINEL, NULL,
     740           0 :             pErrorCode);
     741             : }
     742             : 
     743             : U_CAPI UChar * U_EXPORT2
     744           0 : u_strFromUTF8Lenient(UChar *dest,
     745             :                      int32_t destCapacity,
     746             :                      int32_t *pDestLength,
     747             :                      const char *src,
     748             :                      int32_t srcLength,
     749             :                      UErrorCode *pErrorCode) {
     750           0 :     UChar *pDest = dest;
     751             :     UChar32 ch;
     752           0 :     int32_t reqLength = 0;
     753           0 :     uint8_t* pSrc = (uint8_t*) src;
     754             : 
     755             :     /* args check */
     756           0 :     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
     757           0 :         return NULL;
     758             :     }
     759             :         
     760           0 :     if( (src==NULL && srcLength!=0) || srcLength < -1 ||
     761           0 :         (destCapacity<0) || (dest == NULL && destCapacity > 0)
     762             :     ) {
     763           0 :         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
     764           0 :         return NULL;
     765             :     }
     766             : 
     767           0 :     if(srcLength < 0) {
     768             :         /* Transform a NUL-terminated string. */
     769           0 :         UChar *pDestLimit = (dest!=NULL)?(dest+destCapacity):NULL;
     770             :         uint8_t t1, t2, t3; /* trail bytes */
     771             : 
     772           0 :         while(((ch = *pSrc) != 0) && (pDest < pDestLimit)) {
     773           0 :             if(ch < 0xc0) {
     774             :                 /*
     775             :                  * ASCII, or a trail byte in lead position which is treated like
     776             :                  * a single-byte sequence for better character boundary
     777             :                  * resynchronization after illegal sequences.
     778             :                  */
     779           0 :                 *pDest++=(UChar)ch;
     780           0 :                 ++pSrc;
     781           0 :                 continue;
     782           0 :             } else if(ch < 0xe0) { /* U+0080..U+07FF */
     783           0 :                 if((t1 = pSrc[1]) != 0) {
     784             :                     /* 0x3080 = (0xc0 << 6) + 0x80 */
     785           0 :                     *pDest++ = (UChar)((ch << 6) + t1 - 0x3080);
     786           0 :                     pSrc += 2;
     787           0 :                     continue;
     788             :                 }
     789           0 :             } else if(ch < 0xf0) { /* U+0800..U+FFFF */
     790           0 :                 if((t1 = pSrc[1]) != 0 && (t2 = pSrc[2]) != 0) {
     791             :                     /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
     792             :                     /* 0x2080 = (0x80 << 6) + 0x80 */
     793           0 :                     *pDest++ = (UChar)((ch << 12) + (t1 << 6) + t2 - 0x2080);
     794           0 :                     pSrc += 3;
     795           0 :                     continue;
     796             :                 }
     797             :             } else /* f0..f4 */ { /* U+10000..U+10FFFF */
     798           0 :                 if((t1 = pSrc[1]) != 0 && (t2 = pSrc[2]) != 0 && (t3 = pSrc[3]) != 0) {
     799           0 :                     pSrc += 4;
     800             :                     /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
     801           0 :                     ch = (ch << 18) + (t1 << 12) + (t2 << 6) + t3 - 0x3c82080;
     802           0 :                     *(pDest++) = U16_LEAD(ch);
     803           0 :                     if(pDest < pDestLimit) {
     804           0 :                         *(pDest++) = U16_TRAIL(ch);
     805             :                     } else {
     806           0 :                         reqLength = 1;
     807           0 :                         break;
     808             :                     }
     809           0 :                     continue;
     810             :                 }
     811             :             }
     812             : 
     813             :             /* truncated character at the end */
     814           0 :             *pDest++ = 0xfffd;
     815           0 :             while(*++pSrc != 0) {}
     816           0 :             break;
     817             :         }
     818             : 
     819             :         /* Pre-flight the rest of the string. */
     820           0 :         while((ch = *pSrc) != 0) {
     821           0 :             if(ch < 0xc0) {
     822             :                 /*
     823             :                  * ASCII, or a trail byte in lead position which is treated like
     824             :                  * a single-byte sequence for better character boundary
     825             :                  * resynchronization after illegal sequences.
     826             :                  */
     827           0 :                 ++reqLength;
     828           0 :                 ++pSrc;
     829           0 :                 continue;
     830           0 :             } else if(ch < 0xe0) { /* U+0080..U+07FF */
     831           0 :                 if(pSrc[1] != 0) {
     832           0 :                     ++reqLength;
     833           0 :                     pSrc += 2;
     834           0 :                     continue;
     835             :                 }
     836           0 :             } else if(ch < 0xf0) { /* U+0800..U+FFFF */
     837           0 :                 if(pSrc[1] != 0 && pSrc[2] != 0) {
     838           0 :                     ++reqLength;
     839           0 :                     pSrc += 3;
     840           0 :                     continue;
     841             :                 }
     842             :             } else /* f0..f4 */ { /* U+10000..U+10FFFF */
     843           0 :                 if(pSrc[1] != 0 && pSrc[2] != 0 && pSrc[3] != 0) {
     844           0 :                     reqLength += 2;
     845           0 :                     pSrc += 4;
     846           0 :                     continue;
     847             :                 }
     848             :             }
     849             : 
     850             :             /* truncated character at the end */
     851           0 :             ++reqLength;
     852           0 :             break;
     853             :         }
     854             :     } else /* srcLength >= 0 */ {
     855           0 :       const uint8_t *pSrcLimit = (pSrc!=NULL)?(pSrc + srcLength):NULL;
     856             : 
     857             :         /*
     858             :          * This function requires that if srcLength is given, then it must be
     859             :          * destCapatity >= srcLength so that we need not check for
     860             :          * destination buffer overflow in the loop.
     861             :          */
     862           0 :         if(destCapacity < srcLength) {
     863           0 :             if(pDestLength != NULL) {
     864           0 :                 *pDestLength = srcLength; /* this likely overestimates the true destLength! */
     865             :             }
     866           0 :             *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
     867           0 :             return NULL;
     868             :         }
     869             : 
     870           0 :         if((pSrcLimit - pSrc) >= 4) {
     871           0 :             pSrcLimit -= 3; /* temporarily reduce pSrcLimit */
     872             : 
     873             :             /* in this loop, we can always access at least 4 bytes, up to pSrc+3 */
     874           0 :             do {
     875           0 :                 ch = *pSrc++;
     876           0 :                 if(ch < 0xc0) {
     877             :                     /*
     878             :                      * ASCII, or a trail byte in lead position which is treated like
     879             :                      * a single-byte sequence for better character boundary
     880             :                      * resynchronization after illegal sequences.
     881             :                      */
     882           0 :                     *pDest++=(UChar)ch;
     883           0 :                 } else if(ch < 0xe0) { /* U+0080..U+07FF */
     884             :                     /* 0x3080 = (0xc0 << 6) + 0x80 */
     885           0 :                     *pDest++ = (UChar)((ch << 6) + *pSrc++ - 0x3080);
     886           0 :                 } else if(ch < 0xf0) { /* U+0800..U+FFFF */
     887             :                     /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
     888             :                     /* 0x2080 = (0x80 << 6) + 0x80 */
     889           0 :                     ch = (ch << 12) + (*pSrc++ << 6);
     890           0 :                     *pDest++ = (UChar)(ch + *pSrc++ - 0x2080);
     891             :                 } else /* f0..f4 */ { /* U+10000..U+10FFFF */
     892             :                     /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
     893           0 :                     ch = (ch << 18) + (*pSrc++ << 12);
     894           0 :                     ch += *pSrc++ << 6;
     895           0 :                     ch += *pSrc++ - 0x3c82080;
     896           0 :                     *(pDest++) = U16_LEAD(ch);
     897           0 :                     *(pDest++) = U16_TRAIL(ch);
     898             :                 }
     899           0 :             } while(pSrc < pSrcLimit);
     900             : 
     901           0 :             pSrcLimit += 3; /* restore original pSrcLimit */
     902             :         }
     903             : 
     904           0 :         while(pSrc < pSrcLimit) {
     905           0 :             ch = *pSrc++;
     906           0 :             if(ch < 0xc0) {
     907             :                 /*
     908             :                  * ASCII, or a trail byte in lead position which is treated like
     909             :                  * a single-byte sequence for better character boundary
     910             :                  * resynchronization after illegal sequences.
     911             :                  */
     912           0 :                 *pDest++=(UChar)ch;
     913           0 :                 continue;
     914           0 :             } else if(ch < 0xe0) { /* U+0080..U+07FF */
     915           0 :                 if(pSrc < pSrcLimit) {
     916             :                     /* 0x3080 = (0xc0 << 6) + 0x80 */
     917           0 :                     *pDest++ = (UChar)((ch << 6) + *pSrc++ - 0x3080);
     918           0 :                     continue;
     919             :                 }
     920           0 :             } else if(ch < 0xf0) { /* U+0800..U+FFFF */
     921           0 :                 if((pSrcLimit - pSrc) >= 2) {
     922             :                     /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
     923             :                     /* 0x2080 = (0x80 << 6) + 0x80 */
     924           0 :                     ch = (ch << 12) + (*pSrc++ << 6);
     925           0 :                     *pDest++ = (UChar)(ch + *pSrc++ - 0x2080);
     926           0 :                     pSrc += 3;
     927           0 :                     continue;
     928             :                 }
     929             :             } else /* f0..f4 */ { /* U+10000..U+10FFFF */
     930           0 :                 if((pSrcLimit - pSrc) >= 3) {
     931             :                     /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
     932           0 :                     ch = (ch << 18) + (*pSrc++ << 12);
     933           0 :                     ch += *pSrc++ << 6;
     934           0 :                     ch += *pSrc++ - 0x3c82080;
     935           0 :                     *(pDest++) = U16_LEAD(ch);
     936           0 :                     *(pDest++) = U16_TRAIL(ch);
     937           0 :                     pSrc += 4;
     938           0 :                     continue;
     939             :                 }
     940             :             }
     941             : 
     942             :             /* truncated character at the end */
     943           0 :             *pDest++ = 0xfffd;
     944           0 :             break;
     945             :         }
     946             :     }
     947             : 
     948           0 :     reqLength+=(int32_t)(pDest - dest);
     949             : 
     950           0 :     if(pDestLength){
     951           0 :         *pDestLength = reqLength;
     952             :     }
     953             : 
     954             :     /* Terminate the buffer */
     955           0 :     u_terminateUChars(dest,destCapacity,reqLength,pErrorCode);
     956             : 
     957           0 :     return dest;
     958             : }
     959             : 
     960             : static inline uint8_t *
     961           0 : _appendUTF8(uint8_t *pDest, UChar32 c) {
     962             :     /* it is 0<=c<=0x10ffff and not a surrogate if called by a validating function */
     963           0 :     if((c)<=0x7f) {
     964           0 :         *pDest++=(uint8_t)c;
     965           0 :     } else if(c<=0x7ff) {
     966           0 :         *pDest++=(uint8_t)((c>>6)|0xc0);
     967           0 :         *pDest++=(uint8_t)((c&0x3f)|0x80);
     968           0 :     } else if(c<=0xffff) {
     969           0 :         *pDest++=(uint8_t)((c>>12)|0xe0);
     970           0 :         *pDest++=(uint8_t)(((c>>6)&0x3f)|0x80);
     971           0 :         *pDest++=(uint8_t)(((c)&0x3f)|0x80);
     972             :     } else /* if((uint32_t)(c)<=0x10ffff) */ {
     973           0 :         *pDest++=(uint8_t)(((c)>>18)|0xf0);
     974           0 :         *pDest++=(uint8_t)((((c)>>12)&0x3f)|0x80);
     975           0 :         *pDest++=(uint8_t)((((c)>>6)&0x3f)|0x80);
     976           0 :         *pDest++=(uint8_t)(((c)&0x3f)|0x80);
     977             :     }
     978           0 :     return pDest;
     979             : }
     980             : 
     981             :    
     982             : U_CAPI char* U_EXPORT2 
     983           0 : u_strToUTF8WithSub(char *dest,
     984             :             int32_t destCapacity,
     985             :             int32_t *pDestLength,
     986             :             const UChar *pSrc,
     987             :             int32_t srcLength,
     988             :             UChar32 subchar, int32_t *pNumSubstitutions,
     989             :             UErrorCode *pErrorCode){
     990           0 :     int32_t reqLength=0;
     991           0 :     uint32_t ch=0,ch2=0;
     992           0 :     uint8_t *pDest = (uint8_t *)dest;
     993           0 :     uint8_t *pDestLimit = (pDest!=NULL)?(pDest + destCapacity):NULL;
     994             :     int32_t numSubstitutions;
     995             : 
     996             :     /* args check */
     997           0 :     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
     998           0 :         return NULL;
     999             :     }
    1000             :         
    1001           0 :     if( (pSrc==NULL && srcLength!=0) || srcLength < -1 ||
    1002           0 :         (destCapacity<0) || (dest == NULL && destCapacity > 0) ||
    1003           0 :         subchar > 0x10ffff || U_IS_SURROGATE(subchar)
    1004             :     ) {
    1005           0 :         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
    1006           0 :         return NULL;
    1007             :     }
    1008             : 
    1009           0 :     if(pNumSubstitutions!=NULL) {
    1010           0 :         *pNumSubstitutions=0;
    1011             :     }
    1012           0 :     numSubstitutions=0;
    1013             : 
    1014           0 :     if(srcLength==-1) {
    1015           0 :         while((ch=*pSrc)!=0) {
    1016           0 :             ++pSrc;
    1017           0 :             if(ch <= 0x7f) {
    1018           0 :                 if(pDest<pDestLimit) {
    1019           0 :                     *pDest++ = (uint8_t)ch;
    1020             :                 } else {
    1021           0 :                     reqLength = 1;
    1022           0 :                     break;
    1023             :                 }
    1024           0 :             } else if(ch <= 0x7ff) {
    1025           0 :                 if((pDestLimit - pDest) >= 2) {
    1026           0 :                     *pDest++=(uint8_t)((ch>>6)|0xc0);
    1027           0 :                     *pDest++=(uint8_t)((ch&0x3f)|0x80);
    1028             :                 } else {
    1029           0 :                     reqLength = 2;
    1030           0 :                     break;
    1031             :                 }
    1032           0 :             } else if(ch <= 0xd7ff || ch >= 0xe000) {
    1033           0 :                 if((pDestLimit - pDest) >= 3) {
    1034           0 :                     *pDest++=(uint8_t)((ch>>12)|0xe0);
    1035           0 :                     *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
    1036           0 :                     *pDest++=(uint8_t)((ch&0x3f)|0x80);
    1037             :                 } else {
    1038           0 :                     reqLength = 3;
    1039           0 :                     break;
    1040             :                 }
    1041             :             } else /* ch is a surrogate */ {
    1042             :                 int32_t length;
    1043             : 
    1044             :                 /*need not check for NUL because NUL fails U16_IS_TRAIL() anyway*/
    1045           0 :                 if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(ch2=*pSrc)) { 
    1046           0 :                     ++pSrc;
    1047           0 :                     ch=U16_GET_SUPPLEMENTARY(ch, ch2);
    1048           0 :                 } else if(subchar>=0) {
    1049           0 :                     ch=subchar;
    1050           0 :                     ++numSubstitutions;
    1051             :                 } else {
    1052             :                     /* Unicode 3.2 forbids surrogate code points in UTF-8 */
    1053           0 :                     *pErrorCode = U_INVALID_CHAR_FOUND;
    1054           0 :                     return NULL;
    1055             :                 }
    1056             : 
    1057           0 :                 length = U8_LENGTH(ch);
    1058           0 :                 if((pDestLimit - pDest) >= length) {
    1059             :                     /* convert and append*/
    1060           0 :                     pDest=_appendUTF8(pDest, ch);
    1061             :                 } else {
    1062           0 :                     reqLength = length;
    1063           0 :                     break;
    1064             :                 }
    1065             :             }
    1066             :         }
    1067           0 :         while((ch=*pSrc++)!=0) {
    1068           0 :             if(ch<=0x7f) {
    1069           0 :                 ++reqLength;
    1070           0 :             } else if(ch<=0x7ff) {
    1071           0 :                 reqLength+=2;
    1072           0 :             } else if(!U16_IS_SURROGATE(ch)) {
    1073           0 :                 reqLength+=3;
    1074           0 :             } else if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(ch2=*pSrc)) {
    1075           0 :                 ++pSrc;
    1076           0 :                 reqLength+=4;
    1077           0 :             } else if(subchar>=0) {
    1078           0 :                 reqLength+=U8_LENGTH(subchar);
    1079           0 :                 ++numSubstitutions;
    1080             :             } else {
    1081             :                 /* Unicode 3.2 forbids surrogate code points in UTF-8 */
    1082           0 :                 *pErrorCode = U_INVALID_CHAR_FOUND;
    1083           0 :                 return NULL;
    1084             :             }
    1085             :         }
    1086             :     } else {
    1087           0 :         const UChar *pSrcLimit = (pSrc!=NULL)?(pSrc+srcLength):NULL;
    1088             :         int32_t count;
    1089             : 
    1090             :         /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */
    1091             :         for(;;) {
    1092             :             /*
    1093             :              * Each iteration of the inner loop progresses by at most 3 UTF-8
    1094             :              * bytes and one UChar, for most characters.
    1095             :              * For supplementary code points (4 & 2), which are rare,
    1096             :              * there is an additional adjustment.
    1097             :              */
    1098           0 :             count = (int32_t)((pDestLimit - pDest) / 3);
    1099           0 :             srcLength = (int32_t)(pSrcLimit - pSrc);
    1100           0 :             if(count > srcLength) {
    1101           0 :                 count = srcLength; /* min(remaining dest/3, remaining src) */
    1102             :             }
    1103           0 :             if(count < 3) {
    1104             :                 /*
    1105             :                  * Too much overhead if we get near the end of the string,
    1106             :                  * continue with the next loop.
    1107             :                  */
    1108           0 :                 break;
    1109             :             }
    1110           0 :             do {
    1111           0 :                 ch=*pSrc++;
    1112           0 :                 if(ch <= 0x7f) {
    1113           0 :                     *pDest++ = (uint8_t)ch;
    1114           0 :                 } else if(ch <= 0x7ff) {
    1115           0 :                     *pDest++=(uint8_t)((ch>>6)|0xc0);
    1116           0 :                     *pDest++=(uint8_t)((ch&0x3f)|0x80);
    1117           0 :                 } else if(ch <= 0xd7ff || ch >= 0xe000) {
    1118           0 :                     *pDest++=(uint8_t)((ch>>12)|0xe0);
    1119           0 :                     *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
    1120           0 :                     *pDest++=(uint8_t)((ch&0x3f)|0x80);
    1121             :                 } else /* ch is a surrogate */ {
    1122             :                     /*
    1123             :                      * We will read two UChars and probably output four bytes,
    1124             :                      * which we didn't account for with computing count,
    1125             :                      * so we adjust it here.
    1126             :                      */
    1127           0 :                     if(--count == 0) {
    1128           0 :                         --pSrc; /* undo ch=*pSrc++ for the lead surrogate */
    1129           0 :                         break;  /* recompute count */
    1130             :                     }
    1131             : 
    1132           0 :                     if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(ch2=*pSrc)) { 
    1133           0 :                         ++pSrc;
    1134           0 :                         ch=U16_GET_SUPPLEMENTARY(ch, ch2);
    1135             : 
    1136             :                         /* writing 4 bytes per 2 UChars is ok */
    1137           0 :                         *pDest++=(uint8_t)((ch>>18)|0xf0);
    1138           0 :                         *pDest++=(uint8_t)(((ch>>12)&0x3f)|0x80);
    1139           0 :                         *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
    1140           0 :                         *pDest++=(uint8_t)((ch&0x3f)|0x80);
    1141             :                     } else  {
    1142             :                         /* Unicode 3.2 forbids surrogate code points in UTF-8 */
    1143           0 :                         if(subchar>=0) {
    1144           0 :                             ch=subchar;
    1145           0 :                             ++numSubstitutions;
    1146             :                         } else {
    1147           0 :                             *pErrorCode = U_INVALID_CHAR_FOUND;
    1148           0 :                             return NULL;
    1149             :                         }
    1150             : 
    1151             :                         /* convert and append*/
    1152           0 :                         pDest=_appendUTF8(pDest, ch);
    1153             :                     }
    1154             :                 }
    1155             :             } while(--count > 0);
    1156             :         }
    1157             : 
    1158           0 :         while(pSrc<pSrcLimit) {
    1159           0 :             ch=*pSrc++;
    1160           0 :             if(ch <= 0x7f) {
    1161           0 :                 if(pDest<pDestLimit) {
    1162           0 :                     *pDest++ = (uint8_t)ch;
    1163             :                 } else {
    1164           0 :                     reqLength = 1;
    1165           0 :                     break;
    1166             :                 }
    1167           0 :             } else if(ch <= 0x7ff) {
    1168           0 :                 if((pDestLimit - pDest) >= 2) {
    1169           0 :                     *pDest++=(uint8_t)((ch>>6)|0xc0);
    1170           0 :                     *pDest++=(uint8_t)((ch&0x3f)|0x80);
    1171             :                 } else {
    1172           0 :                     reqLength = 2;
    1173           0 :                     break;
    1174             :                 }
    1175           0 :             } else if(ch <= 0xd7ff || ch >= 0xe000) {
    1176           0 :                 if((pDestLimit - pDest) >= 3) {
    1177           0 :                     *pDest++=(uint8_t)((ch>>12)|0xe0);
    1178           0 :                     *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
    1179           0 :                     *pDest++=(uint8_t)((ch&0x3f)|0x80);
    1180             :                 } else {
    1181           0 :                     reqLength = 3;
    1182           0 :                     break;
    1183             :                 }
    1184             :             } else /* ch is a surrogate */ {
    1185             :                 int32_t length;
    1186             : 
    1187           0 :                 if(U16_IS_SURROGATE_LEAD(ch) && pSrc<pSrcLimit && U16_IS_TRAIL(ch2=*pSrc)) { 
    1188           0 :                     ++pSrc;
    1189           0 :                     ch=U16_GET_SUPPLEMENTARY(ch, ch2);
    1190           0 :                 } else if(subchar>=0) {
    1191           0 :                     ch=subchar;
    1192           0 :                     ++numSubstitutions;
    1193             :                 } else {
    1194             :                     /* Unicode 3.2 forbids surrogate code points in UTF-8 */
    1195           0 :                     *pErrorCode = U_INVALID_CHAR_FOUND;
    1196           0 :                     return NULL;
    1197             :                 }
    1198             : 
    1199           0 :                 length = U8_LENGTH(ch);
    1200           0 :                 if((pDestLimit - pDest) >= length) {
    1201             :                     /* convert and append*/
    1202           0 :                     pDest=_appendUTF8(pDest, ch);
    1203             :                 } else {
    1204           0 :                     reqLength = length;
    1205           0 :                     break;
    1206             :                 }
    1207             :             }
    1208             :         }
    1209           0 :         while(pSrc<pSrcLimit) {
    1210           0 :             ch=*pSrc++;
    1211           0 :             if(ch<=0x7f) {
    1212           0 :                 ++reqLength;
    1213           0 :             } else if(ch<=0x7ff) {
    1214           0 :                 reqLength+=2;
    1215           0 :             } else if(!U16_IS_SURROGATE(ch)) {
    1216           0 :                 reqLength+=3;
    1217           0 :             } else if(U16_IS_SURROGATE_LEAD(ch) && pSrc<pSrcLimit && U16_IS_TRAIL(ch2=*pSrc)) {
    1218           0 :                 ++pSrc;
    1219           0 :                 reqLength+=4;
    1220           0 :             } else if(subchar>=0) {
    1221           0 :                 reqLength+=U8_LENGTH(subchar);
    1222           0 :                 ++numSubstitutions;
    1223             :             } else {
    1224             :                 /* Unicode 3.2 forbids surrogate code points in UTF-8 */
    1225           0 :                 *pErrorCode = U_INVALID_CHAR_FOUND;
    1226           0 :                 return NULL;
    1227             :             }
    1228             :         }
    1229             :     }
    1230             : 
    1231           0 :     reqLength+=(int32_t)(pDest - (uint8_t *)dest);
    1232             : 
    1233           0 :     if(pNumSubstitutions!=NULL) {
    1234           0 :         *pNumSubstitutions=numSubstitutions;
    1235             :     }
    1236             : 
    1237           0 :     if(pDestLength){
    1238           0 :         *pDestLength = reqLength;
    1239             :     }
    1240             : 
    1241             :     /* Terminate the buffer */
    1242           0 :     u_terminateChars(dest, destCapacity, reqLength, pErrorCode);
    1243           0 :     return dest;
    1244             : }
    1245             : 
    1246             : /**
                     :  * Converts a UTF-16 string to UTF-8 with no substitution character.
                     :  *
                     :  * Forwards every argument unchanged to u_strToUTF8WithSub(), passing
                     :  * U_SENTINEL as the substitution character and NULL for the
                     :  * substitution counter. In the visible tail of u_strToUTF8WithSub() a
                     :  * negative subchar makes an unpaired surrogate fail with
                     :  * U_INVALID_CHAR_FOUND; U_SENTINEL is presumably such a negative value
                     :  * (NOTE(review): confirm against the ICU headers).
                     :  *
                     :  * @param dest         output buffer, passed through
                     :  * @param destCapacity capacity of dest, passed through
                     :  * @param pDestLength  receives the output length, passed through
                     :  * @param pSrc         UTF-16 input, passed through
                     :  * @param srcLength    input length, passed through
                     :  * @param pErrorCode   in/out error code, passed through
                     :  * @return whatever u_strToUTF8WithSub() returns
                     :  */ U_CAPI char* U_EXPORT2 
    1247           0 : u_strToUTF8(char *dest,
    1248             :             int32_t destCapacity,
    1249             :             int32_t *pDestLength,
    1250             :             const UChar *pSrc,
    1251             :             int32_t srcLength,
    1252             :             UErrorCode *pErrorCode){
    1253             :     return u_strToUTF8WithSub(
    1254             :             dest, destCapacity, pDestLength,
    1255             :             pSrc, srcLength,
    1256             :             U_SENTINEL, NULL, /* no substitution char, no substitution counter */
    1257           0 :             pErrorCode);
    1258             : }
    1259             : 
    1260             : /**
                     :  * Converts a byte string in Java "modified UTF-8" to UTF-16.
                     :  *
                     :  * Bytes up to 0x7f are copied as-is. Two-byte sequences (lead 0xc0..0xdf)
                     :  * and three-byte sequences (lead 0xe0..0xef) are decoded inline; only the
                     :  * trail bytes are range-checked, so C0 80 decodes to U+0000. Anything
                     :  * else is treated as ill-formed: if subchar is negative the function
                     :  * fails with U_INVALID_CHAR_FOUND, otherwise the sequence is skipped via
                     :  * utf8_nextCharSafeBodyPointer() and subchar is output in its place.
                     :  *
                     :  * The work is done in up to three passes over the remaining input:
                     :  * a fast loop that pre-computes how many iterations are safe without
                     :  * per-access limit checks, a fully checked loop that runs until the
                     :  * input or the destination is exhausted, and a count-only loop that
                     :  * measures the output that did not fit.
                     :  *
                     :  * @param dest              output buffer; may be NULL only if destCapacity==0
                     :  * @param destCapacity      capacity of dest in UChars; must be >=0
                     :  * @param pDestLength       if not NULL, receives the number of UChars needed
                     :  *                          for the whole output (may exceed destCapacity)
                     :  * @param src               input bytes; may be NULL only if srcLength==0
                     :  * @param srcLength         number of input bytes, or -1 if NUL-terminated
                     :  * @param subchar           substitution code point for ill-formed input, or a
                     :  *                          negative value to fail instead; values above
                     :  *                          0x10ffff and surrogates are rejected
                     :  * @param pNumSubstitutions if not NULL, set to 0 up front and to the number
                     :  *                          of substitutions on normal completion
                     :  * @param pErrorCode        in/out error code; if it already indicates failure
                     :  *                          the function returns NULL without doing anything
                     :  * @return dest, or NULL on an argument error or U_INVALID_CHAR_FOUND
                     :  */ U_CAPI UChar* U_EXPORT2
    1261           0 : u_strFromJavaModifiedUTF8WithSub(
    1262             :         UChar *dest,
    1263             :         int32_t destCapacity,
    1264             :         int32_t *pDestLength,
    1265             :         const char *src,
    1266             :         int32_t srcLength,
    1267             :         UChar32 subchar, int32_t *pNumSubstitutions,
    1268             :         UErrorCode *pErrorCode) {
    1269           0 :     UChar *pDest = dest;
    1270           0 :     UChar *pDestLimit = dest+destCapacity;
    1271             :     UChar32 ch;
    1272           0 :     int32_t reqLength = 0; /* UChars needed beyond what was written to dest */
    1273           0 :     const uint8_t* pSrc = (const uint8_t*) src;
    1274             :     const uint8_t *pSrcLimit;
    1275             :     int32_t count;
    1276             :     uint8_t t1, t2; /* trail bytes */
    1277             :     int32_t numSubstitutions;
    1278             : 
    1279             :     /* args check */
    1280           0 :     if(U_FAILURE(*pErrorCode)){
    1281           0 :         return NULL;
    1282             :     }
    1283           0 :     if( (src==NULL && srcLength!=0) || srcLength < -1 ||
    1284           0 :         (dest==NULL && destCapacity!=0) || destCapacity<0 ||
    1285           0 :         subchar > 0x10ffff || U_IS_SURROGATE(subchar)
    1286             :     ) {
    1287           0 :         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
    1288           0 :         return NULL;
    1289             :     }
    1290             : 
    1291           0 :     if(pNumSubstitutions!=NULL) {
    1292           0 :         *pNumSubstitutions=0;
    1293             :     }
    1294           0 :     numSubstitutions=0;
    1295             : 
    1296           0 :     if(srcLength < 0) {
    1297             :         /*
    1298             :          * Transform a NUL-terminated ASCII string.
    1299             :          * Handle non-ASCII strings with slower code.
    1300             :          */
    1301           0 :         while(((ch = *pSrc) != 0) && ch <= 0x7f && (pDest < pDestLimit)) {
    1302           0 :             *pDest++=(UChar)ch;
    1303           0 :             ++pSrc;
    1304             :         }
    1305           0 :         if(ch == 0) {
    1306           0 :             reqLength=(int32_t)(pDest - dest);
    1307           0 :             if(pDestLength) {
    1308           0 :                 *pDestLength = reqLength;
    1309             :             }
    1310             : 
    1311             :             /* Terminate the buffer */
    1312           0 :             u_terminateUChars(dest, destCapacity, reqLength, pErrorCode);
    1313           0 :             return dest;
    1314             :         }
    1315           0 :         srcLength = uprv_strlen((const char *)pSrc); /* pSrc is already past the converted ASCII prefix */
    1316             :     }
    1317             : 
    1318             :     /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */
    1319           0 :     pSrcLimit = (pSrc == NULL) ? NULL : pSrc + srcLength;
    1320             :     for(;;) {
    1321           0 :         count = (int32_t)(pDestLimit - pDest);
    1322           0 :         srcLength = (int32_t)(pSrcLimit - pSrc);
    1323           0 :         if(count >= srcLength && srcLength > 0 && *pSrc <= 0x7f) {
    1324             :             /* fast ASCII loop: dest has room for every remaining byte, so only pSrcLimit is checked */
    1325           0 :             const uint8_t *prevSrc = pSrc;
    1326             :             int32_t delta;
    1327           0 :             while(pSrc < pSrcLimit && (ch = *pSrc) <= 0x7f) {
    1328           0 :                 *pDest++=(UChar)ch;
    1329           0 :                 ++pSrc;
    1330             :             }
    1331           0 :             delta = (int32_t)(pSrc - prevSrc);
    1332           0 :             count -= delta;
    1333           0 :             srcLength -= delta;
    1334             :         }
    1335             :         /*
    1336             :          * Each iteration of the inner loop progresses by at most 3 UTF-8
    1337             :          * bytes and one UChar.
    1338             :          */
    1339           0 :         srcLength /= 3;
    1340           0 :         if(count > srcLength) {
    1341           0 :             count = srcLength; /* min(remaining dest, remaining src/3) */
    1342             :         }
    1343           0 :         if(count < 3) {
    1344             :             /*
    1345             :              * Too much overhead if we get near the end of the string,
    1346             :              * continue with the next loop.
    1347             :              */
    1348           0 :             break;
    1349             :         }
    1350           0 :         do {
    1351           0 :             ch = *pSrc;
    1352           0 :             if(ch <= 0x7f){
    1353           0 :                 *pDest++=(UChar)ch;
    1354           0 :                 ++pSrc;
    1355             :             } else {
    1356           0 :                 if(ch >= 0xe0) {
    1357           0 :                     if( /* handle U+0000..U+FFFF inline */
    1358           0 :                         ch <= 0xef &&
    1359           0 :                         (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f &&
    1360           0 :                         (t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f
    1361             :                     ) {
    1362             :                         /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
    1363           0 :                         *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
    1364           0 :                         pSrc += 3;
    1365           0 :                         continue;
    1366             :                     }
    1367             :                 } else {
    1368           0 :                     if( /* handle U+0000..U+07FF inline */
    1369           0 :                         ch >= 0xc0 &&
    1370           0 :                         (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f
    1371             :                     ) {
    1372           0 :                         *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
    1373           0 :                         pSrc += 2;
    1374           0 :                         continue;
    1375             :                     }
    1376             :                 }
    1377             : /* NOTE(review): how many bytes utf8_nextCharSafeBodyPointer() consumes is not visible here; confirm it stays within the 3-bytes-per-iteration budget used for count */
    1378           0 :                 if(subchar < 0) {
    1379           0 :                     *pErrorCode = U_INVALID_CHAR_FOUND;
    1380           0 :                     return NULL;
    1381           0 :                 } else if(subchar > 0xffff && --count == 0) {
    1382             :                     /*
    1383             :                      * We need to write two UChars, adjusted count for that,
    1384             :                      * and ran out of space.
    1385             :                      */
    1386           0 :                     break;
    1387             :                 } else {
    1388             :                     /* function call for error cases */
    1389           0 :                     ++pSrc; /* continue after the lead byte */
    1390           0 :                     utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch);
    1391           0 :                     ++numSubstitutions;
    1392           0 :                     if(subchar<=0xFFFF) {
    1393           0 :                         *(pDest++)=(UChar)subchar;
    1394             :                     } else {
    1395           0 :                         *(pDest++)=U16_LEAD(subchar);
    1396           0 :                         *(pDest++)=U16_TRAIL(subchar);
    1397             :                     }
    1398             :                 }
    1399             :             }
    1400             :         } while(--count > 0);
    1401           0 :     }
    1402             : /* Fully checked loop: runs until the input or the destination is exhausted. */
    1403           0 :     while((pSrc<pSrcLimit) && (pDest<pDestLimit)) {
    1404           0 :         ch = *pSrc;
    1405           0 :         if(ch <= 0x7f){
    1406           0 :             *pDest++=(UChar)ch;
    1407           0 :             ++pSrc;
    1408             :         } else {
    1409           0 :             if(ch >= 0xe0) {
    1410           0 :                 if( /* handle U+0000..U+FFFF inline */
    1411           0 :                     ch <= 0xef &&
    1412           0 :                     ((pSrcLimit - pSrc) >= 3) &&
    1413           0 :                     (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f &&
    1414           0 :                     (t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f
    1415             :                 ) {
    1416             :                     /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
    1417           0 :                     *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
    1418           0 :                     pSrc += 3;
    1419           0 :                     continue;
    1420             :                 }
    1421             :             } else {
    1422           0 :                 if( /* handle U+0000..U+07FF inline */
    1423           0 :                     ch >= 0xc0 &&
    1424           0 :                     ((pSrcLimit - pSrc) >= 2) &&
    1425           0 :                     (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f
    1426             :                 ) {
    1427           0 :                     *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
    1428           0 :                     pSrc += 2;
    1429           0 :                     continue;
    1430             :                 }
    1431             :             }
    1432             : 
    1433           0 :             if(subchar < 0) {
    1434           0 :                 *pErrorCode = U_INVALID_CHAR_FOUND;
    1435           0 :                 return NULL;
    1436             :             } else {
    1437             :                 /* function call for error cases */
    1438           0 :                 ++pSrc; /* continue after the lead byte */
    1439           0 :                 utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch);
    1440           0 :                 ++numSubstitutions;
    1441           0 :                 if(subchar<=0xFFFF) {
    1442           0 :                     *(pDest++)=(UChar)subchar;
    1443             :                 } else {
    1444           0 :                     *(pDest++)=U16_LEAD(subchar);
    1445           0 :                     if(pDest<pDestLimit) {
    1446           0 :                         *(pDest++)=U16_TRAIL(subchar);
    1447             :                     } else {
    1448           0 :                         reqLength++; /* the trail surrogate did not fit; count it instead */
    1449           0 :                         break;
    1450             :                     }
    1451             :                 }
    1452             :             }
    1453             :         }
    1454             :     }
    1455             : 
    1456             :     /* do not fill the dest buffer just count the UChars needed */
    1457           0 :     while(pSrc < pSrcLimit){
    1458           0 :         ch = *pSrc;
    1459           0 :         if(ch <= 0x7f) {
    1460           0 :             reqLength++;
    1461           0 :             ++pSrc;
    1462             :         } else {
    1463           0 :             if(ch >= 0xe0) {
    1464           0 :                 if( /* handle U+0000..U+FFFF inline */
    1465           0 :                     ch <= 0xef &&
    1466           0 :                     ((pSrcLimit - pSrc) >= 3) &&
    1467           0 :                     (uint8_t)(pSrc[1] - 0x80) <= 0x3f &&
    1468           0 :                     (uint8_t)(pSrc[2] - 0x80) <= 0x3f
    1469             :                 ) {
    1470           0 :                     reqLength++;
    1471           0 :                     pSrc += 3;
    1472           0 :                     continue;
    1473             :                 }
    1474             :             } else {
    1475           0 :                 if( /* handle U+0000..U+07FF inline */
    1476           0 :                     ch >= 0xc0 &&
    1477           0 :                     ((pSrcLimit - pSrc) >= 2) &&
    1478           0 :                     (uint8_t)(pSrc[1] - 0x80) <= 0x3f
    1479             :                 ) {
    1480           0 :                     reqLength++;
    1481           0 :                     pSrc += 2;
    1482           0 :                     continue;
    1483             :                 }
    1484             :             }
    1485             : 
    1486           0 :             if(subchar < 0) {
    1487           0 :                 *pErrorCode = U_INVALID_CHAR_FOUND;
    1488           0 :                 return NULL;
    1489             :             } else {
    1490             :                 /* function call for error cases */
    1491           0 :                 ++pSrc; /* continue after the lead byte */
    1492           0 :                 utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch);
    1493           0 :                 ++numSubstitutions;
    1494           0 :                 reqLength+=U16_LENGTH(subchar); /* count the substitution that would be written; ch still holds only the lead byte */
    1495             :             }
    1496             :         }
    1497             :     }
    1498             : 
    1499           0 :     if(pNumSubstitutions!=NULL) {
    1500           0 :         *pNumSubstitutions=numSubstitutions;
    1501             :     }
    1502             : 
    1503           0 :     reqLength+=(int32_t)(pDest - dest); /* total = UChars written + UChars that did not fit */
    1504           0 :     if(pDestLength) {
    1505           0 :         *pDestLength = reqLength;
    1506             :     }
    1507             : 
    1508             :     /* Terminate the buffer */
    1509           0 :     u_terminateUChars(dest, destCapacity, reqLength, pErrorCode);
    1510           0 :     return dest;
    1511             : }
    1512             : 
    1513             : /**
                     :  * Converts a UTF-16 string to Java "modified UTF-8" bytes.
                     :  *
                     :  * Each UChar is encoded on its own:
                     :  *   - U+0001..U+007F as one byte,
                     :  *   - U+0000 and U+0080..U+07FF as two bytes (so U+0000 becomes C0 80),
                     :  *   - everything else as three bytes; surrogates are not paired or
                     :  *     rejected, each surrogate code unit gets its own 3-byte sequence.
                     :  *
                     :  * The work is done in up to three passes over the remaining input:
                     :  * a fast loop that pre-computes how many iterations are safe without
                     :  * per-access limit checks, a loop that checks the destination space
                     :  * before every write, and a count-only loop that measures the output
                     :  * that did not fit.
                     :  *
                     :  * @param dest         output buffer; may be NULL only if destCapacity==0
                     :  * @param destCapacity capacity of dest in bytes; must be >=0
                     :  * @param pDestLength  if not NULL, receives the number of bytes needed for
                     :  *                     the whole output (may exceed destCapacity)
                     :  * @param src          UTF-16 input; may be NULL only if srcLength==0
                     :  * @param srcLength    number of input UChars, or -1 if NUL-terminated
                     :  * @param pErrorCode   in/out error code; if it already indicates failure the
                     :  *                     function returns NULL without doing anything
                     :  * @return dest, or NULL on an argument error
                     :  */ U_CAPI char* U_EXPORT2 
    1514           0 : u_strToJavaModifiedUTF8(
    1515             :         char *dest,
    1516             :         int32_t destCapacity,
    1517             :         int32_t *pDestLength,
    1518             :         const UChar *src, 
    1519             :         int32_t srcLength,
    1520             :         UErrorCode *pErrorCode) {
    1521           0 :     int32_t reqLength=0; /* bytes needed beyond what was written to dest */
    1522           0 :     uint32_t ch=0;
    1523           0 :     uint8_t *pDest = (uint8_t *)dest;
    1524           0 :     uint8_t *pDestLimit = pDest + destCapacity;
    1525             :     const UChar *pSrcLimit;
    1526             :     int32_t count;
    1527             : 
    1528             :     /* args check */
    1529           0 :     if(U_FAILURE(*pErrorCode)){
    1530           0 :         return NULL;
    1531             :     }
    1532           0 :     if( (src==NULL && srcLength!=0) || srcLength < -1 ||
    1533           0 :         (dest==NULL && destCapacity!=0) || destCapacity<0
    1534             :     ) {
    1535           0 :         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
    1536           0 :         return NULL;
    1537             :     }
    1538             : 
    1539           0 :     if(srcLength==-1) {
    1540             :         /* Convert NUL-terminated ASCII, then find the string length. */
    1541           0 :         while((ch=*src)<=0x7f && ch != 0 && pDest<pDestLimit) {
    1542           0 :             *pDest++ = (uint8_t)ch;
    1543           0 :             ++src;
    1544             :         }
    1545           0 :         if(ch == 0) {
    1546           0 :             reqLength=(int32_t)(pDest - (uint8_t *)dest);
    1547           0 :             if(pDestLength) {
    1548           0 :                 *pDestLength = reqLength;
    1549             :             }
    1550             : 
    1551             :             /* Terminate the buffer */
    1552           0 :             u_terminateChars(dest, destCapacity, reqLength, pErrorCode);
    1553           0 :             return dest;
    1554             :         }
    1555           0 :         srcLength = u_strlen(src); /* src is already past the converted ASCII prefix */
    1556             :     }
    1557             : 
    1558             :     /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */
    1559           0 :     pSrcLimit = (src!=NULL)?(src+srcLength):NULL;
    1560             :     for(;;) {
    1561           0 :         count = (int32_t)(pDestLimit - pDest);
    1562           0 :         srcLength = (int32_t)(pSrcLimit - src);
    1563           0 :         if(count >= srcLength && srcLength > 0 && *src <= 0x7f) {
    1564             :             /* fast ASCII loop: dest has room for every remaining unit, so only pSrcLimit is checked */
    1565           0 :             const UChar *prevSrc = src;
    1566             :             int32_t delta;
    1567           0 :             while(src < pSrcLimit && (ch = *src) <= 0x7f && ch != 0) {
    1568           0 :                 *pDest++=(uint8_t)ch;
    1569           0 :                 ++src;
    1570             :             }
    1571           0 :             delta = (int32_t)(src - prevSrc);
    1572           0 :             count -= delta;
    1573           0 :             srcLength -= delta;
    1574             :         }
    1575             :         /*
    1576             :          * Each iteration of the inner loop progresses by at most 3 UTF-8
    1577             :          * bytes and one UChar.
    1578             :          */
    1579           0 :         count /= 3;
    1580           0 :         if(count > srcLength) {
    1581           0 :             count = srcLength; /* min(remaining dest/3, remaining src) */
    1582             :         }
    1583           0 :         if(count < 3) {
    1584             :             /*
    1585             :              * Too much overhead if we get near the end of the string,
    1586             :              * continue with the next loop.
    1587             :              */
    1588           0 :             break;
    1589             :         }
    1590           0 :         do {
    1591           0 :             ch=*src++;
    1592           0 :             if(ch <= 0x7f && ch != 0) {
    1593           0 :                 *pDest++ = (uint8_t)ch;
    1594           0 :             } else if(ch <= 0x7ff) { /* also taken for ch==0, which yields C0 80 */
    1595           0 :                 *pDest++=(uint8_t)((ch>>6)|0xc0);
    1596           0 :                 *pDest++=(uint8_t)((ch&0x3f)|0x80);
    1597             :             } else { /* everything else, surrogate code units included, as three bytes */
    1598           0 :                 *pDest++=(uint8_t)((ch>>12)|0xe0);
    1599           0 :                 *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
    1600           0 :                 *pDest++=(uint8_t)((ch&0x3f)|0x80);
    1601             :             }
    1602             :         } while(--count > 0);
    1603           0 :     }
    1604             : /* Checked loop: verifies the remaining destination space before every write. */
    1605           0 :     while(src<pSrcLimit) {
    1606           0 :         ch=*src++;
    1607           0 :         if(ch <= 0x7f && ch != 0) {
    1608           0 :             if(pDest<pDestLimit) {
    1609           0 :                 *pDest++ = (uint8_t)ch;
    1610             :             } else {
    1611           0 :                 reqLength = 1; /* bytes needed for this unit; src is already past it */
    1612           0 :                 break;
    1613             :             }
    1614           0 :         } else if(ch <= 0x7ff) {
    1615           0 :             if((pDestLimit - pDest) >= 2) {
    1616           0 :                 *pDest++=(uint8_t)((ch>>6)|0xc0);
    1617           0 :                 *pDest++=(uint8_t)((ch&0x3f)|0x80);
    1618             :             } else {
    1619           0 :                 reqLength = 2;
    1620           0 :                 break;
    1621             :             }
    1622             :         } else {
    1623           0 :             if((pDestLimit - pDest) >= 3) {
    1624           0 :                 *pDest++=(uint8_t)((ch>>12)|0xe0);
    1625           0 :                 *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
    1626           0 :                 *pDest++=(uint8_t)((ch&0x3f)|0x80);
    1627             :             } else {
    1628           0 :                 reqLength = 3;
    1629           0 :                 break;
    1630             :             }
    1631             :         }
    1632             :     }
    1633           0 :     while(src<pSrcLimit) { /* count-only pass over whatever did not fit */
    1634           0 :         ch=*src++;
    1635           0 :         if(ch <= 0x7f && ch != 0) {
    1636           0 :             ++reqLength;
    1637           0 :         } else if(ch<=0x7ff) {
    1638           0 :             reqLength+=2;
    1639             :         } else {
    1640           0 :             reqLength+=3;
    1641             :         }
    1642             :     }
    1643             : 
    1644           0 :     reqLength+=(int32_t)(pDest - (uint8_t *)dest); /* total = bytes written + bytes that did not fit */
    1645           0 :     if(pDestLength){
    1646           0 :         *pDestLength = reqLength;
    1647             :     }
    1648             : 
    1649             :     /* Terminate the buffer */
    1650           0 :     u_terminateChars(dest, destCapacity, reqLength, pErrorCode);
    1651           0 :     return dest;
    1652             : }

Generated by: LCOV version 1.13