LCOV - code coverage report
Current view: top level - intl/icu/source/common - ucnvbocu.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 0 534 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 8 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : ******************************************************************************
       5             : *
       6             : *   Copyright (C) 2002-2016, International Business Machines
       7             : *   Corporation and others.  All Rights Reserved.
       8             : *
       9             : ******************************************************************************
      10             : *   file name:  ucnvbocu.cpp
      11             : *   encoding:   UTF-8
      12             : *   tab size:   8 (not used)
      13             : *   indentation:4
      14             : *
      15             : *   created on: 2002mar27
      16             : *   created by: Markus W. Scherer
      17             : *
      18             : *   This is an implementation of the Binary Ordered Compression for Unicode,
      19             : *   in its MIME-friendly form as defined in http://www.unicode.org/notes/tn6/
      20             : */
      21             : 
      22             : #include "unicode/utypes.h"
      23             : 
      24             : #if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
      25             : 
      26             : #include "unicode/ucnv.h"
      27             : #include "unicode/ucnv_cb.h"
      28             : #include "unicode/utf16.h"
      29             : #include "putilimp.h"
      30             : #include "ucnv_bld.h"
      31             : #include "ucnv_cnv.h"
      32             : #include "uassert.h"
      33             : 
      34             : /* BOCU-1 constants and macros ---------------------------------------------- */
      35             : 
      36             : /*
      37             :  * BOCU-1 encodes the code points of a Unicode string as
      38             :  * a sequence of byte-encoded differences (slope detection),
      39             :  * preserving lexical order.
      40             :  *
      41             :  * Optimize the difference-taking for runs of Unicode text within
      42             :  * small scripts:
      43             :  *
      44             :  * Most small scripts are allocated within aligned 128-blocks of Unicode
      45             :  * code points. Lexical order is preserved if the "previous code point" state
      46             :  * is always moved into the middle of such a block.
      47             :  *
      48             :  * Additionally, "prev" is moved from anywhere in the Unihan and Hangul
      49             :  * areas into the middle of those areas.
      50             :  *
      51             :  * C0 control codes and space are encoded with their US-ASCII bytes.
      52             :  * "prev" is reset for C0 controls but not for space.
      53             :  */
      54             : 
      55             : /* initial value for "prev": middle of the ASCII range */
      56             : #define BOCU1_ASCII_PREV        0x40
      57             : 
      58             : /* bounding byte values for differences; BOCU1_MIDDLE is the byte that encodes a difference of 0 */
      59             : #define BOCU1_MIN               0x21
      60             : #define BOCU1_MIDDLE            0x90
      61             : #define BOCU1_MAX_LEAD          0xfe
      62             : #define BOCU1_MAX_TRAIL         0xff
      63             : #define BOCU1_RESET             0xff
      64             : 
      65             : /* number of lead bytes: 0xfe-0x21+1 = 222 */
      66             : #define BOCU1_COUNT             (BOCU1_MAX_LEAD-BOCU1_MIN+1)
      67             : 
      68             : /* adjust trail byte counts for the use of some (20) C0 control byte values; the offset is 0x21-20 = 13 */
      69             : #define BOCU1_TRAIL_CONTROLS_COUNT  20
      70             : #define BOCU1_TRAIL_BYTE_OFFSET     (BOCU1_MIN-BOCU1_TRAIL_CONTROLS_COUNT)
      71             : 
      72             : /* number of trail bytes: the 223 bytes 0x21..0xff plus the 20 C0 controls = 243 */
      73             : #define BOCU1_TRAIL_COUNT       ((BOCU1_MAX_TRAIL-BOCU1_MIN+1)+BOCU1_TRAIL_CONTROLS_COUNT)
      74             : 
      75             : /*
      76             :  * number of positive and negative single-byte codes
      77             :  * (counting 0==BOCU1_MIDDLE among the positive ones)
      78             :  */
      79             : #define BOCU1_SINGLE            64
      80             : 
      81             : /* number of lead bytes for positive and negative 2/3/4-byte sequences */
      82             : #define BOCU1_LEAD_2            43
      83             : #define BOCU1_LEAD_3            3
      84             : #define BOCU1_LEAD_4            1
      85             : 
      86             : /* The difference value range for single-byters: -64..63. */
      87             : #define BOCU1_REACH_POS_1   (BOCU1_SINGLE-1)
      88             : #define BOCU1_REACH_NEG_1   (-BOCU1_SINGLE)
      89             : 
      90             : /* The difference value range for double-byters: -10513..10512 (64+43*243 values on each side). */
      91             : #define BOCU1_REACH_POS_2   (BOCU1_REACH_POS_1+BOCU1_LEAD_2*BOCU1_TRAIL_COUNT)
      92             : #define BOCU1_REACH_NEG_2   (BOCU1_REACH_NEG_1-BOCU1_LEAD_2*BOCU1_TRAIL_COUNT)
      93             : 
      94             : /* The difference value range for 3-byters: -187660..187659 (3*243*243 more values on each side). */
      95             : #define BOCU1_REACH_POS_3   \
      96             :     (BOCU1_REACH_POS_2+BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT)
      97             : 
      98             : #define BOCU1_REACH_NEG_3   (BOCU1_REACH_NEG_2-BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT)
      99             : 
     100             : /* The lead byte start values: BOCU1_START_POS_n (0xd0, 0xfb, 0xfe) is the first lead byte of positive n-byte sequences. */
     101             : #define BOCU1_START_POS_2   (BOCU1_MIDDLE+BOCU1_REACH_POS_1+1)
     102             : #define BOCU1_START_POS_3   (BOCU1_START_POS_2+BOCU1_LEAD_2)
     103             : #define BOCU1_START_POS_4   (BOCU1_START_POS_3+BOCU1_LEAD_3)
     104             :      /* ==BOCU1_MAX_LEAD */
     105             : /* BOCU1_START_NEG_n (0x50, 0x25, 0x22) is one past the last lead byte of negative n-byte sequences. */
     106             : #define BOCU1_START_NEG_2   (BOCU1_MIDDLE+BOCU1_REACH_NEG_1)
     107             : #define BOCU1_START_NEG_3   (BOCU1_START_NEG_2-BOCU1_LEAD_2)
     108             : #define BOCU1_START_NEG_4   (BOCU1_START_NEG_3-BOCU1_LEAD_3)
     109             :      /* ==BOCU1_MIN+1 */
     110             : 
     111             : /* The length of a byte sequence, according to the lead byte (!=BOCU1_RESET). Evaluates lead more than once. */
     112             : #define BOCU1_LENGTH_FROM_LEAD(lead) \
     113             :     ((BOCU1_START_NEG_2<=(lead) && (lead)<BOCU1_START_POS_2) ? 1 : \
     114             :      (BOCU1_START_NEG_3<=(lead) && (lead)<BOCU1_START_POS_3) ? 2 : \
     115             :      (BOCU1_START_NEG_4<=(lead) && (lead)<BOCU1_START_POS_4) ? 3 : 4)
     116             : 
     117             : /* The length of a byte sequence, according to its packed form: the top byte is the length 1..3, anything larger means 4. */
     118             : #define BOCU1_LENGTH_FROM_PACKED(packed) \
     119             :     ((uint32_t)(packed)<0x04000000 ? (packed)>>24 : 4)
     120             : 
     121             : /*
     122             :  * 12 commonly used C0 control codes (and space) are only used to encode
     123             :  * themselves directly,
     124             :  * which makes BOCU-1 MIME-usable and reasonably safe for
     125             :  * ASCII-oriented software.
     126             :  *
     127             :  * These controls are
     128             :  *  0   NUL
     129             :  *
     130             :  *  7   BEL
     131             :  *  8   BS
     132             :  *
     133             :  *  9   TAB
     134             :  *  a   LF
     135             :  *  b   VT
     136             :  *  c   FF
     137             :  *  d   CR
     138             :  *
     139             :  *  e   SO
     140             :  *  f   SI
     141             :  *
     142             :  * 1a   SUB
     143             :  * 1b   ESC
     144             :  *
     145             :  * The other 20 C0 controls are also encoded directly (to preserve order)
     146             :  * but are also used as trail bytes in difference encoding (for better compression).
     147             :  * BOCU1_TRAIL_TO_BYTE(t) maps trail values 0..19 through bocu1TrailToByte[] and larger ones by adding BOCU1_TRAIL_BYTE_OFFSET.
     148             :  */
     149             : #define BOCU1_TRAIL_TO_BYTE(t) ((t)>=BOCU1_TRAIL_CONTROLS_COUNT ? (t)+BOCU1_TRAIL_BYTE_OFFSET : bocu1TrailToByte[t])
     150             : 
     151             : /*
     152             :  * Byte value map for control codes,
     153             :  * from external byte values 0x00..0x20 (the table has BOCU1_MIN==0x21 entries)
     154             :  * to trail byte values 0..19 (0..0x13) as used in the difference calculation.
     155             :  * External byte values that are illegal as trail bytes (the 12 controls listed above, and space) are mapped to -1.
     156             :  */
     157             : static const int8_t
     158             : bocu1ByteToTrail[BOCU1_MIN]={
     159             : /*  0     1     2     3     4     5     6     7    */
     160             :     -1,   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, -1,
     161             : 
     162             : /*  8     9     a     b     c     d     e     f    */
     163             :     -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
     164             : 
     165             : /*  10    11    12    13    14    15    16    17   */
     166             :     0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
     167             : 
     168             : /*  18    19    1a    1b    1c    1d    1e    1f   */
     169             :     0x0e, 0x0f, -1,   -1,   0x10, 0x11, 0x12, 0x13,
     170             : 
     171             : /*  20   */
     172             :     -1
     173             : };
     174             : 
     175             : /*
     176             :  * Byte value map for control codes (the inverse of bocu1ByteToTrail[] for its non-negative entries),
     177             :  * from trail byte values 0..19 (0..0x13) as used in the difference calculation
     178             :  * to external byte values 0x00..0x20.
     179             :  */
     180             : static const int8_t
     181             : bocu1TrailToByte[BOCU1_TRAIL_CONTROLS_COUNT]={
     182             : /*  0     1     2     3     4     5     6     7    */
     183             :     0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x10, 0x11,
     184             : 
     185             : /*  8     9     a     b     c     d     e     f    */
     186             :     0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
     187             : 
     188             : /*  10    11    12    13   */
     189             :     0x1c, 0x1d, 0x1e, 0x1f
     190             : };
     191             : 
     192             : /**
     193             :  * Integer division and modulo with negative numerators
     194             :  * yields negative modulo results and quotients that are one more than
     195             :  * what we need here.
     196             :  * This macro adjusts the results so that the modulo-value m is always >=0.
     197             :  * (For a positive divisor d this is floor division: the quotient is rounded toward negative infinity.)
     198             :  * For positive n, the if() condition is always FALSE.
     199             :  * The macro expands to a brace-enclosed block and evaluates n, d and m more than once.
     200             :  * @param n Number to be split into quotient and rest.
     201             :  *          Will be modified to contain the quotient.
     202             :  * @param d Divisor.
     203             :  * @param m Output variable for the rest (modulo result).
     204             :  */
     205             : #define NEGDIVMOD(n, d, m) { \
     206             :     (m)=(n)%(d); \
     207             :     (n)/=(d); \
     208             :     if((m)<0) { \
     209             :         --(n); \
     210             :         (m)+=(d); \
     211             :     } \
     212             : }
     213             : 
     214             : /* Faster versions of packDiff() for single-byte-encoded diff values. */
     215             : 
     216             : /** Is a diff value encodable in a single byte, i.e. within -64..63? Evaluates diff twice. */
     217             : #define DIFF_IS_SINGLE(diff) (BOCU1_REACH_NEG_1<=(diff) && (diff)<=BOCU1_REACH_POS_1)
     218             : 
     219             : /** Encode a diff value in a single byte (only meaningful when DIFF_IS_SINGLE(diff)). */
     220             : #define PACK_SINGLE_DIFF(diff) (BOCU1_MIDDLE+(diff))
     221             : 
     222             : /** Is a diff value encodable in two bytes (-10513..10512)? Also true for single-byte diffs, so test DIFF_IS_SINGLE first. */
     223             : #define DIFF_IS_DOUBLE(diff) (BOCU1_REACH_NEG_2<=(diff) && (diff)<=BOCU1_REACH_POS_2)
     224             : 
     225             : /* BOCU-1 implementation functions ------------------------------------------ */
     226             : /* Default "prev": offset 0x40 (the middle) inside the aligned 128-block that contains c. */
     227             : #define BOCU1_SIMPLE_PREV(c) (((c)&~0x7f)+BOCU1_ASCII_PREV)
     228             : 
     229             : /**
     230             :  * Compute the next "previous" value for differencing
     231             :  * from the current code point.
     232             :  * The commented-out bounds below rely on the caller (BOCU1_PREV) passing only 0x3040..0xd7a3.
     233             :  * @param c current code point, 0x3040..0xd7a3 (rest handled by macro below)
     234             :  * @return "previous code point" state value
     235             :  */
     236             : static inline int32_t
     237           0 : bocu1Prev(int32_t c) {
     238             :     /* compute new prev */
     239           0 :     if(/* 0x3040<=c && */ c<=0x309f) {
     240             :         /* Hiragana is not 128-aligned */
     241           0 :         return 0x3070;
     242           0 :     } else if(0x4e00<=c && c<=0x9fa5) {
     243             :         /* CJK Unihan: 0x4e00+10513==0x7711, so that U+4E00 sits exactly at the negative end of the two-byte reach */
     244           0 :         return 0x4e00-BOCU1_REACH_NEG_2;
     245           0 :     } else if(0xac00<=c /* && c<=0xd7a3 */) {
     246             :         /* Korean Hangul: arithmetic middle of 0xac00..0xd7a3, i.e. 0xc1d1 */
     247           0 :         return (0xd7a3+0xac00)/2;
     248             :     } else {
     249             :         /* mostly small scripts */
     250           0 :         return BOCU1_SIMPLE_PREV(c);
     251             :     }
     252             : }
     253             : 
     254             : /** Fast version of bocu1Prev() for most scripts: only code points 0x3040..0xd7a3 call the function. Evaluates c more than once. */
     255             : #define BOCU1_PREV(c) ((c)<0x3040 || (c)>0xd7a3 ? BOCU1_SIMPLE_PREV(c) : bocu1Prev(c))
     256             : 
     257             : /*
     258             :  * The BOCU-1 converter uses the standard setup code in ucnv.c/ucnv_bld.c.
     259             :  * The UConverter fields are used as follows:
     260             :  *
     261             :  * fromUnicodeStatus    encoder's prev (0 will be interpreted as BOCU1_ASCII_PREV)
     262             :  *
     263             :  * toUnicodeStatus      decoder's prev (0 will be interpreted as BOCU1_ASCII_PREV)
     264             :  * mode                 decoder's incomplete (diff<<2)|count (ignored when toULength==0)
     265             :  */
     266             : 
     267             : /* BOCU-1-from-Unicode conversion functions --------------------------------- */
     268             : 
     269             : /**
     270             :  * Encode a difference -0x10ffff..0x10ffff in 1..4 bytes
     271             :  * and return a packed integer with them.
     272             :  *
     273             :  * The encoding favors small absolute differences with short encodings
     274             :  * to compress runs of same-script characters.
     275             :  *
     276             :  * Optimized version with unrolled loops and fewer division/modulo operations
     277             :  * than the standard packDiff(). NOTE(review): this text used to say "floating-point"; only integer arithmetic is used here.
     278             :  * Single-byte differences must not be passed in (see the U_ASSERT below); PACK_SINGLE_DIFF() handles those.
     279             :  * @param diff difference value -0x10ffff..0x10ffff
     280             :  * @return
     281             :  *      0x010000zz for 1-byte sequence zz (not produced by this version, see the #if 0 below)
     282             :  *      0x0200yyzz for 2-byte sequence yy zz
     283             :  *      0x03xxyyzz for 3-byte sequence xx yy zz
     284             :  *      0xwwxxyyzz for 4-byte sequence ww xx yy zz (ww>0x03)
     285             :  */
     286             : static int32_t
     287           0 : packDiff(int32_t diff) {
     288             :     int32_t result, m;
     289             : 
     290           0 :     U_ASSERT(!DIFF_IS_SINGLE(diff)); /* assume we won't be called where diff==BOCU1_REACH_NEG_1=-64 */
     291           0 :     if(diff>=BOCU1_REACH_NEG_1) {
     292             :         /* mostly positive differences, and single-byte negative ones */
     293             : #if 0   /* single-byte case handled in macros, see below */
     294             :         if(diff<=BOCU1_REACH_POS_1) {
     295             :             /* single byte */
     296             :             return 0x01000000|(BOCU1_MIDDLE+diff);
     297             :         } else
     298             : #endif
     299           0 :         if(diff<=BOCU1_REACH_POS_2) {
     300             :             /* two bytes */
     301           0 :             diff-=BOCU1_REACH_POS_1+1; /* rebase: the smallest two-byte difference becomes 0 */
     302           0 :             result=0x02000000;
     303             : 
     304           0 :             m=diff%BOCU1_TRAIL_COUNT;
     305           0 :             diff/=BOCU1_TRAIL_COUNT;
     306           0 :             result|=BOCU1_TRAIL_TO_BYTE(m);
     307             : 
     308           0 :             result|=(BOCU1_START_POS_2+diff)<<8;
     309           0 :         } else if(diff<=BOCU1_REACH_POS_3) {
     310             :             /* three bytes */
     311           0 :             diff-=BOCU1_REACH_POS_2+1; /* rebase: the smallest three-byte difference becomes 0 */
     312           0 :             result=0x03000000;
     313             : 
     314           0 :             m=diff%BOCU1_TRAIL_COUNT;
     315           0 :             diff/=BOCU1_TRAIL_COUNT;
     316           0 :             result|=BOCU1_TRAIL_TO_BYTE(m);
     317             : 
     318           0 :             m=diff%BOCU1_TRAIL_COUNT;
     319           0 :             diff/=BOCU1_TRAIL_COUNT;
     320           0 :             result|=BOCU1_TRAIL_TO_BYTE(m)<<8;
     321             : 
     322           0 :             result|=(BOCU1_START_POS_3+diff)<<16;
     323             :         } else {
     324             :             /* four bytes */
     325           0 :             diff-=BOCU1_REACH_POS_3+1; /* rebase: the smallest four-byte difference becomes 0 */
     326             : 
     327           0 :             m=diff%BOCU1_TRAIL_COUNT;
     328           0 :             diff/=BOCU1_TRAIL_COUNT;
     329           0 :             result=BOCU1_TRAIL_TO_BYTE(m);
     330             : 
     331           0 :             m=diff%BOCU1_TRAIL_COUNT;
     332           0 :             diff/=BOCU1_TRAIL_COUNT;
     333           0 :             result|=BOCU1_TRAIL_TO_BYTE(m)<<8;
     334             : 
     335             :             /*
     336             :              * We know that / and % would deliver quotient 0 and rest=diff.
     337             :              * Avoid division and modulo for performance.
     338             :              */
     339           0 :             result|=BOCU1_TRAIL_TO_BYTE(diff)<<16;
     340             : 
     341           0 :             result|=((uint32_t)BOCU1_START_POS_4)<<24; /* the lead byte 0xfe is >0x03, which marks the packed value as 4 bytes long */
     342             :         }
     343             :     } else {
     344             :         /* two- to four-byte negative differences */
     345           0 :         if(diff>=BOCU1_REACH_NEG_2) {
     346             :             /* two bytes */
     347           0 :             diff-=BOCU1_REACH_NEG_1; /* rebase: diff is now -1 or less, NEGDIVMOD yields a negative quotient and m>=0 */
     348           0 :             result=0x02000000;
     349             : 
     350           0 :             NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
     351           0 :             result|=BOCU1_TRAIL_TO_BYTE(m);
     352             : 
     353           0 :             result|=(BOCU1_START_NEG_2+diff)<<8; /* diff is negative here: the lead byte lies below BOCU1_START_NEG_2 */
     354           0 :         } else if(diff>=BOCU1_REACH_NEG_3) {
     355             :             /* three bytes */
     356           0 :             diff-=BOCU1_REACH_NEG_2;
     357           0 :             result=0x03000000;
     358             : 
     359           0 :             NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
     360           0 :             result|=BOCU1_TRAIL_TO_BYTE(m);
     361             : 
     362           0 :             NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
     363           0 :             result|=BOCU1_TRAIL_TO_BYTE(m)<<8;
     364             : 
     365           0 :             result|=(BOCU1_START_NEG_3+diff)<<16;
     366             :         } else {
     367             :             /* four bytes */
     368           0 :             diff-=BOCU1_REACH_NEG_3;
     369             : 
     370           0 :             NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
     371           0 :             result=BOCU1_TRAIL_TO_BYTE(m);
     372             : 
     373           0 :             NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
     374           0 :             result|=BOCU1_TRAIL_TO_BYTE(m)<<8;
     375             : 
     376             :             /*
     377             :              * We know that NEGDIVMOD would deliver
     378             :              * quotient -1 and rest=diff+BOCU1_TRAIL_COUNT.
     379             :              * Avoid division and modulo for performance.
     380             :              */
     381           0 :             m=diff+BOCU1_TRAIL_COUNT;
     382           0 :             result|=BOCU1_TRAIL_TO_BYTE(m)<<16;
     383             : 
     384           0 :             result|=BOCU1_MIN<<24; /* BOCU1_MIN (0x21) is the single lead byte for negative four-byte sequences */
     385             :         }
     386             :     }
     387           0 :     return result;
     388             : }
     389             : 
     390             : 
     391             : static void U_CALLCONV
     392           0 : _Bocu1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
     393             :                              UErrorCode *pErrorCode) {
     394             :     UConverter *cnv;
     395             :     const UChar *source, *sourceLimit;
     396             :     uint8_t *target;
     397             :     int32_t targetCapacity;
     398             :     int32_t *offsets;
     399             : 
     400             :     int32_t prev, c, diff;
     401             : 
     402             :     int32_t sourceIndex, nextSourceIndex;
     403             : 
     404             :     /* set up the local pointers */
     405           0 :     cnv=pArgs->converter;
     406           0 :     source=pArgs->source;
     407           0 :     sourceLimit=pArgs->sourceLimit;
     408           0 :     target=(uint8_t *)pArgs->target;
     409           0 :     targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
     410           0 :     offsets=pArgs->offsets;
     411             : 
     412             :     /* get the converter state from UConverter */
     413           0 :     c=cnv->fromUChar32;
     414           0 :     prev=(int32_t)cnv->fromUnicodeStatus;
     415           0 :     if(prev==0) {
     416           0 :         prev=BOCU1_ASCII_PREV;
     417             :     }
     418             : 
     419             :     /* sourceIndex=-1 if the current character began in the previous buffer */
     420           0 :     sourceIndex= c==0 ? 0 : -1;
     421           0 :     nextSourceIndex=0;
     422             : 
     423             :     /* conversion loop */
     424           0 :     if(c!=0 && targetCapacity>0) {
     425           0 :         goto getTrail;
     426             :     }
     427             : 
     428             : fastSingle:
     429             :     /* fast loop for single-byte differences */
     430             :     /* use only one loop counter variable, targetCapacity, not also source */
     431           0 :     diff=(int32_t)(sourceLimit-source);
     432           0 :     if(targetCapacity>diff) {
     433           0 :         targetCapacity=diff;
     434             :     }
     435           0 :     while(targetCapacity>0 && (c=*source)<0x3000) {
     436           0 :         if(c<=0x20) {
     437           0 :             if(c!=0x20) {
     438           0 :                 prev=BOCU1_ASCII_PREV;
     439             :             }
     440           0 :             *target++=(uint8_t)c;
     441           0 :             *offsets++=nextSourceIndex++;
     442           0 :             ++source;
     443           0 :             --targetCapacity;
     444             :         } else {
     445           0 :             diff=c-prev;
     446           0 :             if(DIFF_IS_SINGLE(diff)) {
     447           0 :                 prev=BOCU1_SIMPLE_PREV(c);
     448           0 :                 *target++=(uint8_t)PACK_SINGLE_DIFF(diff);
     449           0 :                 *offsets++=nextSourceIndex++;
     450           0 :                 ++source;
     451           0 :                 --targetCapacity;
     452             :             } else {
     453             :                 break;
     454             :             }
     455             :         }
     456             :     }
     457             :     /* restore real values */
     458           0 :     targetCapacity=(int32_t)((const uint8_t *)pArgs->targetLimit-target);
     459           0 :     sourceIndex=nextSourceIndex; /* wrong if offsets==NULL but does not matter */
     460             : 
     461             :     /* regular loop for all cases */
     462           0 :     while(source<sourceLimit) {
     463           0 :         if(targetCapacity>0) {
     464           0 :             c=*source++;
     465           0 :             ++nextSourceIndex;
     466             : 
     467           0 :             if(c<=0x20) {
     468             :                 /*
     469             :                  * ISO C0 control & space:
     470             :                  * Encode directly for MIME compatibility,
     471             :                  * and reset state except for space, to not disrupt compression.
     472             :                  */
     473           0 :                 if(c!=0x20) {
     474           0 :                     prev=BOCU1_ASCII_PREV;
     475             :                 }
     476           0 :                 *target++=(uint8_t)c;
     477           0 :                 *offsets++=sourceIndex;
     478           0 :                 --targetCapacity;
     479             : 
     480           0 :                 sourceIndex=nextSourceIndex;
     481           0 :                 continue;
     482             :             }
     483             : 
     484           0 :             if(U16_IS_LEAD(c)) {
     485             : getTrail:
     486           0 :                 if(source<sourceLimit) {
     487             :                     /* test the following code unit */
     488           0 :                     UChar trail=*source;
     489           0 :                     if(U16_IS_TRAIL(trail)) {
     490           0 :                         ++source;
     491           0 :                         ++nextSourceIndex;
     492           0 :                         c=U16_GET_SUPPLEMENTARY(c, trail);
     493             :                     }
     494             :                 } else {
     495             :                     /* no more input */
     496           0 :                     c=-c; /* negative lead surrogate as "incomplete" indicator to avoid c=0 everywhere else */
     497           0 :                     break;
     498             :                 }
     499             :             }
     500             : 
     501             :             /*
     502             :              * all other Unicode code points c==U+0021..U+10ffff
     503             :              * are encoded with the difference c-prev
     504             :              *
     505             :              * a new prev is computed from c,
     506             :              * placed in the middle of a 0x80-block (for most small scripts) or
     507             :              * in the middle of the Unihan and Hangul blocks
     508             :              * to statistically minimize the following difference
     509             :              */
     510           0 :             diff=c-prev;
     511           0 :             prev=BOCU1_PREV(c);
     512           0 :             if(DIFF_IS_SINGLE(diff)) {
     513           0 :                 *target++=(uint8_t)PACK_SINGLE_DIFF(diff);
     514           0 :                 *offsets++=sourceIndex;
     515           0 :                 --targetCapacity;
     516           0 :                 sourceIndex=nextSourceIndex;
     517           0 :                 if(c<0x3000) {
     518           0 :                     goto fastSingle;
     519             :                 }
     520           0 :             } else if(DIFF_IS_DOUBLE(diff) && 2<=targetCapacity) {
     521             :                 /* optimize 2-byte case */
     522             :                 int32_t m;
     523             : 
     524           0 :                 if(diff>=0) {
     525           0 :                     diff-=BOCU1_REACH_POS_1+1;
     526           0 :                     m=diff%BOCU1_TRAIL_COUNT;
     527           0 :                     diff/=BOCU1_TRAIL_COUNT;
     528           0 :                     diff+=BOCU1_START_POS_2;
     529             :                 } else {
     530           0 :                     diff-=BOCU1_REACH_NEG_1;
     531           0 :                     NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
     532           0 :                     diff+=BOCU1_START_NEG_2;
     533             :                 }
     534           0 :                 *target++=(uint8_t)diff;
     535           0 :                 *target++=(uint8_t)BOCU1_TRAIL_TO_BYTE(m);
     536           0 :                 *offsets++=sourceIndex;
     537           0 :                 *offsets++=sourceIndex;
     538           0 :                 targetCapacity-=2;
     539           0 :                 sourceIndex=nextSourceIndex;
     540             :             } else {
     541             :                 int32_t length; /* will be 2..4 */
     542             : 
     543           0 :                 diff=packDiff(diff);
     544           0 :                 length=BOCU1_LENGTH_FROM_PACKED(diff);
     545             : 
     546             :                 /* write the output character bytes from diff and length */
     547             :                 /* from the first if in the loop we know that targetCapacity>0 */
     548           0 :                 if(length<=targetCapacity) {
     549           0 :                     switch(length) {
     550             :                         /* each branch falls through to the next one */
     551             :                     case 4:
     552           0 :                         *target++=(uint8_t)(diff>>24);
     553           0 :                         *offsets++=sourceIndex;
     554             :                         U_FALLTHROUGH;
     555             :                     case 3:
     556           0 :                         *target++=(uint8_t)(diff>>16);
     557           0 :                         *offsets++=sourceIndex;
     558             :                         U_FALLTHROUGH;
     559             :                     case 2:
     560           0 :                         *target++=(uint8_t)(diff>>8);
     561           0 :                         *offsets++=sourceIndex;
     562             :                     /* case 1: handled above */
     563           0 :                         *target++=(uint8_t)diff;
     564           0 :                         *offsets++=sourceIndex;
     565             :                         U_FALLTHROUGH;
     566             :                     default:
     567             :                         /* will never occur */
     568           0 :                         break;
     569             :                     }
     570           0 :                     targetCapacity-=length;
     571           0 :                     sourceIndex=nextSourceIndex;
     572             :                 } else {
     573             :                     uint8_t *charErrorBuffer;
     574             : 
     575             :                     /*
     576             :                      * We actually do this backwards here:
     577             :                      * In order to save an intermediate variable, we output
     578             :                      * first to the overflow buffer what does not fit into the
     579             :                      * regular target.
     580             :                      */
     581             :                     /* we know that 1<=targetCapacity<length<=4 */
     582           0 :                     length-=targetCapacity;
     583           0 :                     charErrorBuffer=(uint8_t *)cnv->charErrorBuffer;
     584           0 :                     switch(length) {
     585             :                         /* each branch falls through to the next one */
     586             :                     case 3:
     587           0 :                         *charErrorBuffer++=(uint8_t)(diff>>16);
     588             :                         U_FALLTHROUGH;
     589             :                     case 2:
     590           0 :                         *charErrorBuffer++=(uint8_t)(diff>>8);
     591             :                         U_FALLTHROUGH;
     592             :                     case 1:
     593           0 :                         *charErrorBuffer=(uint8_t)diff;
     594             :                         U_FALLTHROUGH;
     595             :                     default:
     596             :                         /* will never occur */
     597           0 :                         break;
     598             :                     }
     599           0 :                     cnv->charErrorBufferLength=(int8_t)length;
     600             : 
     601             :                     /* now output what fits into the regular target */
     602           0 :                     diff>>=8*length; /* length was reduced by targetCapacity */
     603           0 :                     switch(targetCapacity) {
     604             :                         /* each branch falls through to the next one */
     605             :                     case 3:
     606           0 :                         *target++=(uint8_t)(diff>>16);
     607           0 :                         *offsets++=sourceIndex;
     608             :                         U_FALLTHROUGH;
     609             :                     case 2:
     610           0 :                         *target++=(uint8_t)(diff>>8);
     611           0 :                         *offsets++=sourceIndex;
     612             :                         U_FALLTHROUGH;
     613             :                     case 1:
     614           0 :                         *target++=(uint8_t)diff;
     615           0 :                         *offsets++=sourceIndex;
     616             :                         U_FALLTHROUGH;
     617             :                     default:
     618             :                         /* will never occur */
     619           0 :                         break;
     620             :                     }
     621             : 
     622             :                     /* target overflow */
     623           0 :                     targetCapacity=0;
     624           0 :                     *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     625           0 :                     break;
     626             :                 }
     627             :             }
     628             :         } else {
     629             :             /* target is full */
     630           0 :             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     631           0 :             break;
     632             :         }
     633             :     }
     634             : 
     635             :     /* set the converter state back into UConverter */
     636           0 :     cnv->fromUChar32= c<0 ? -c : 0;
     637           0 :     cnv->fromUnicodeStatus=(uint32_t)prev;
     638             : 
     639             :     /* write back the updated pointers */
     640           0 :     pArgs->source=source;
     641           0 :     pArgs->target=(char *)target;
     642           0 :     pArgs->offsets=offsets;
     643           0 : }
     644             : 
     645             : /*
     646             :  * Encodes UTF-16 from pArgs->source as BOCU-1 bytes into pArgs->target; prev and
     647             :  * a pending lead surrogate are kept in the converter across calls.
     648             :  * Identical to _Bocu1FromUnicodeWithOffsets but without offset handling:
     649             :  * mirror any change made there, or re-copy that function and remove the
     650             :  * variables offsets, sourceIndex, and nextSourceIndex.
     651             :  */
     652             : static void U_CALLCONV
     653           0 : _Bocu1FromUnicode(UConverterFromUnicodeArgs *pArgs,
     654             :                   UErrorCode *pErrorCode) {
     655             :     UConverter *cnv;
     656             :     const UChar *source, *sourceLimit;
     657             :     uint8_t *target;
     658             :     int32_t targetCapacity;
     659             : 
     660             :     int32_t prev, c, diff;
     661             : 
     662             :     /* set up the local pointers */
     663           0 :     cnv=pArgs->converter;
     664           0 :     source=pArgs->source;
     665           0 :     sourceLimit=pArgs->sourceLimit;
     666           0 :     target=(uint8_t *)pArgs->target;
     667           0 :     targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); /* room left in the target, in bytes */
     668             : 
     669             :     /* get the converter state from UConverter */
     670           0 :     c=cnv->fromUChar32; /* nonzero: lead surrogate left pending at the end of the previous call */
     671           0 :     prev=(int32_t)cnv->fromUnicodeStatus;
     672           0 :     if(prev==0) { /* a stored 0 is replaced by the ASCII start value */
     673           0 :         prev=BOCU1_ASCII_PREV;
     674             :     }
     675             : 
     676             :     /* conversion loop */
     677           0 :     if(c!=0 && targetCapacity>0) { /* resume the pending surrogate only if something can be written */
     678           0 :         goto getTrail;
     679             :     }
     680             : 
     681             : fastSingle:
     682             :     /* fast loop for single-byte differences */
     683             :     /* use only one loop counter variable, targetCapacity, not also source */
     684           0 :     diff=(int32_t)(sourceLimit-source);
     685           0 :     if(targetCapacity>diff) { /* clamp so the counter also keeps reads below sourceLimit */
     686           0 :         targetCapacity=diff;
     687             :     }
     688           0 :     while(targetCapacity>0 && (c=*source)<0x3000) { /* code units from U+3000 up go to the regular loop */
     689           0 :         if(c<=0x20) {
     690           0 :             if(c!=0x20) { /* C0 controls reset prev; space leaves it alone */
     691           0 :                 prev=BOCU1_ASCII_PREV;
     692             :             }
     693           0 :             *target++=(uint8_t)c;
     694             :         } else {
     695           0 :             diff=c-prev;
     696           0 :             if(DIFF_IS_SINGLE(diff)) {
     697           0 :                 prev=BOCU1_SIMPLE_PREV(c);
     698           0 :                 *target++=(uint8_t)PACK_SINGLE_DIFF(diff);
     699             :             } else {
     700             :                 break; /* needs more than one byte: source is not advanced, the regular loop re-reads it */
     701             :             }
     702             :         }
     703           0 :         ++source;
     704           0 :         --targetCapacity;
     705             :     }
     706             :     /* restore real values: the clamp above may have shrunk targetCapacity */
     707           0 :     targetCapacity=(int32_t)((const uint8_t *)pArgs->targetLimit-target);
     708             : 
     709             :     /* regular loop for all cases */
     710           0 :     while(source<sourceLimit) {
     711           0 :         if(targetCapacity>0) {
     712           0 :             c=*source++;
     713             : 
     714           0 :             if(c<=0x20) {
     715             :                 /*
     716             :                  * ISO C0 control & space:
     717             :                  * Encode directly for MIME compatibility,
     718             :                  * and reset state except for space, to not disrupt compression.
     719             :                  */
     720           0 :                 if(c!=0x20) {
     721           0 :                     prev=BOCU1_ASCII_PREV;
     722             :                 }
     723           0 :                 *target++=(uint8_t)c;
     724           0 :                 --targetCapacity;
     725           0 :                 continue;
     726             :             }
     727             : 
     728           0 :             if(U16_IS_LEAD(c)) {
     729             : getTrail: /* also entered from the top of the function with c restored from the converter */
     730           0 :                 if(source<sourceLimit) {
     731             :                     /* test the following code unit */
     732           0 :                     UChar trail=*source;
     733           0 :                     if(U16_IS_TRAIL(trail)) { /* otherwise c stays the unpaired lead and is encoded as is */
     734           0 :                         ++source;
     735           0 :                         c=U16_GET_SUPPLEMENTARY(c, trail);
     736             :                     }
     737             :                 } else {
     738             :                     /* no more input */
     739           0 :                     c=-c; /* negative lead surrogate as "incomplete" indicator to avoid c=0 everywhere else */
     740           0 :                     break;
     741             :                 }
     742             :             }
     743             : 
     744             :             /*
     745             :              * all other Unicode code points c==U+0021..U+10ffff
     746             :              * are encoded with the difference c-prev
     747             :              *
     748             :              * a new prev is computed from c,
     749             :              * placed in the middle of a 0x80-block (for most small scripts) or
     750             :              * in the middle of the Unihan and Hangul blocks
     751             :              * to statistically minimize the following difference
     752             :              */
     753           0 :             diff=c-prev;
     754           0 :             prev=BOCU1_PREV(c);
     755           0 :             if(DIFF_IS_SINGLE(diff)) {
     756           0 :                 *target++=(uint8_t)PACK_SINGLE_DIFF(diff);
     757           0 :                 --targetCapacity;
     758           0 :                 if(c<0x3000) { /* small code point: give the fast loop another try */
     759           0 :                     goto fastSingle;
     760             :                 }
     761           0 :             } else if(DIFF_IS_DOUBLE(diff) && 2<=targetCapacity) { /* with less room, the general branch below splits the bytes */
     762             :                 /* optimize 2-byte case */
     763             :                 int32_t m;
     764             : 
     765           0 :                 if(diff>=0) {
     766           0 :                     diff-=BOCU1_REACH_POS_1+1;
     767           0 :                     m=diff%BOCU1_TRAIL_COUNT;
     768           0 :                     diff/=BOCU1_TRAIL_COUNT;
     769           0 :                     diff+=BOCU1_START_POS_2;
     770             :                 } else {
     771           0 :                     diff-=BOCU1_REACH_NEG_1;
     772           0 :                     NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
     773           0 :                     diff+=BOCU1_START_NEG_2;
     774             :                 }
     775           0 :                 *target++=(uint8_t)diff; /* lead byte */
     776           0 :                 *target++=(uint8_t)BOCU1_TRAIL_TO_BYTE(m); /* trail byte */
     777           0 :                 targetCapacity-=2;
     778             :             } else {
     779             :                 int32_t length; /* will be 2..4 */
     780             : 
     781           0 :                 diff=packDiff(diff);
     782           0 :                 length=BOCU1_LENGTH_FROM_PACKED(diff);
     783             : 
     784             :                 /* write the output character bytes from diff and length */
     785             :                 /* from the first if in the loop we know that targetCapacity>0 */
     786           0 :                 if(length<=targetCapacity) {
     787           0 :                     switch(length) {
     788             :                         /* each branch falls through to the next one */
     789             :                     case 4:
     790           0 :                         *target++=(uint8_t)(diff>>24);
     791             :                         U_FALLTHROUGH;
     792             :                     case 3:
     793           0 :                         *target++=(uint8_t)(diff>>16);
     794             :                     /* case 2: handled above */
     795           0 :                         *target++=(uint8_t)(diff>>8);
     796             :                     /* case 1: handled above */
     797           0 :                         *target++=(uint8_t)diff;
     798             :                         U_FALLTHROUGH;
     799             :                     default:
     800             :                         /* will never occur */
     801           0 :                         break;
     802             :                     }
     803           0 :                     targetCapacity-=length;
     804             :                 } else {
     805             :                     uint8_t *charErrorBuffer;
     806             : 
     807             :                     /*
     808             :                      * We actually do this backwards here:
     809             :                      * In order to save an intermediate variable, we output
     810             :                      * first to the overflow buffer what does not fit into the
     811             :                      * regular target.
     812             :                      */
     813             :                     /* we know that 1<=targetCapacity<length<=4 */
     814           0 :                     length-=targetCapacity; /* from here on: number of bytes that do not fit */
     815           0 :                     charErrorBuffer=(uint8_t *)cnv->charErrorBuffer;
     816           0 :                     switch(length) {
     817             :                         /* each branch falls through to the next one */
     818             :                     case 3:
     819           0 :                         *charErrorBuffer++=(uint8_t)(diff>>16);
     820             :                         U_FALLTHROUGH;
     821             :                     case 2:
     822           0 :                         *charErrorBuffer++=(uint8_t)(diff>>8);
     823             :                         U_FALLTHROUGH;
     824             :                     case 1:
     825           0 :                         *charErrorBuffer=(uint8_t)diff;
     826             :                         U_FALLTHROUGH;
     827             :                     default:
     828             :                         /* will never occur */
     829           0 :                         break;
     830             :                     }
     831           0 :                     cnv->charErrorBufferLength=(int8_t)length; /* count of bytes stored in charErrorBuffer */
     832             : 
     833             :                     /* now output what fits into the regular target */
     834           0 :                     diff>>=8*length; /* length was reduced by targetCapacity */
     835           0 :                     switch(targetCapacity) {
     836             :                         /* each branch falls through to the next one */
     837             :                     case 3:
     838           0 :                         *target++=(uint8_t)(diff>>16);
     839             :                         U_FALLTHROUGH;
     840             :                     case 2:
     841           0 :                         *target++=(uint8_t)(diff>>8);
     842             :                         U_FALLTHROUGH;
     843             :                     case 1:
     844           0 :                         *target++=(uint8_t)diff;
     845             :                         U_FALLTHROUGH;
     846             :                     default:
     847             :                         /* will never occur */
     848           0 :                         break;
     849             :                     }
     850             : 
     851             :                     /* target overflow */
     852           0 :                     targetCapacity=0;
     853           0 :                     *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     854           0 :                     break;
     855             :                 }
     856             :             }
     857             :         } else {
     858             :             /* target is full */
     859           0 :             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     860           0 :             break;
     861             :         }
     862             :     }
     863             : 
     864             :     /* set the converter state back into UConverter */
     865           0 :     cnv->fromUChar32= c<0 ? -c : 0; /* c<0 only for a lead surrogate cut off at the end of the input */
     866           0 :     cnv->fromUnicodeStatus=(uint32_t)prev;
     867             : 
     868             :     /* write back the updated pointers */
     869           0 :     pArgs->source=source;
     870           0 :     pArgs->target=(char *)target;
     871           0 : }
     872             : 
     873             : /* BOCU-1-to-Unicode conversion functions ----------------------------------- */
     874             : 
     875             : /**
     876             :  * Function for BOCU-1 decoder; handles multi-byte lead bytes.
     877             :  * Yields the lead byte's partial difference and how many trail bytes follow.
     878             :  * @param b lead byte;
     879             :  *          BOCU1_MIN<=b<BOCU1_START_NEG_2 or BOCU1_START_POS_2<=b<BOCU1_MAX_LEAD
     880             :  * @return (diff<<2)|count, with count=1..3 trail bytes still to be read
     881             :  */
     882             : static inline int32_t
     883           0 : decodeBocu1LeadByte(int32_t b) {
     884             :     int32_t diff, count;
     885             : 
     886           0 :     if(b>=BOCU1_START_NEG_2) {
     887             :         /* positive difference */
     888           0 :         if(b<BOCU1_START_POS_3) {
     889             :             /* two bytes */
     890           0 :             diff=((int32_t)b-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1;
     891           0 :             count=1;
     892           0 :         } else if(b<BOCU1_START_POS_4) {
     893             :             /* three bytes */
     894           0 :             diff=((int32_t)b-BOCU1_START_POS_3)*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_2+1;
     895           0 :             count=2;
     896             :         } else {
     897             :             /* four bytes */
     898           0 :             diff=BOCU1_REACH_POS_3+1; /* constant: this branch has no term that depends on b */
     899           0 :             count=3;
     900             :         }
     901             :     } else {
     902             :         /* negative difference */
     903           0 :         if(b>=BOCU1_START_NEG_3) {
     904             :             /* two bytes */
     905           0 :             diff=((int32_t)b-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1;
     906           0 :             count=1;
     907           0 :         } else if(b>BOCU1_MIN) {
     908             :             /* three bytes */
     909           0 :             diff=((int32_t)b-BOCU1_START_NEG_3)*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_2;
     910           0 :             count=2;
     911             :         } else {
     912             :             /* four bytes */
     913           0 :             diff=-BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_3; /* constant: does not depend on b */
     914           0 :             count=3;
     915             :         }
     916             :     }
     917             : 
     918             :     /* return the state for decoding the trail byte(s) */
     919           0 :     return (diff<<2)|count; /* the caller unpacks this with &3 (count) and >>2 (diff) */
     920             : }
     921             : 
     922             : /**
     923             :  * Function for BOCU-1 decoder; handles multi-byte trail bytes.
     924             :  * Maps the byte to its trail value and weights it by its position (count).
     925             :  * @param count number of remaining trail bytes including this one (1..3; any other value is treated as 3)
     926             :  * @param b trail byte
     927             :  * @return new delta for diff including b - <0 indicates an error
     928             :  *         (an illegal trail byte value)
     929             :  * @see decodeBocu1
     930             :  */
     931             : static inline int32_t
     932           0 : decodeBocu1TrailByte(int32_t count, int32_t b) {
     933           0 :     if(b<=0x20) {
     934             :         /* skip some C0 controls and make the trail byte range contiguous */
     935           0 :         b=bocu1ByteToTrail[b];
     936             :         /* b<0 for an illegal trail byte value will result in return<0 below */
     937             : #if BOCU1_MAX_TRAIL<0xff
     938             :     } else if(b>BOCU1_MAX_TRAIL) {
     939             :         return -99; /* callers only test for a negative result */
     940             : #endif
     941             :     } else {
     942           0 :         b-=BOCU1_TRAIL_BYTE_OFFSET;
     943             :     }
     944             : 
     945             :     /* add trail byte into difference and decrement count */
     946           0 :     if(count==1) {
     947           0 :         return b; /* last trail byte: weight 1 */
     948           0 :     } else if(count==2) {
     949           0 :         return b*BOCU1_TRAIL_COUNT; /* second-to-last trail byte */
     950             :     } else /* count==3 */ {
     951           0 :         return b*(BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT); /* first of three trail bytes */
     952             :     }
     953             : }
     954             : 
     955             : static void U_CALLCONV
     956           0 : _Bocu1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
     957             :                            UErrorCode *pErrorCode) {
     958             :     UConverter *cnv;
     959             :     const uint8_t *source, *sourceLimit;
     960             :     UChar *target;
     961             :     const UChar *targetLimit;
     962             :     int32_t *offsets;
     963             : 
     964             :     int32_t prev, count, diff, c;
     965             : 
     966             :     int8_t byteIndex;
     967             :     uint8_t *bytes;
     968             : 
     969             :     int32_t sourceIndex, nextSourceIndex;
     970             : 
     971             :     /* set up the local pointers */
     972           0 :     cnv=pArgs->converter;
     973           0 :     source=(const uint8_t *)pArgs->source;
     974           0 :     sourceLimit=(const uint8_t *)pArgs->sourceLimit;
     975           0 :     target=pArgs->target;
     976           0 :     targetLimit=pArgs->targetLimit;
     977           0 :     offsets=pArgs->offsets;
     978             : 
     979             :     /* get the converter state from UConverter */
     980           0 :     prev=(int32_t)cnv->toUnicodeStatus;
     981           0 :     if(prev==0) {
     982           0 :         prev=BOCU1_ASCII_PREV;
     983             :     }
     984           0 :     diff=cnv->mode; /* mode may be set to UCNV_SI by ucnv_bld.c but then toULength==0 */
     985           0 :     count=diff&3;
     986           0 :     diff>>=2;
     987             : 
     988           0 :     byteIndex=cnv->toULength;
     989           0 :     bytes=cnv->toUBytes;
     990             : 
     991             :     /* sourceIndex=-1 if the current character began in the previous buffer */
     992           0 :     sourceIndex=byteIndex==0 ? 0 : -1;
     993           0 :     nextSourceIndex=0;
     994             : 
     995             :     /* conversion "loop" similar to _SCSUToUnicodeWithOffsets() */
     996           0 :     if(count>0 && byteIndex>0 && target<targetLimit) {
     997           0 :         goto getTrail;
     998             :     }
     999             : 
    1000             : fastSingle:
    1001             :     /* fast loop for single-byte differences */
    1002             :     /* use count as the only loop counter variable */
    1003           0 :     diff=(int32_t)(sourceLimit-source);
    1004           0 :     count=(int32_t)(pArgs->targetLimit-target);
    1005           0 :     if(count>diff) {
    1006           0 :         count=diff;
    1007             :     }
    1008           0 :     while(count>0) {
    1009           0 :         if(BOCU1_START_NEG_2<=(c=*source) && c<BOCU1_START_POS_2) {
    1010           0 :             c=prev+(c-BOCU1_MIDDLE);
    1011           0 :             if(c<0x3000) {
    1012           0 :                 *target++=(UChar)c;
    1013           0 :                 *offsets++=nextSourceIndex++;
    1014           0 :                 prev=BOCU1_SIMPLE_PREV(c);
    1015             :             } else {
    1016           0 :                 break;
    1017             :             }
    1018           0 :         } else if(c<=0x20) {
    1019           0 :             if(c!=0x20) {
    1020           0 :                 prev=BOCU1_ASCII_PREV;
    1021             :             }
    1022           0 :             *target++=(UChar)c;
    1023           0 :             *offsets++=nextSourceIndex++;
    1024             :         } else {
    1025           0 :             break;
    1026             :         }
    1027           0 :         ++source;
    1028           0 :         --count;
    1029             :     }
    1030           0 :     sourceIndex=nextSourceIndex; /* wrong if offsets==NULL but does not matter */
    1031             : 
    1032             :     /* decode a sequence of single and lead bytes */
    1033           0 :     while(source<sourceLimit) {
    1034           0 :         if(target>=targetLimit) {
    1035             :             /* target is full */
    1036           0 :             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    1037           0 :             break;
    1038             :         }
    1039             : 
    1040           0 :         ++nextSourceIndex;
    1041           0 :         c=*source++;
    1042           0 :         if(BOCU1_START_NEG_2<=c && c<BOCU1_START_POS_2) {
    1043             :             /* Write a code point directly from a single-byte difference. */
    1044           0 :             c=prev+(c-BOCU1_MIDDLE);
    1045           0 :             if(c<0x3000) {
    1046           0 :                 *target++=(UChar)c;
    1047           0 :                 *offsets++=sourceIndex;
    1048           0 :                 prev=BOCU1_SIMPLE_PREV(c);
    1049           0 :                 sourceIndex=nextSourceIndex;
    1050           0 :                 goto fastSingle;
    1051             :             }
    1052           0 :         } else if(c<=0x20) {
    1053             :             /*
    1054             :              * Direct-encoded C0 control code or space.
    1055             :              * Reset prev for C0 control codes but not for space.
    1056             :              */
    1057           0 :             if(c!=0x20) {
    1058           0 :                 prev=BOCU1_ASCII_PREV;
    1059             :             }
    1060           0 :             *target++=(UChar)c;
    1061           0 :             *offsets++=sourceIndex;
    1062           0 :             sourceIndex=nextSourceIndex;
    1063           0 :             continue;
    1064           0 :         } else if(BOCU1_START_NEG_3<=c && c<BOCU1_START_POS_3 && source<sourceLimit) {
    1065             :             /* Optimize two-byte case. */
    1066           0 :             if(c>=BOCU1_MIDDLE) {
    1067           0 :                 diff=((int32_t)c-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1;
    1068             :             } else {
    1069           0 :                 diff=((int32_t)c-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1;
    1070             :             }
    1071             : 
    1072             :             /* trail byte */
    1073           0 :             ++nextSourceIndex;
    1074           0 :             c=decodeBocu1TrailByte(1, *source++);
    1075           0 :             if(c<0 || (uint32_t)(c=prev+diff+c)>0x10ffff) {
    1076           0 :                 bytes[0]=source[-2];
    1077           0 :                 bytes[1]=source[-1];
    1078           0 :                 byteIndex=2;
    1079           0 :                 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
    1080           0 :                 break;
    1081             :             }
    1082           0 :         } else if(c==BOCU1_RESET) {
    1083             :             /* only reset the state, no code point */
    1084           0 :             prev=BOCU1_ASCII_PREV;
    1085           0 :             sourceIndex=nextSourceIndex;
    1086           0 :             continue;
    1087             :         } else {
    1088             :             /*
    1089             :              * For multi-byte difference lead bytes, set the decoder state
    1090             :              * with the partial difference value from the lead byte and
    1091             :              * with the number of trail bytes.
    1092             :              */
    1093           0 :             bytes[0]=(uint8_t)c;
    1094           0 :             byteIndex=1;
    1095             : 
    1096           0 :             diff=decodeBocu1LeadByte(c);
    1097           0 :             count=diff&3;
    1098           0 :             diff>>=2;
    1099             : getTrail:
    1100             :             for(;;) {
    1101           0 :                 if(source>=sourceLimit) {
    1102           0 :                     goto endloop;
    1103             :                 }
    1104           0 :                 ++nextSourceIndex;
    1105           0 :                 c=bytes[byteIndex++]=*source++;
    1106             : 
    1107             :                 /* trail byte in any position */
    1108           0 :                 c=decodeBocu1TrailByte(count, c);
    1109           0 :                 if(c<0) {
    1110           0 :                     *pErrorCode=U_ILLEGAL_CHAR_FOUND;
    1111           0 :                     goto endloop;
    1112             :                 }
    1113             : 
    1114           0 :                 diff+=c;
    1115           0 :                 if(--count==0) {
    1116             :                     /* final trail byte, deliver a code point */
    1117           0 :                     byteIndex=0;
    1118           0 :                     c=prev+diff;
    1119           0 :                     if((uint32_t)c>0x10ffff) {
    1120           0 :                         *pErrorCode=U_ILLEGAL_CHAR_FOUND;
    1121           0 :                         goto endloop;
    1122             :                     }
    1123           0 :                     break;
    1124             :                 }
    1125             :             }
    1126             :         }
    1127             : 
    1128             :         /* calculate the next prev and output c */
    1129           0 :         prev=BOCU1_PREV(c);
    1130           0 :         if(c<=0xffff) {
    1131           0 :             *target++=(UChar)c;
    1132           0 :             *offsets++=sourceIndex;
    1133             :         } else {
    1134             :             /* output surrogate pair */
    1135           0 :             *target++=U16_LEAD(c);
    1136           0 :             if(target<targetLimit) {
    1137           0 :                 *target++=U16_TRAIL(c);
    1138           0 :                 *offsets++=sourceIndex;
    1139           0 :                 *offsets++=sourceIndex;
    1140             :             } else {
    1141             :                 /* target overflow */
    1142           0 :                 *offsets++=sourceIndex;
    1143           0 :                 cnv->UCharErrorBuffer[0]=U16_TRAIL(c);
    1144           0 :                 cnv->UCharErrorBufferLength=1;
    1145           0 :                 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    1146           0 :                 break;
    1147             :             }
    1148             :         }
    1149           0 :         sourceIndex=nextSourceIndex;
    1150             :     }
    1151             : endloop:
    1152             : 
    1153           0 :     if(*pErrorCode==U_ILLEGAL_CHAR_FOUND) {
    1154             :         /* set the converter state in UConverter to deal with the next character */
    1155           0 :         cnv->toUnicodeStatus=BOCU1_ASCII_PREV;
    1156           0 :         cnv->mode=0;
    1157             :     } else {
    1158             :         /* set the converter state back into UConverter */
    1159           0 :         cnv->toUnicodeStatus=(uint32_t)prev;
    1160           0 :         cnv->mode=(diff<<2)|count;
    1161             :     }
    1162           0 :     cnv->toULength=byteIndex;
    1163             : 
    1164             :     /* write back the updated pointers */
    1165           0 :     pArgs->source=(const char *)source;
    1166           0 :     pArgs->target=target;
    1167           0 :     pArgs->offsets=offsets;
    1168           0 :     return;
    1169             : }
    1170             : 
    1171             : /*
    1172             :  * Convert BOCU-1 bytes in pArgs->source to UTF-16 in pArgs->target, keeping
    1173             :  * the decoder state in the UConverter between calls. Identical to
    1174             :  * _Bocu1ToUnicodeWithOffsets but without offset handling: if that function
    1175             :  * changes, either change this one the same way or re-copy it and remove
    1176             :  * the variables offsets, sourceIndex, and nextSourceIndex.
    1177             :  */
    1178             : static void U_CALLCONV
    1179           0 : _Bocu1ToUnicode(UConverterToUnicodeArgs *pArgs,
    1180             :                 UErrorCode *pErrorCode) {
    1181             :     UConverter *cnv;
    1182             :     const uint8_t *source, *sourceLimit;
    1183             :     UChar *target;
    1184             :     const UChar *targetLimit;
    1185             :     /* prev: base value a decoded difference is added to; diff: difference accumulated so far */
    1186             :     int32_t prev, count, diff, c; /* count: trail bytes still expected; c: current byte, then the decoded code point */
    1187             :     /* bytes/byteIndex: bytes of the sequence being decoded, kept in cnv->toUBytes/toULength */
    1188             :     int8_t byteIndex;
    1189             :     uint8_t *bytes;
    1190             : 
    1191             :     /* set up the local pointers into the source and target buffers */
    1192           0 :     cnv=pArgs->converter;
    1193           0 :     source=(const uint8_t *)pArgs->source;
    1194           0 :     sourceLimit=(const uint8_t *)pArgs->sourceLimit;
    1195           0 :     target=pArgs->target;
    1196           0 :     targetLimit=pArgs->targetLimit;
    1197             : 
    1198             :     /* get the converter state from UConverter; a stored status of 0 stands for the initial state */
    1199           0 :     prev=(int32_t)cnv->toUnicodeStatus;
    1200           0 :     if(prev==0) {
    1201           0 :         prev=BOCU1_ASCII_PREV;
    1202             :     }
    1203           0 :     diff=cnv->mode; /* mode may be set to UCNV_SI by ucnv_bld.c but then toULength==0 */
    1204           0 :     count=diff&3; /* mode packs (diff<<2)|count, see the write-back at endloop */
    1205           0 :     diff>>=2;
    1206             : 
    1207           0 :     byteIndex=cnv->toULength;
    1208           0 :     bytes=cnv->toUBytes;
    1209             : 
    1210             :     /* conversion "loop" similar to _SCSUToUnicodeWithOffsets(); resume a sequence cut off by the previous call */
    1211           0 :     if(count>0 && byteIndex>0 && target<targetLimit) { /* byteIndex>0 guards against a mode value that does not describe a pending sequence */
    1212           0 :         goto getTrail;
    1213             :     }
    1214             : 
    1215             : fastSingle:
    1216             :     /* fast loop for single-byte differences; diff and count are reused as scratch here */
    1217             :     /* use count as the only loop counter variable: min(source bytes left, target units left) */
    1218           0 :     diff=(int32_t)(sourceLimit-source);
    1219           0 :     count=(int32_t)(pArgs->targetLimit-target);
    1220           0 :     if(count>diff) {
    1221           0 :         count=diff;
    1222             :     }
    1223           0 :     while(count>0) {
    1224           0 :         if(BOCU1_START_NEG_2<=(c=*source) && c<BOCU1_START_POS_2) { /* single-byte difference */
    1225           0 :             c=prev+(c-BOCU1_MIDDLE);
    1226           0 :             if(c<0x3000) { /* only results below 0x3000 are handled here, with the BOCU1_SIMPLE_PREV update */
    1227           0 :                 *target++=(UChar)c;
    1228           0 :                 prev=BOCU1_SIMPLE_PREV(c);
    1229             :             } else {
    1230           0 :                 break; /* byte is not consumed; the general loop below decodes it again */
    1231             :             }
    1232           0 :         } else if(c<=0x20) { /* byte values up to 0x20 are copied through unchanged */
    1233           0 :             if(c!=0x20) { /* everything except space resets prev */
    1234           0 :                 prev=BOCU1_ASCII_PREV;
    1235             :             }
    1236           0 :             *target++=(UChar)c;
    1237             :         } else {
    1238           0 :             break; /* lead byte of a longer sequence or reset byte: leave it for the general loop */
    1239             :         }
    1240           0 :         ++source;
    1241           0 :         --count;
    1242             :     }
    1243             : 
    1244             :     /* decode a sequence of single and lead bytes; this loop checks both buffer limits itself */
    1245           0 :     while(source<sourceLimit) {
    1246           0 :         if(target>=targetLimit) {
    1247             :             /* target is full while source bytes remain */
    1248           0 :             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    1249           0 :             break;
    1250             :         }
    1251             : 
    1252           0 :         c=*source++;
    1253           0 :         if(BOCU1_START_NEG_2<=c && c<BOCU1_START_POS_2) {
    1254             :             /* Write a code point directly from a single-byte difference. */
    1255           0 :             c=prev+(c-BOCU1_MIDDLE);
    1256           0 :             if(c<0x3000) {
    1257           0 :                 *target++=(UChar)c;
    1258           0 :                 prev=BOCU1_SIMPLE_PREV(c);
    1259           0 :                 goto fastSingle; /* back to the cheap loop, which recomputes its counter */
    1260             :             } /* otherwise fall through to the common output code after the if-chain */
    1261           0 :         } else if(c<=0x20) {
    1262             :             /*
    1263             :              * Direct-encoded C0 control code or space.
    1264             :              * Reset prev for C0 control codes but not for space.
    1265             :              */
    1266           0 :             if(c!=0x20) {
    1267           0 :                 prev=BOCU1_ASCII_PREV;
    1268             :             }
    1269           0 :             *target++=(UChar)c;
    1270           0 :             continue;
    1271           0 :         } else if(BOCU1_START_NEG_3<=c && c<BOCU1_START_POS_3 && source<sourceLimit) {
    1272             :             /* Optimize two-byte case; taken only when the trail byte is already available. */
    1273           0 :             if(c>=BOCU1_MIDDLE) {
    1274           0 :                 diff=((int32_t)c-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1;
    1275             :             } else {
    1276           0 :                 diff=((int32_t)c-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1;
    1277             :             }
    1278             : 
    1279             :             /* trail byte; a negative decode result or a sum above 0x10ffff is illegal */
    1280           0 :             c=decodeBocu1TrailByte(1, *source++);
    1281           0 :             if(c<0 || (uint32_t)(c=prev+diff+c)>0x10ffff) {
    1282           0 :                 bytes[0]=source[-2]; /* record both consumed bytes so toUBytes/toULength describe the bad sequence */
    1283           0 :                 bytes[1]=source[-1];
    1284           0 :                 byteIndex=2;
    1285           0 :                 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
    1286           0 :                 break;
    1287             :             }
    1288           0 :         } else if(c==BOCU1_RESET) {
    1289             :             /* only reset the state, no code point is written */
    1290           0 :             prev=BOCU1_ASCII_PREV;
    1291           0 :             continue;
    1292             :         } else {
    1293             :             /*
    1294             :              * For multi-byte difference lead bytes, set the decoder state
    1295             :              * with the partial difference value from the lead byte and
    1296             :              * with the number of trail bytes.
    1297             :              */
    1298           0 :             bytes[0]=(uint8_t)c;
    1299           0 :             byteIndex=1;
    1300             : 
    1301           0 :             diff=decodeBocu1LeadByte(c); /* result uses the same (diff<<2)|count packing as cnv->mode */
    1302           0 :             count=diff&3;
    1303           0 :             diff>>=2;
    1304             : getTrail:
    1305             :             for(;;) {
    1306           0 :                 if(source>=sourceLimit) {
    1307           0 :                     goto endloop; /* input exhausted mid-sequence: diff, count and byteIndex are saved for the next call */
    1308             :                 }
    1309           0 :                 c=bytes[byteIndex++]=*source++;
    1310             : 
    1311             :                 /* trail byte in any position; count tells the helper which position this is */
    1312           0 :                 c=decodeBocu1TrailByte(count, c);
    1313           0 :                 if(c<0) {
    1314           0 :                     *pErrorCode=U_ILLEGAL_CHAR_FOUND; /* byteIndex keeps the number of bytes collected so far */
    1315           0 :                     goto endloop;
    1316             :                 }
    1317             : 
    1318           0 :                 diff+=c;
    1319           0 :                 if(--count==0) {
    1320             :                     /* final trail byte, deliver a code point and drop the collected bytes */
    1321           0 :                     byteIndex=0;
    1322           0 :                     c=prev+diff;
    1323           0 :                     if((uint32_t)c>0x10ffff) { /* unsigned compare also rejects negative results */
    1324           0 :                         *pErrorCode=U_ILLEGAL_CHAR_FOUND;
    1325           0 :                         goto endloop;
    1326             :                     }
    1327           0 :                     break;
    1328             :                 }
    1329             :             }
    1330             :         }
    1331             : 
    1332             :         /* calculate the next prev and output c (reached by every path that decoded a code point without continue/goto) */
    1333           0 :         prev=BOCU1_PREV(c);
    1334           0 :         if(c<=0xffff) {
    1335           0 :             *target++=(UChar)c;
    1336             :         } else {
    1337             :             /* output surrogate pair; room for the lead unit was checked at the top of the loop */
    1338           0 :             *target++=U16_LEAD(c);
    1339           0 :             if(target<targetLimit) {
    1340           0 :                 *target++=U16_TRAIL(c);
    1341             :             } else {
    1342             :                 /* target overflow: park the trail surrogate in the converter's UChar error buffer */
    1343           0 :                 cnv->UCharErrorBuffer[0]=U16_TRAIL(c);
    1344           0 :                 cnv->UCharErrorBufferLength=1;
    1345           0 :                 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    1346           0 :                 break;
    1347             :             }
    1348             :         }
    1349             :     }
    1350             : endloop:
    1351             : 
    1352           0 :     if(*pErrorCode==U_ILLEGAL_CHAR_FOUND) {
    1353             :         /* set the converter state in UConverter to deal with the next character: start over from the ASCII state */
    1354           0 :         cnv->toUnicodeStatus=BOCU1_ASCII_PREV;
    1355           0 :         cnv->mode=0;
    1356             :     } else {
    1357             :         /* set the converter state back into UConverter; diff/count may be fastSingle scratch values when no sequence is pending */
    1358           0 :         cnv->toUnicodeStatus=(uint32_t)prev;
    1359           0 :         cnv->mode=(diff<<2)|count; /* NOTE(review): if diff can be negative here, left-shifting it is undefined before C++20; consider (int32_t)((uint32_t)diff<<2) in both ToUnicode functions */
    1360             :     }
    1361           0 :     cnv->toULength=byteIndex; /* nonzero only for a pending or illegal sequence */
    1362             : 
    1363             :     /* write back the updated pointers so the caller sees how much was consumed and produced */
    1364           0 :     pArgs->source=(const char *)source;
    1365           0 :     pArgs->target=target;
    1366           0 :     return;
    1367             : }
    1368             : 
    1369             : /* miscellaneous ------------------------------------------------------------ */
    1370             : 
    1371             : static const UConverterImpl _Bocu1Impl={ /* BOCU-1 function table; entries are positional, slot meanings come from the UConverterImpl declaration (not visible here) */
    1372             :     UCNV_BOCU1, /* converter type this table belongs to */
    1373             : 
    1374             :     NULL, /* NULL entries: no BOCU-1-specific function supplied for that slot */
    1375             :     NULL,
    1376             : 
    1377             :     NULL,
    1378             :     NULL,
    1379             :     NULL,
    1380             : 
    1381             :     _Bocu1ToUnicode, /* bytes to UTF-16, no offsets */
    1382             :     _Bocu1ToUnicodeWithOffsets, /* bytes to UTF-16, also filling pArgs->offsets */
    1383             :     _Bocu1FromUnicode, /* presumably the encoder counterpart without offsets (defined earlier in the file) */
    1384             :     _Bocu1FromUnicodeWithOffsets, /* presumably the encoder counterpart with offsets (defined earlier in the file) */
    1385             :     NULL,
    1386             : 
    1387             :     NULL,
    1388             :     NULL,
    1389             :     NULL,
    1390             :     NULL,
    1391             :     ucnv_getCompleteUnicodeSet, /* shared helper; by its name it reports all of Unicode as convertible (confirm against its definition) */
    1392             : 
    1393             :     NULL,
    1394             :     NULL
    1395             : };
    1396             : 
    1397             : static const UConverterStaticData _Bocu1StaticData={ /* constant description of the BOCU-1 converter; entries are positional, field names come from UConverterStaticData (not visible here) */
    1398             :     sizeof(UConverterStaticData), /* size of this structure */
    1399             :     "BOCU-1", /* presumably the converter name (confirm against the struct declaration) */
    1400             :     1214, /* CCSID for BOCU-1 */
    1401             :     UCNV_IBM, UCNV_BOCU1, /* presumably platform and converter type (confirm against the struct declaration) */
    1402             :     1, 4, /* one UChar generates at least 1 byte and at most 4 bytes */
    1403             :     { 0x1a, 0, 0, 0 }, 1, /* BOCU-1 never needs to write a subchar; a one-byte 0x1a is still supplied */
    1404             :     FALSE, FALSE,
    1405             :     0,
    1406             :     0,
    1407             :     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
    1408             : };
    1409             : 
    1410             : const UConverterSharedData _Bocu1Data= /* the only non-static object in this section: pairs the static description with the function table */
    1411             :         UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_Bocu1StaticData, &_Bocu1Impl);
    1412             : 
    1413             : #endif

Generated by: LCOV version 1.13