LCOV - output.info - intl/icu/source/common/ucnv

LCOV - code coverage report

Current view:	top level - intl/icu/source/common - ucnv_u7.cpp (source / functions)		Hit	Total	Coverage
Test:	output.info	Lines:	0	599	0.0 %
Date:	2017-07-14 16:53:18	Functions:	0	7	0.0 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*  
       4             : **********************************************************************
       5             : *   Copyright (C) 2002-2016, International Business Machines
       6             : *   Corporation and others.  All Rights Reserved.
       7             : **********************************************************************
       8             : *   file name:  ucnv_u7.cpp
       9             : *   encoding:   UTF-8
      10             : *   tab size:   8 (not used)
      11             : *   indentation:4
      12             : *
      13             : *   created on: 2002jul01
      14             : *   created by: Markus W. Scherer
      15             : *
      16             : *   UTF-7 converter implementation. Used to be in ucnv_utf.c.
      17             : */
      18             : 
      19             : #include "unicode/utypes.h"
      20             : 
      21             : #if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
      22             : 
      23             : #include "cmemory.h"
      24             : #include "unicode/ucnv.h"
      25             : #include "ucnv_bld.h"
      26             : #include "ucnv_cnv.h"
      27             : #include "uassert.h"
      28             : 
      29             : /* UTF-7 -------------------------------------------------------------------- */
      30             : 
      31             : /*
      32             :  * UTF-7 is a stateful encoding of Unicode.
      33             :  * It is defined in RFC 2152. (http://www.ietf.org/rfc/rfc2152.txt)
      34             :  * It was intended for use in Internet email systems, using in its bytewise
      35             :  * encoding only a subset of 7-bit US-ASCII.
      36             :  * UTF-7 is deprecated in favor of UTF-8/16/32 and SCSU, but still
      37             :  * occasionally used.
      38             :  *
      39             :  * When converting Unicode to UTF-7, the RFC allows some US-ASCII characters
      40             :  * to be encoded directly or in base64. In particular, the characters in set O
      41             :  * as defined in the RFC (see below) may be encoded directly but are not
      42             :  * allowed in, e.g., email headers.
      43             :  * By default, the ICU UTF-7 converter encodes set O directly.
      44             :  * By choosing the option "version=1", set O will be escaped instead.
      45             :  * For example:
      46             :  *     utf7Converter=ucnv_open("UTF-7,version=1");
      47             :  *
      48             :  * For details about email headers see RFC 2047.
      49             :  */
      50             : 
      51             : /*
      52             :  * Tests for US-ASCII characters belonging to character classes
      53             :  * defined in UTF-7.
      54             :  *
      55             :  * Set D (directly encoded characters) consists of the following
      56             :  * characters: the upper and lower case letters A through Z
      57             :  * and a through z, the 10 digits 0-9, and the following nine special
      58             :  * characters (note that "+" and "=" are omitted):
      59             :  *     '(),-./:?
      60             :  *
      61             :  * Set O (optional direct characters) consists of the following
      62             :  * characters (note that "\" and "~" are omitted):
      63             :  *     !"#$%&*;<=>@[]^_`{|}
      64             :  *
      65             :  * According to the rules in RFC 2152, the byte values for the following
      66             :  * US-ASCII characters are not used in UTF-7 and are therefore illegal:
      67             :  * - all C0 control codes except for CR LF TAB
      68             :  * - BACKSLASH
      69             :  * - TILDE
      70             :  * - DEL
      71             :  * - all codes beyond US-ASCII, i.e. all >127
      72             :  */
      73             : #define inSetD(c) /* true if c is in set D; each (uint8_t)((c)-lo)<n term is a range test lo..lo+n-1 for c in 0..255; (c) is evaluated several times */ \
      74             :     ((uint8_t)((c)-97)<26 || (uint8_t)((c)-65)<26 || /* letters */ \
      75             :      (uint8_t)((c)-48)<10 ||    /* digits */ \
      76             :      (uint8_t)((c)-39)<3 ||     /* '() */ \
      77             :      (uint8_t)((c)-44)<4 ||     /* ,-./ */ \
      78             :      (c)==58 || (c)==63         /* :? */ \
      79             :     )
      80             : 
      81             : #define inSetO(c) /* true if c is in set O; same range-test technique as inSetD, (c) is evaluated several times */ \
      82             :     ((uint8_t)((c)-33)<6 ||         /* !"#$%& */ \
      83             :      (uint8_t)((c)-59)<4 ||         /* ;<=> */ \
      84             :      (uint8_t)((c)-93)<4 ||         /* ]^_` */ \
      85             :      (uint8_t)((c)-123)<3 ||        /* {|} */ \
      86             :      (c)==42 || (c)==64 || (c)==91  /* *@[ */ \
      87             :     )
      88             : 
      89             : #define isCRLFTAB(c) ((c)==13 || (c)==10 || (c)==9) /* CR (13), LF (10) or TAB (9) */
      90             : #define isCRLFSPTAB(c) ((c)==32 || (c)==13 || (c)==10 || (c)==9) /* SPACE (32), CR, LF or TAB */
      91             : 
      92             : #define PLUS  43 /* '+': starts a base64 ("Unicode mode") sequence */
      93             : #define MINUS 45 /* '-': terminates a base64 sequence */
      94             : #define BACKSLASH 92 /* '\\': never legal in UTF-7 */
      95             : #define TILDE 126 /* '~': never legal in UTF-7 */
      96             : 
      97             : /* legal byte values: all US-ASCII graphic characters from space to before tilde, and CR LF TAB; the range test (c)-32<94 accepts 32..125, so TILDE (126), DEL (127) and all bytes >127 fail it, while BACKSLASH is excluded explicitly */
      98             : #define isLegalUTF7(c) (((uint8_t)((c)-32)<94 && (c)!=BACKSLASH) || isCRLFTAB(c))
      99             : 
     100             : /* encode directly sets D and O and CR LF SP TAB; indexed by US-ASCII code 0..127, nonzero means "write the character as-is" */
     101             : static const UBool encodeDirectlyMaximum[128]={
     102             :  /* 0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
     103             :     0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, /* 0x00-0x0f: only TAB (0x09), LF (0x0a), CR (0x0d) */
     104             :     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f: none */
     105             : 
     106             :     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, /* 0x20-0x2f: all except '+' (0x2b) */
     107             :     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x30-0x3f: all */
     108             : 
     109             :     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40-0x4f: all */
     110             :     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, /* 0x50-0x5f: all except backslash (0x5c) */
     111             : 
     112             :     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60-0x6f: all */
     113             :     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0 /* 0x70-0x7f: all except '~' (0x7e) and DEL (0x7f) */
     114             : };
     115             : 
     116             : /* encode directly set D and CR LF SP TAB but not set O; indexed by US-ASCII code 0..127, nonzero means "write the character as-is" */
     117             : static const UBool encodeDirectlyRestricted[128]={
     118             :  /* 0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
     119             :     0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, /* 0x00-0x0f: only TAB (0x09), LF (0x0a), CR (0x0d) */
     120             :     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x1f: none */
     121             : 
     122             :     1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, /* 0x20-0x2f: SPACE, then '() and ,-./ */
     123             :     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 0x30-0x3f: digits, ':' and '?' */
     124             : 
     125             :     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40-0x4f: A-O ('@' is not direct) */
     126             :     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x50-0x5f: P-Z only */
     127             : 
     128             :     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60-0x6f: a-o ('`' is not direct) */
     129             :     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 /* 0x70-0x7f: p-z only */
     130             : };
     131             : 
     132             : static const uint8_t /* maps a 6-bit value (0..63) to the US-ASCII code of its base64 character */
     133             : toBase64[64]={
     134             :     /* A-Z */
     135             :     65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
     136             :     78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
     137             :     /* a-z */
     138             :     97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
     139             :     110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122,
     140             :     /* 0-9 */
     141             :     48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
     142             :     /* +/ */
     143             :     43, 47
     144             : };
     145             : 
     146             : static const int8_t /* maps a US-ASCII code (0..127) to its 6-bit base64 value (0..63), or to a negative marker: -1 legal non-base64 character, -2 minus sign, -3 illegal byte */
     147             : fromBase64[128]={
     148             :     /* C0 controls, -1 for legal ones (CR LF TAB), -3 for illegal ones */
     149             :     -3, -3, -3, -3, -3, -3, -3, -3, -3, -1, -1, -3, -3, -1, -3, -3,
     150             :     -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3,
     151             : 
     152             :     /* general punctuation with + and / and a special value (-2) for - */
     153             :     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -2, -1, 63,
     154             :     /* digits */
     155             :     52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
     156             : 
     157             :     /* A-Z; backslash (0x5c) is illegal (-3) */
     158             :     -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
     159             :     15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -3, -1, -1, -1,
     160             : 
     161             :     /* a-z; tilde (0x7e) and DEL (0x7f) are illegal (-3) */
     162             :     -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
     163             :     41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -3, -3
     164             : };
     165             : 
     166             : /*
     167             :  * converter status values:
     168             :  *
     169             :  * toUnicodeStatus:
     170             :  *     24 inDirectMode (boolean)
     171             :  * 23..16 base64Counter (-1..7)
     172             :  * 15..0  bits (up to 14 bits incoming base64)
     173             :  *
     174             :  * fromUnicodeStatus:
     175             :  * 31..28 version (0: set O direct  1: set O escaped)
     176             :  *     24 inDirectMode (boolean)
     177             :  * 23..16 base64Counter (0..2)
     178             :  *  7..0  bits (6 bits outgoing base64)
     179             :  *
     180             :  */
     181             : 
     182             : U_CDECL_BEGIN
     183             : static void U_CALLCONV /* Resets the UTF-7 converter state held in cnv for the direction(s) selected by choice. */
     184           0 : _UTF7Reset(UConverter *cnv, UConverterResetChoice choice) {
     185           0 :     if(choice<=UCNV_RESET_TO_UNICODE) { /* NOTE(review): presumably UCNV_RESET_BOTH sorts before UCNV_RESET_TO_UNICODE so that <= also matches "both" -- confirm against the enum */
     186             :         /* reset toUnicode: bit 24 set, base64Counter and bits cleared */
     187           0 :         cnv->toUnicodeStatus=0x1000000; /* inDirectMode=TRUE */
     188           0 :         cnv->toULength=0; /* no bytes of a pending sequence are stored */
     189             :     }
     190           0 :     if(choice!=UCNV_RESET_TO_UNICODE) {
     191             :         /* reset fromUnicode: only the top four bits (31..28) survive the mask */
     192           0 :         cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */
     193             :     }
     194           0 : }
     195             : 
     196             : static void U_CALLCONV /* Initializes a newly opened UTF-7 converter; sets *pErrorCode to U_ILLEGAL_ARGUMENT_ERROR if the requested version is greater than 1. */
     197           0 : _UTF7Open(UConverter *cnv,
     198             :           UConverterLoadArgs *pArgs,
     199             :           UErrorCode *pErrorCode) {
     200             :     (void)pArgs; /* unused */
     201           0 :     if(UCNV_GET_VERSION(cnv)<=1) { /* only version 0 (set O direct) and version 1 (set O escaped) are accepted */
     202             :         /* TODO(markus): Should just use cnv->options rather than copying the version number. */
     203           0 :         cnv->fromUnicodeStatus=UCNV_GET_VERSION(cnv)<<28; /* store the version in bits 31..28 */
     204           0 :         _UTF7Reset(cnv, UCNV_RESET_BOTH); /* start both directions in direct mode; the reset keeps the version bits */
     205             :     } else {
     206           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
     207             :     }
     208           0 : }
     209             : 
     210             : static void U_CALLCONV /* Converts UTF-7 bytes from pArgs->source into UChars at pArgs->target, resuming from and saving the state in pArgs->converter; writes per-UChar source offsets if pArgs->offsets is not NULL; reports problems through *pErrorCode. */
     211           0 : _UTF7ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
     212             :                           UErrorCode *pErrorCode) {
     213             :     UConverter *cnv;
     214             :     const uint8_t *source, *sourceLimit;
     215             :     UChar *target;
     216             :     const UChar *targetLimit;
     217             :     int32_t *offsets;
     218             : 
     219             :     uint8_t *bytes; /* points at cnv->toUBytes, where the bytes of the current sequence are kept for error reporting */
     220             :     uint8_t byteIndex; /* number of bytes currently stored in bytes[] */
     221             : 
     222             :     int32_t length, targetCapacity;
     223             : 
     224             :     /* UTF-7 state */
     225             :     uint16_t bits; /* base64 bits collected so far that do not yet form a complete UChar */
     226             :     int8_t base64Counter; /* position in the base64 cycle: -1 right after '+', else 0..7 */
     227             :     UBool inDirectMode;
     228             : 
     229             :     int8_t base64Value; /* fromBase64[] result for the current byte */
     230             : 
     231             :     int32_t sourceIndex, nextSourceIndex;
     232             : 
     233             :     uint8_t b;
     234             :     /* set up the local pointers */
     235           0 :     cnv=pArgs->converter;
     236             : 
     237           0 :     source=(const uint8_t *)pArgs->source;
     238           0 :     sourceLimit=(const uint8_t *)pArgs->sourceLimit;
     239           0 :     target=pArgs->target;
     240           0 :     targetLimit=pArgs->targetLimit;
     241           0 :     offsets=pArgs->offsets;
     242             :     /* get the state machine state: unpack the fields of toUnicodeStatus */
     243             :     {
     244           0 :         uint32_t status=cnv->toUnicodeStatus;
     245           0 :         inDirectMode=(UBool)((status>>24)&1);
     246           0 :         base64Counter=(int8_t)(status>>16);
     247           0 :         bits=(uint16_t)status;
     248             :     }
     249           0 :     bytes=cnv->toUBytes;
     250           0 :     byteIndex=cnv->toULength;
     251             : 
     252             :     /* sourceIndex=-1 if the current character began in the previous buffer */
     253           0 :     sourceIndex=byteIndex==0 ? 0 : -1;
     254           0 :     nextSourceIndex=0;
     255             : 
     256           0 :     if(inDirectMode) {
     257             : directMode:
     258             :         /*
     259             :          * In Direct Mode, most US-ASCII characters are encoded directly, i.e.,
     260             :          * with their US-ASCII byte values.
     261             :          * Backslash and Tilde and most control characters are not allowed in UTF-7.
     262             :          * A plus sign starts Unicode (or "escape") Mode.
     263             :          *
     264             :          * In Direct Mode, only the sourceIndex is used.
     265             :          */
     266           0 :         byteIndex=0;
     267           0 :         length=(int32_t)(sourceLimit-source);
     268           0 :         targetCapacity=(int32_t)(targetLimit-target);
     269           0 :         if(length>targetCapacity) { /* each direct byte yields exactly one UChar, so one counter bounds both buffers */
     270           0 :             length=targetCapacity;
     271             :         }
     272           0 :         while(length>0) {
     273           0 :             b=*source++;
     274           0 :             if(!isLegalUTF7(b)) {
     275             :                 /* illegal: store the offending byte for the error report */
     276           0 :                 bytes[0]=b;
     277           0 :                 byteIndex=1;
     278           0 :                 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
     279           0 :                 break;
     280           0 :             } else if(b!=PLUS) {
     281             :                 /* write directly encoded character */
     282           0 :                 *target++=b;
     283           0 :                 if(offsets!=NULL) {
     284           0 :                     *offsets++=sourceIndex++;
     285             :                 }
     286             :             } else /* PLUS */ {
     287             :                 /* switch to Unicode mode */
     288           0 :                 nextSourceIndex=++sourceIndex;
     289           0 :                 inDirectMode=FALSE;
     290           0 :                 byteIndex=0;
     291           0 :                 bits=0;
     292           0 :                 base64Counter=-1; /* marks "immediately after the +" */
     293           0 :                 goto unicodeMode;
     294             :             }
     295           0 :             --length;
     296             :         }
     297           0 :         if(source<sourceLimit && target>=targetLimit) {
     298             :             /* target is full */
     299           0 :             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     300             :         }
     301             :     } else {
     302             : unicodeMode:
     303             :         /*
     304             :          * In Unicode (or "escape") Mode, UTF-16BE is base64-encoded.
     305             :          * The base64 sequence ends with any character that is not in the base64 alphabet.
     306             :          * A terminating minus sign is consumed.
     307             :          *
     308             :          * In Unicode Mode, the sourceIndex has the index to the start of the current
     309             :          * base64 bytes, while nextSourceIndex is precisely parallel to source,
     310             :          * keeping the index to the following byte.
     311             :          * Note that in 2 out of 3 cases, UChars overlap within a base64 byte.
     312             :          */
     313           0 :         while(source<sourceLimit) {
     314           0 :             if(target<targetLimit) {
     315           0 :                 bytes[byteIndex++]=b=*source++;
     316           0 :                 ++nextSourceIndex;
     317           0 :                 base64Value = -3; /* initialize as illegal */
     318           0 :                 if(b>=126 || (base64Value=fromBase64[b])==-3 || base64Value==-1) { /* b>=126 short-circuits before the lookup, so the 128-entry table is never indexed with 126..255 and base64Value stays -3 */
     319             :                     /* either
     320             :                      * base64Value==-1 for any legal character except base64 and minus sign, or
     321             :                      * base64Value==-3 for illegal characters:
     322             :                      * 1. In either case, leave Unicode mode.
     323             :                      * 2.1. If we ended with an incomplete UChar or none after the +, then
     324             :                      *      generate an error for the preceding erroneous sequence and deal with
     325             :                      *      the current (possibly illegal) character next time through.
     326             :                      * 2.2. Else the current char comes after a complete UChar, which was already
     327             :                      *      pushed to the output buf, so:
     328             :                      * 2.2.1. If the current char is legal, just save it for processing next time.
     329             :                      *        It may be for example, a plus which we need to deal with in direct mode.
     330             :                      * 2.2.2. Else if the current char is illegal, we might as well deal with it here.
     331             :                      */
     332           0 :                     inDirectMode=TRUE;
     333           0 :                     if(base64Counter==-1) {
     334             :                         /* illegal: + immediately followed by something other than base64 or minus sign */
     335             :                         /* include the plus sign in the reported sequence, but not the subsequent char */
     336           0 :                         --source;
     337           0 :                         bytes[0]=PLUS;
     338           0 :                         byteIndex=1;
     339           0 :                         *pErrorCode=U_ILLEGAL_CHAR_FOUND;
     340           0 :                         break;
     341           0 :                     } else if(bits!=0) {
     342             :                         /* bits are illegally left over, a UChar is incomplete */
     343             :                         /* don't include current char (legal or illegal) in error seq */
     344           0 :                         --source;
     345           0 :                         --byteIndex;
     346           0 :                         *pErrorCode=U_ILLEGAL_CHAR_FOUND;
     347           0 :                         break;
     348             :                     } else {
     349             :                         /* previous UChar was complete */
     350           0 :                         if(base64Value==-3) {
     351             :                             /* current character is illegal, deal with it here */
     352           0 :                             *pErrorCode=U_ILLEGAL_CHAR_FOUND;
     353           0 :                             break;
     354             :                         } else {
     355             :                             /* un-read the current character in case it is a plus sign */
     356           0 :                             --source;
     357           0 :                             sourceIndex=nextSourceIndex-1;
     358           0 :                             goto directMode;
     359             :                         }
     360             :                     }
     361           0 :                 } else if(base64Value>=0) {
     362             :                     /* collect base64 bytes into UChars; 8 base64 characters carry 48 bits, i.e. 3 UChars, which are completed at counter values 2, 5 and 7 */
     363           0 :                     switch(base64Counter) {
     364             :                     case -1: /* -1 is immediately after the + */
     365             :                     case 0:
     366           0 :                         bits=base64Value;
     367           0 :                         base64Counter=1;
     368           0 :                         break;
     369             :                     case 1:
     370             :                     case 3:
     371             :                     case 4:
     372             :                     case 6:
     373           0 :                         bits=(uint16_t)((bits<<6)|base64Value); /* not enough bits for a UChar yet: append all 6 */
     374           0 :                         ++base64Counter;
     375           0 :                         break;
     376             :                     case 2:
     377           0 :                         *target++=(UChar)((bits<<4)|(base64Value>>2)); /* 12 collected bits + top 4 of this value */
     378           0 :                         if(offsets!=NULL) {
     379           0 :                             *offsets++=sourceIndex;
     380           0 :                             sourceIndex=nextSourceIndex-1; /* the next UChar starts within the current byte */
     381             :                         }
     382           0 :                         bytes[0]=b; /* keep this byte in case an error occurs */
     383           0 :                         byteIndex=1;
     384           0 :                         bits=(uint16_t)(base64Value&3); /* low 2 bits carry over to the next UChar */
     385           0 :                         base64Counter=3;
     386           0 :                         break;
     387             :                     case 5:
     388           0 :                         *target++=(UChar)((bits<<2)|(base64Value>>4)); /* 14 collected bits + top 2 of this value */
     389           0 :                         if(offsets!=NULL) {
     390           0 :                             *offsets++=sourceIndex;
     391           0 :                             sourceIndex=nextSourceIndex-1; /* the next UChar starts within the current byte */
     392             :                         }
     393           0 :                         bytes[0]=b; /* keep this byte in case an error occurs */
     394           0 :                         byteIndex=1;
     395           0 :                         bits=(uint16_t)(base64Value&15); /* low 4 bits carry over to the next UChar */
     396           0 :                         base64Counter=6;
     397           0 :                         break;
     398             :                     case 7:
     399           0 :                         *target++=(UChar)((bits<<6)|base64Value); /* 10 collected bits + all 6 of this value; nothing carries over */
     400           0 :                         if(offsets!=NULL) {
     401           0 :                             *offsets++=sourceIndex;
     402           0 :                             sourceIndex=nextSourceIndex;
     403             :                         }
     404           0 :                         byteIndex=0;
     405           0 :                         bits=0;
     406           0 :                         base64Counter=0;
     407           0 :                         break;
     408             :                     default:
     409             :                         /* will never occur */
     410           0 :                         break;
     411             :                     }
     412             :                 } else /*base64Value==-2*/ {
     413             :                     /* minus sign terminates the base64 sequence */
     414           0 :                     inDirectMode=TRUE;
     415           0 :                     if(base64Counter==-1) {
     416             :                         /* +- i.e. a minus immediately following a plus: decodes to a literal plus sign */
     417           0 :                         *target++=PLUS;
     418           0 :                         if(offsets!=NULL) {
     419           0 :                             *offsets++=sourceIndex-1;
     420             :                         }
     421             :                     } else {
     422             :                         /* absorb the minus and leave the Unicode Mode */
     423           0 :                         if(bits!=0) {
     424             :                             /* bits are illegally left over, a UChar is incomplete */
     425           0 :                             *pErrorCode=U_ILLEGAL_CHAR_FOUND;
     426           0 :                             break;
     427             :                         }
     428             :                     }
     429           0 :                     sourceIndex=nextSourceIndex;
     430           0 :                     goto directMode;
     431             :                 }
     432             :             } else {
     433             :                 /* target is full */
     434           0 :                 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     435           0 :                 break;
     436             :             }
     437             :         }
     438             :     }
     439             : 
     440           0 :     if(U_SUCCESS(*pErrorCode) && pArgs->flush && source==sourceLimit && bits==0) {
     441             :         /*
     442             :          * if we are in Unicode mode, then the byteIndex might not be 0,
     443             :          * but that is ok if bits==0
     444             :          * -> we set byteIndex=0 at the end of the stream to avoid a truncated error
     445             :          * (not true for IMAP-mailbox-name where we must end in direct mode)
     446             :          */
     447           0 :         byteIndex=0;
     448             :     }
     449             : 
     450             :     /* set the converter state back into UConverter: bit 24 inDirectMode, bits 23..16 base64Counter, bits 15..0 bits */
     451           0 :     cnv->toUnicodeStatus=((uint32_t)inDirectMode<<24)|((uint32_t)((uint8_t)base64Counter)<<16)|(uint32_t)bits;
     452           0 :     cnv->toULength=byteIndex;
     453             : 
     454             :     /* write back the updated pointers */
     455           0 :     pArgs->source=(const char *)source;
     456           0 :     pArgs->target=target;
     457           0 :     pArgs->offsets=offsets;
     458           0 :     return;
     459             : }
     460             : 
     461             : static void U_CALLCONV
     462           0 : _UTF7FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
     463             :                             UErrorCode *pErrorCode) {
     464             :     UConverter *cnv;
     465             :     const UChar *source, *sourceLimit;
     466             :     uint8_t *target, *targetLimit;
     467             :     int32_t *offsets;
     468             : 
     469             :     int32_t length, targetCapacity, sourceIndex;
     470             :     UChar c;
     471             : 
     472             :     /* UTF-7 state */
     473             :     const UBool *encodeDirectly;
     474             :     uint8_t bits;
     475             :     int8_t base64Counter;
     476             :     UBool inDirectMode;
     477             : 
     478             :     /* set up the local pointers */
     479           0 :     cnv=pArgs->converter;
     480             : 
     481             :     /* set up the local pointers */
     482           0 :     source=pArgs->source;
     483           0 :     sourceLimit=pArgs->sourceLimit;
     484           0 :     target=(uint8_t *)pArgs->target;
     485           0 :     targetLimit=(uint8_t *)pArgs->targetLimit;
     486           0 :     offsets=pArgs->offsets;
     487             : 
     488             :     /* get the state machine state */
     489             :     {
     490           0 :         uint32_t status=cnv->fromUnicodeStatus;
     491           0 :         encodeDirectly= status<0x10000000 ? encodeDirectlyMaximum : encodeDirectlyRestricted;
     492           0 :         inDirectMode=(UBool)((status>>24)&1);
     493           0 :         base64Counter=(int8_t)(status>>16);
     494           0 :         bits=(uint8_t)status;
     495           0 :         U_ASSERT(bits<=UPRV_LENGTHOF(toBase64));
     496             :     }
     497             : 
     498             :     /* UTF-7 always encodes UTF-16 code units, therefore we need only a simple sourceIndex */
     499           0 :     sourceIndex=0;
     500             : 
     501           0 :     if(inDirectMode) {
     502             : directMode:
     503           0 :         length=(int32_t)(sourceLimit-source);
     504           0 :         targetCapacity=(int32_t)(targetLimit-target);
     505           0 :         if(length>targetCapacity) {
     506           0 :             length=targetCapacity;
     507             :         }
     508           0 :         while(length>0) {
     509           0 :             c=*source++;
     510             :             /* currently always encode CR LF SP TAB directly */
     511           0 :             if(c<=127 && encodeDirectly[c]) {
     512             :                 /* encode directly */
     513           0 :                 *target++=(uint8_t)c;
     514           0 :                 if(offsets!=NULL) {
     515           0 :                     *offsets++=sourceIndex++;
     516             :                 }
     517           0 :             } else if(c==PLUS) {
     518             :                 /* output +- for + */
     519           0 :                 *target++=PLUS;
     520           0 :                 if(target<targetLimit) {
     521           0 :                     *target++=MINUS;
     522           0 :                     if(offsets!=NULL) {
     523           0 :                         *offsets++=sourceIndex;
     524           0 :                         *offsets++=sourceIndex++;
     525             :                     }
     526             :                     /* realign length and targetCapacity */
     527           0 :                     goto directMode;
     528             :                 } else {
     529           0 :                     if(offsets!=NULL) {
     530           0 :                         *offsets++=sourceIndex++;
     531             :                     }
     532           0 :                     cnv->charErrorBuffer[0]=MINUS;
     533           0 :                     cnv->charErrorBufferLength=1;
     534           0 :                     *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     535           0 :                     break;
     536             :                 }
     537             :             } else {
     538             :                 /* un-read this character and switch to Unicode Mode */
     539           0 :                 --source;
     540           0 :                 *target++=PLUS;
     541           0 :                 if(offsets!=NULL) {
     542           0 :                     *offsets++=sourceIndex;
     543             :                 }
     544           0 :                 inDirectMode=FALSE;
     545           0 :                 base64Counter=0;
     546           0 :                 goto unicodeMode;
     547             :             }
     548           0 :             --length;
     549             :         }
     550           0 :         if(source<sourceLimit && target>=targetLimit) {
     551             :             /* target is full */
     552           0 :             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     553             :         }
     554             :     } else {
     555             : unicodeMode:
     556           0 :         while(source<sourceLimit) {
     557           0 :             if(target<targetLimit) {
     558           0 :                 c=*source++;
     559           0 :                 if(c<=127 && encodeDirectly[c]) {
     560             :                     /* encode directly */
     561           0 :                     inDirectMode=TRUE;
     562             : 
     563             :                     /* trick: back out this character to make this easier */
     564           0 :                     --source;
     565             : 
     566             :                     /* terminate the base64 sequence */
     567           0 :                     if(base64Counter!=0) {
     568             :                         /* write remaining bits for the previous character */
     569           0 :                         *target++=toBase64[bits];
     570           0 :                         if(offsets!=NULL) {
     571           0 :                             *offsets++=sourceIndex-1;
     572             :                         }
     573             :                     }
     574           0 :                     if(fromBase64[c]!=-1) {
     575             :                         /* need to terminate with a minus */
     576           0 :                         if(target<targetLimit) {
     577           0 :                             *target++=MINUS;
     578           0 :                             if(offsets!=NULL) {
     579           0 :                                 *offsets++=sourceIndex-1;
     580             :                             }
     581             :                         } else {
     582           0 :                             cnv->charErrorBuffer[0]=MINUS;
     583           0 :                             cnv->charErrorBufferLength=1;
     584           0 :                             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     585           0 :                             break;
     586             :                         }
     587             :                     }
     588           0 :                     goto directMode;
     589             :                 } else {
     590             :                     /*
     591             :                      * base64 this character:
     592             :                      * Output 2 or 3 base64 bytes for the remaining bits of the previous character
     593             :                      * and the bits of this character, each implicitly in UTF-16BE.
     594             :                      *
     595             :                      * Here, bits is an 8-bit variable because only 6 bits need to be kept from one
     596             :                      * character to the next. The actual 2 or 4 bits are shifted to the left edge
     597             :                      * of the 6-bits field 5..0 to make the termination of the base64 sequence easier.
     598             :                      */
     599           0 :                     switch(base64Counter) {
     600             :                     case 0:
     601           0 :                         *target++=toBase64[c>>10];
     602           0 :                         if(target<targetLimit) {
     603           0 :                             *target++=toBase64[(c>>4)&0x3f];
     604           0 :                             if(offsets!=NULL) {
     605           0 :                                 *offsets++=sourceIndex;
     606           0 :                                 *offsets++=sourceIndex++;
     607             :                             }
     608             :                         } else {
     609           0 :                             if(offsets!=NULL) {
     610           0 :                                 *offsets++=sourceIndex++;
     611             :                             }
     612           0 :                             cnv->charErrorBuffer[0]=toBase64[(c>>4)&0x3f];
     613           0 :                             cnv->charErrorBufferLength=1;
     614           0 :                             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     615             :                         }
     616           0 :                         bits=(uint8_t)((c&15)<<2);
     617           0 :                         base64Counter=1;
     618           0 :                         break;
     619             :                     case 1:
     620           0 :                         *target++=toBase64[bits|(c>>14)];
     621           0 :                         if(target<targetLimit) {
     622           0 :                             *target++=toBase64[(c>>8)&0x3f];
     623           0 :                             if(target<targetLimit) {
     624           0 :                                 *target++=toBase64[(c>>2)&0x3f];
     625           0 :                                 if(offsets!=NULL) {
     626           0 :                                     *offsets++=sourceIndex;
     627           0 :                                     *offsets++=sourceIndex;
     628           0 :                                     *offsets++=sourceIndex++;
     629             :                                 }
     630             :                             } else {
     631           0 :                                 if(offsets!=NULL) {
     632           0 :                                     *offsets++=sourceIndex;
     633           0 :                                     *offsets++=sourceIndex++;
     634             :                                 }
     635           0 :                                 cnv->charErrorBuffer[0]=toBase64[(c>>2)&0x3f];
     636           0 :                                 cnv->charErrorBufferLength=1;
     637           0 :                                 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     638             :                             }
     639             :                         } else {
     640           0 :                             if(offsets!=NULL) {
     641           0 :                                 *offsets++=sourceIndex++;
     642             :                             }
     643           0 :                             cnv->charErrorBuffer[0]=toBase64[(c>>8)&0x3f];
     644           0 :                             cnv->charErrorBuffer[1]=toBase64[(c>>2)&0x3f];
     645           0 :                             cnv->charErrorBufferLength=2;
     646           0 :                             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     647             :                         }
     648           0 :                         bits=(uint8_t)((c&3)<<4);
     649           0 :                         base64Counter=2;
     650           0 :                         break;
     651             :                     case 2:
     652           0 :                         *target++=toBase64[bits|(c>>12)];
     653           0 :                         if(target<targetLimit) {
     654           0 :                             *target++=toBase64[(c>>6)&0x3f];
     655           0 :                             if(target<targetLimit) {
     656           0 :                                 *target++=toBase64[c&0x3f];
     657           0 :                                 if(offsets!=NULL) {
     658           0 :                                     *offsets++=sourceIndex;
     659           0 :                                     *offsets++=sourceIndex;
     660           0 :                                     *offsets++=sourceIndex++;
     661             :                                 }
     662             :                             } else {
     663           0 :                                 if(offsets!=NULL) {
     664           0 :                                     *offsets++=sourceIndex;
     665           0 :                                     *offsets++=sourceIndex++;
     666             :                                 }
     667           0 :                                 cnv->charErrorBuffer[0]=toBase64[c&0x3f];
     668           0 :                                 cnv->charErrorBufferLength=1;
     669           0 :                                 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     670             :                             }
     671             :                         } else {
     672           0 :                             if(offsets!=NULL) {
     673           0 :                                 *offsets++=sourceIndex++;
     674             :                             }
     675           0 :                             cnv->charErrorBuffer[0]=toBase64[(c>>6)&0x3f];
     676           0 :                             cnv->charErrorBuffer[1]=toBase64[c&0x3f];
     677           0 :                             cnv->charErrorBufferLength=2;
     678           0 :                             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     679             :                         }
     680           0 :                         bits=0;
     681           0 :                         base64Counter=0;
     682           0 :                         break;
     683             :                     default:
     684             :                         /* will never occur */
     685           0 :                         break;
     686             :                     }
     687             :                 }
     688             :             } else {
     689             :                 /* target is full */
     690           0 :                 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     691           0 :                 break;
     692             :             }
     693             :         }
     694             :     }
     695             : 
     696           0 :     if(pArgs->flush && source>=sourceLimit) {
     697             :         /* flush remaining bits to the target */
     698           0 :         if(!inDirectMode) {
     699           0 :             if (base64Counter!=0) {
     700           0 :                 if(target<targetLimit) {
     701           0 :                     *target++=toBase64[bits];
     702           0 :                     if(offsets!=NULL) {
     703           0 :                         *offsets++=sourceIndex-1;
     704             :                     }
     705             :                 } else {
     706           0 :                     cnv->charErrorBuffer[cnv->charErrorBufferLength++]=toBase64[bits];
     707           0 :                     *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     708             :                 }
     709             :             }
     710             :             /* Add final MINUS to terminate unicodeMode */
     711           0 :             if(target<targetLimit) {
     712           0 :                 *target++=MINUS;
     713           0 :                 if(offsets!=NULL) {
     714           0 :                     *offsets++=sourceIndex-1;
     715             :                 }
     716             :             } else {
     717           0 :                 cnv->charErrorBuffer[cnv->charErrorBufferLength++]=MINUS;
     718           0 :                 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     719             :             }
     720             :         }
     721             :         /* reset the state for the next conversion */
     722           0 :         cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */
     723             :     } else {
     724             :         /* set the converter state back into UConverter */
     725           0 :         cnv->fromUnicodeStatus=
     726           0 :             (cnv->fromUnicodeStatus&0xf0000000)|    /* keep version*/
     727           0 :             ((uint32_t)inDirectMode<<24)|((uint32_t)base64Counter<<16)|(uint32_t)bits;
     728             :     }
     729             : 
     730             :     /* write back the updated pointers */
     731           0 :     pArgs->source=source;
     732           0 :     pArgs->target=(char *)target;
     733           0 :     pArgs->offsets=offsets;
     734           0 :     return;
     735             : }
     736             : 
     737             : static const char * U_CALLCONV
     738           0 : _UTF7GetName(const UConverter *cnv) { /* returns the name string for this converter, chosen by the version stored in cnv->fromUnicodeStatus */
     739           0 :     switch(cnv->fromUnicodeStatus>>28) { /* bits 31..28: the version field that the fromUnicode code preserves with its 0xf0000000 "keep version" mask */
     740             :     case 1:
     741           0 :         return "UTF-7,version=1"; /* version 1: the name carries the version option */
     742             :     default:
     743           0 :         return "UTF-7"; /* every other version value reports the plain name */
     744             :     }
     745             : }
     746             : U_CDECL_END
     747             : 
     748             : static const UConverterImpl _UTF7Impl={ /* function table for the UTF-7 converter; slot labels below assume the UConverterImpl member order from ucnv_cnv.h - TODO confirm against the header */
     749             :     UCNV_UTF7, /* converter type */
     750             : 
     751             :     NULL, /* presumably load: not provided */
     752             :     NULL, /* presumably unload: not provided */
     753             : 
     754             :     _UTF7Open, /* presumably open */
     755             :     NULL, /* presumably close: not provided */
     756             :     _UTF7Reset, /* presumably reset */
     757             : 
     758             :     _UTF7ToUnicodeWithOffsets, /* the same to-Unicode function fills two consecutive slots, */
     759             :     _UTF7ToUnicodeWithOffsets, /* presumably the plain and the with-offsets entry points */
     760             :     _UTF7FromUnicodeWithOffsets, /* likewise one from-Unicode function for both slots; */
     761             :     _UTF7FromUnicodeWithOffsets, /* it tests offsets!=NULL before every offsets write */
     762             :     NULL, /* presumably getNextUChar: not provided */
     763             : 
     764             :     NULL, /* presumably getStarters: not provided */
     765             :     _UTF7GetName, /* name lookup; returns "UTF-7" or "UTF-7,version=1" */
     766             :     NULL, /* we don't need writeSub() because we never call a callback at fromUnicode() */
     767             :     NULL, /* presumably safeClone: not provided */
     768             :     ucnv_getCompleteUnicodeSet, /* presumably getUnicodeSet; helper is defined outside this file section */
     769             : 
     770             :     NULL, /* presumably toUTF8: not provided */
     771             :     NULL /* presumably fromUTF8: not provided */
     772             : };
     773             : 
     774             : static const UConverterStaticData _UTF7StaticData={ /* static descriptor for the UTF-7 converter; meanings marked "presumably" assume the UConverterStaticData member order - TODO confirm against the ICU header */
     775             :     sizeof(UConverterStaticData), /* size of this structure */
     776             :     "UTF-7", /* converter name; matches the default string returned by _UTF7GetName */
     777             :     0, /* TODO CCSID for UTF-7 */
     778             :     UCNV_IBM, UCNV_UTF7, /* presumably platform, then converter type (same UCNV_UTF7 constant as in _UTF7Impl) */
     779             :     1, 4, /* presumably minimum and maximum bytes per character - confirm */
     780             :     { 0x3f, 0, 0, 0 }, 1, /* the subchar is not used; 0x3f is ASCII '?', the trailing 1 is presumably its length */
     781             :     FALSE, FALSE, /* presumably the toUnicode / fromUnicode fallback flags - confirm */
     782             :     0, /* presumably unicodeMask - confirm */
     783             :     0, /* presumably subChar1 - confirm */
     784             :     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
     785             : };
     786             : 
     787             : const UConverterSharedData _UTF7Data= /* non-static shared-data object for UTF-7, initialized from the static data and the implementation table defined above */
     788             :         UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF7StaticData, &_UTF7Impl);
     789             : 
     790             : /* IMAP mailbox name encoding ----------------------------------------------- */
     791             : 
     792             : /*
     793             :  * RFC 2060: INTERNET MESSAGE ACCESS PROTOCOL - VERSION 4rev1
     794             :  * http://www.ietf.org/rfc/rfc2060.txt
     795             :  *
     796             :  * 5.1.3.  Mailbox International Naming Convention
     797             :  *
     798             :  * By convention, international mailbox names are specified using a
     799             :  * modified version of the UTF-7 encoding described in [UTF-7].  The
     800             :  * purpose of these modifications is to correct the following problems
     801             :  * with UTF-7:
     802             :  *
     803             :  *    1) UTF-7 uses the "+" character for shifting; this conflicts with
     804             :  *       the common use of "+" in mailbox names, in particular USENET
     805             :  *       newsgroup names.
     806             :  *
     807             :  *    2) UTF-7's encoding is BASE64 which uses the "/" character; this
     808             :  *       conflicts with the use of "/" as a popular hierarchy delimiter.
     809             :  *
     810             :  *    3) UTF-7 prohibits the unencoded usage of "\"; this conflicts with
     811             :  *       the use of "\" as a popular hierarchy delimiter.
     812             :  *
     813             :  *    4) UTF-7 prohibits the unencoded usage of "~"; this conflicts with
     814             :  *       the use of "~" in some servers as a home directory indicator.
     815             :  *
     816             :  *    5) UTF-7 permits multiple alternate forms to represent the same
     817             :  *       string; in particular, printable US-ASCII characters can be
     818             :  *       represented in encoded form.
     819             :  *
     820             :  * In modified UTF-7, printable US-ASCII characters except for "&"
     821             :  * represent themselves; that is, characters with octet values 0x20-0x25
     822             :  * and 0x27-0x7e.  The character "&" (0x26) is represented by the two-
     823             :  * octet sequence "&-".
     824             :  *
     825             :  * All other characters (octet values 0x00-0x1f, 0x7f-0xff, and all
     826             :  * Unicode 16-bit octets) are represented in modified BASE64, with a
     827             :  * further modification from [UTF-7] that "," is used instead of "/".
     828             :  * Modified BASE64 MUST NOT be used to represent any printing US-ASCII
     829             :  * character which can represent itself.
     830             :  *
     831             :  * "&" is used to shift to modified BASE64 and "-" to shift back to US-
     832             :  * ASCII.  All names start in US-ASCII, and MUST end in US-ASCII (that
     833             :  * is, a name that ends with a Unicode 16-bit octet MUST end with a
     834             :  * "-").
     835             :  *
     836             :  * For example, here is a mailbox name which mixes English, Japanese,
     837             :  * and Chinese text: ~peter/mail/&ZeVnLIqe-/&U,BTFw-
     838             :  */
     839             : 
     840             : /*
     841             :  * Tests for US-ASCII characters belonging to character classes
     842             :  * defined in UTF-7.
     843             :  *
     844             :  * Set D (directly encoded characters) consists of the following
     845             :  * characters: the upper and lower case letters A through Z
     846             :  * and a through z, the 10 digits 0-9, and the following nine special
     847             :  * characters (note that "+" and "=" are omitted):
     848             :  *     '(),-./:?
     849             :  *
     850             :  * Set O (optional direct characters) consists of the following
     851             :  * characters (note that "\" and "~" are omitted):
     852             :  *     !"#$%&*;<=>@[]^_`{|}
     853             :  *
     854             :  * According to the rules in RFC 2152, the byte values for the following
     855             :  * US-ASCII characters are not used in UTF-7 and are therefore illegal:
     856             :  * - all C0 control codes except for CR LF TAB
     857             :  * - BACKSLASH
     858             :  * - TILDE
     859             :  * - DEL
     860             :  * - all codes beyond US-ASCII, i.e. all >127
     861             :  */
     862             : 
     863             : /* IMAP modified UTF-7 uses '&' (not '+') to start a base64 sequence and ',' (not '/') as base64 digit 63 */
     864             : #define AMPERSAND 0x26
     865             : #define COMMA 0x2c
     866             : #define SLASH 0x2f
     867             : 
     868             : /* legal byte values: printable US-ASCII 0x20..0x7e (SPACE through TILDE); evaluates c twice */
     869             : #define isLegalIMAP(c) (0x20<=(c) && (c)<=0x7e)
     870             : 
     871             : /* direct-encode all of printable ASCII 0x20..0x7e except '&' 0x26; c is parenthesized so that an expression argument is compared as a whole */
     872             : #define inSetDIMAP(c) (isLegalIMAP(c) && (c)!=AMPERSAND)
     873             : /* IMAP base64 mapping: as toBase64/fromBase64 except digit 63 is ',' and '/' is rejected (-1); arguments are evaluated more than once */
     874             : #define TO_BASE64_IMAP(n) ((n)<63 ? toBase64[n] : COMMA)
     875             : #define FROM_BASE64_IMAP(c) ((c)==COMMA ? 63 : (c)==SLASH ? -1 : fromBase64[c])
     876             : 
     877             : /*
     878             :  * converter status values:
     879             :  *
     880             :  * toUnicodeStatus:
     881             :  *     24 inDirectMode (boolean)
     882             :  * 23..16 base64Counter (-1..7)
     883             :  * 15..0  bits (up to 14 bits incoming base64)
     884             :  *
     885             :  * fromUnicodeStatus:
     886             :  *     24 inDirectMode (boolean)
     887             :  * 23..16 base64Counter (0..2)
     888             :  *  7..0  bits (6 bits outgoing base64)
     889             :  *
     890             :  * ignore bits 31..25
     891             :  */
     892             : 
     893             : U_CDECL_BEGIN
     894             : static void U_CALLCONV
     895           0 : _IMAPToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
     896             :                           UErrorCode *pErrorCode) {
     897             :     UConverter *cnv;
     898             :     const uint8_t *source, *sourceLimit;
     899             :     UChar *target;
     900             :     const UChar *targetLimit;
     901             :     int32_t *offsets;
     902             : 
     903             :     uint8_t *bytes;
     904             :     uint8_t byteIndex;
     905             : 
     906             :     int32_t length, targetCapacity;
     907             : 
     908             :     /* UTF-7 state */
     909             :     uint16_t bits;
     910             :     int8_t base64Counter;
     911             :     UBool inDirectMode;
     912             : 
     913             :     int8_t base64Value;
     914             : 
     915             :     int32_t sourceIndex, nextSourceIndex;
     916             : 
     917             :     UChar c;
     918             :     uint8_t b;
     919             : 
     920             :     /* set up the local pointers */
     921           0 :     cnv=pArgs->converter;
     922             : 
     923           0 :     source=(const uint8_t *)pArgs->source;
     924           0 :     sourceLimit=(const uint8_t *)pArgs->sourceLimit;
     925           0 :     target=pArgs->target;
     926           0 :     targetLimit=pArgs->targetLimit;
     927           0 :     offsets=pArgs->offsets;
     928             :     /* get the state machine state */
     929             :     {
     930           0 :         uint32_t status=cnv->toUnicodeStatus;
     931           0 :         inDirectMode=(UBool)((status>>24)&1);
     932           0 :         base64Counter=(int8_t)(status>>16);
     933           0 :         bits=(uint16_t)status;
     934             :     }
     935           0 :     bytes=cnv->toUBytes;
     936           0 :     byteIndex=cnv->toULength;
     937             : 
     938             :     /* sourceIndex=-1 if the current character began in the previous buffer */
     939           0 :     sourceIndex=byteIndex==0 ? 0 : -1;
     940           0 :     nextSourceIndex=0;
     941             : 
     942           0 :     if(inDirectMode) {
     943             : directMode:
     944             :         /*
     945             :          * In Direct Mode, US-ASCII characters are encoded directly, i.e.,
     946             :          * with their US-ASCII byte values.
     947             :          * An ampersand starts Unicode (or "escape") Mode.
     948             :          *
     949             :          * In Direct Mode, only the sourceIndex is used.
     950             :          */
     951           0 :         byteIndex=0;
     952           0 :         length=(int32_t)(sourceLimit-source);
     953           0 :         targetCapacity=(int32_t)(targetLimit-target);
     954           0 :         if(length>targetCapacity) {
     955           0 :             length=targetCapacity;
     956             :         }
     957           0 :         while(length>0) {
     958           0 :             b=*source++;
     959           0 :             if(!isLegalIMAP(b)) {
     960             :                 /* illegal */
     961           0 :                 bytes[0]=b;
     962           0 :                 byteIndex=1;
     963           0 :                 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
     964           0 :                 break;
     965           0 :             } else if(b!=AMPERSAND) {
     966             :                 /* write directly encoded character */
     967           0 :                 *target++=b;
     968           0 :                 if(offsets!=NULL) {
     969           0 :                     *offsets++=sourceIndex++;
     970             :                 }
     971             :             } else /* AMPERSAND */ {
     972             :                 /* switch to Unicode mode */
     973           0 :                 nextSourceIndex=++sourceIndex;
     974           0 :                 inDirectMode=FALSE;
     975           0 :                 byteIndex=0;
     976           0 :                 bits=0;
     977           0 :                 base64Counter=-1;
     978           0 :                 goto unicodeMode;
     979             :             }
     980           0 :             --length;
     981             :         }
     982           0 :         if(source<sourceLimit && target>=targetLimit) {
     983             :             /* target is full */
     984           0 :             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     985             :         }
     986             :     } else {
     987             : unicodeMode:
     988             :         /*
     989             :          * In Unicode (or "escape") Mode, UTF-16BE is base64-encoded.
     990             :          * The base64 sequence ends with any character that is not in the base64 alphabet.
     991             :          * A terminating minus sign is consumed.
     992             :          * US-ASCII must not be base64-ed.
     993             :          *
     994             :          * In Unicode Mode, the sourceIndex has the index to the start of the current
     995             :          * base64 bytes, while nextSourceIndex is precisely parallel to source,
     996             :          * keeping the index to the following byte.
     997             :          * Note that in 2 out of 3 cases, UChars overlap within a base64 byte.
     998             :          */
     999           0 :         while(source<sourceLimit) {
    1000           0 :             if(target<targetLimit) {
    1001           0 :                 bytes[byteIndex++]=b=*source++;
    1002           0 :                 ++nextSourceIndex;
    1003           0 :                 if(b>0x7e) {
    1004             :                     /* illegal - test other illegal US-ASCII values by base64Value==-3 */
    1005           0 :                     inDirectMode=TRUE;
    1006           0 :                     *pErrorCode=U_ILLEGAL_CHAR_FOUND;
    1007           0 :                     break;
    1008           0 :                 } else if((base64Value=FROM_BASE64_IMAP(b))>=0) {
    1009             :                     /* collect base64 bytes into UChars */
    1010           0 :                     switch(base64Counter) {
    1011             :                     case -1: /* -1 is immediately after the & */
    1012             :                     case 0:
    1013           0 :                         bits=base64Value;
    1014           0 :                         base64Counter=1;
    1015           0 :                         break;
    1016             :                     case 1:
    1017             :                     case 3:
    1018             :                     case 4:
    1019             :                     case 6:
    1020           0 :                         bits=(uint16_t)((bits<<6)|base64Value);
    1021           0 :                         ++base64Counter;
    1022           0 :                         break;
    1023             :                     case 2:
    1024           0 :                         c=(UChar)((bits<<4)|(base64Value>>2));
    1025           0 :                         if(isLegalIMAP(c)) {
    1026             :                             /* illegal */
    1027           0 :                             inDirectMode=TRUE;
    1028           0 :                             *pErrorCode=U_ILLEGAL_CHAR_FOUND;
    1029           0 :                             goto endloop;
    1030             :                         }
    1031           0 :                         *target++=c;
    1032           0 :                         if(offsets!=NULL) {
    1033           0 :                             *offsets++=sourceIndex;
    1034           0 :                             sourceIndex=nextSourceIndex-1;
    1035             :                         }
    1036           0 :                         bytes[0]=b; /* keep this byte in case an error occurs */
    1037           0 :                         byteIndex=1;
    1038           0 :                         bits=(uint16_t)(base64Value&3);
    1039           0 :                         base64Counter=3;
    1040           0 :                         break;
    1041             :                     case 5:
    1042           0 :                         c=(UChar)((bits<<2)|(base64Value>>4));
    1043           0 :                         if(isLegalIMAP(c)) {
    1044             :                             /* illegal */
    1045           0 :                             inDirectMode=TRUE;
    1046           0 :                             *pErrorCode=U_ILLEGAL_CHAR_FOUND;
    1047           0 :                             goto endloop;
    1048             :                         }
    1049           0 :                         *target++=c;
    1050           0 :                         if(offsets!=NULL) {
    1051           0 :                             *offsets++=sourceIndex;
    1052           0 :                             sourceIndex=nextSourceIndex-1;
    1053             :                         }
    1054           0 :                         bytes[0]=b; /* keep this byte in case an error occurs */
    1055           0 :                         byteIndex=1;
    1056           0 :                         bits=(uint16_t)(base64Value&15);
    1057           0 :                         base64Counter=6;
    1058           0 :                         break;
    1059             :                     case 7:
    1060           0 :                         c=(UChar)((bits<<6)|base64Value);
    1061           0 :                         if(isLegalIMAP(c)) {
    1062             :                             /* illegal */
    1063           0 :                             inDirectMode=TRUE;
    1064           0 :                             *pErrorCode=U_ILLEGAL_CHAR_FOUND;
    1065           0 :                             goto endloop;
    1066             :                         }
    1067           0 :                         *target++=c;
    1068           0 :                         if(offsets!=NULL) {
    1069           0 :                             *offsets++=sourceIndex;
    1070           0 :                             sourceIndex=nextSourceIndex;
    1071             :                         }
    1072           0 :                         byteIndex=0;
    1073           0 :                         bits=0;
    1074           0 :                         base64Counter=0;
    1075           0 :                         break;
    1076             :                     default:
    1077             :                         /* will never occur */
    1078           0 :                         break;
    1079             :                     }
    1080           0 :                 } else if(base64Value==-2) {
    1081             :                     /* minus sign terminates the base64 sequence */
    1082           0 :                     inDirectMode=TRUE;
    1083           0 :                     if(base64Counter==-1) {
    1084             :                         /* &- i.e. a minus immediately following an ampersand */
    1085           0 :                         *target++=AMPERSAND;
    1086           0 :                         if(offsets!=NULL) {
    1087           0 :                             *offsets++=sourceIndex-1;
    1088             :                         }
    1089             :                     } else {
    1090             :                         /* absorb the minus and leave the Unicode Mode */
    1091           0 :                         if(bits!=0 || (base64Counter!=0 && base64Counter!=3 && base64Counter!=6)) {
    1092             :                             /* bits are illegally left over, a UChar is incomplete */
    1093             :                             /* base64Counter other than 0, 3, 6 means non-minimal zero-padding, also illegal */
    1094           0 :                             *pErrorCode=U_ILLEGAL_CHAR_FOUND;
    1095           0 :                             break;
    1096             :                         }
    1097             :                     }
    1098           0 :                     sourceIndex=nextSourceIndex;
    1099           0 :                     goto directMode;
    1100             :                 } else {
    1101           0 :                     if(base64Counter==-1) {
    1102             :                         /* illegal: & immediately followed by something other than base64 or minus sign */
    1103             :                         /* include the ampersand in the reported sequence */
    1104           0 :                         --sourceIndex;
    1105           0 :                         bytes[0]=AMPERSAND;
    1106           0 :                         bytes[1]=b;
    1107           0 :                         byteIndex=2;
    1108             :                     }
    1109             :                     /* base64Value==-1 for characters that are illegal only in Unicode mode */
    1110             :                     /* base64Value==-3 for illegal characters */
    1111             :                     /* illegal */
    1112           0 :                     inDirectMode=TRUE;
    1113           0 :                     *pErrorCode=U_ILLEGAL_CHAR_FOUND;
    1114           0 :                     break;
    1115             :                 }
    1116             :             } else {
    1117             :                 /* target is full */
    1118           0 :                 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    1119           0 :                 break;
    1120             :             }
    1121             :         }
    1122             :     }
    1123             : endloop:
    1124             : 
    1125             :     /*
    1126             :      * the end of the input stream and detection of truncated input
    1127             :      * are handled by the framework, but here we must check if we are in Unicode
    1128             :      * mode and byteIndex==0 because we must end in direct mode
    1129             :      *
    1130             :      * conditions:
    1131             :      *   successful
    1132             :      *   in Unicode mode and byteIndex==0
    1133             :      *   end of input and no truncated input
    1134             :      */
    1135           0 :     if( U_SUCCESS(*pErrorCode) &&
    1136           0 :         !inDirectMode && byteIndex==0 &&
    1137           0 :         pArgs->flush && source>=sourceLimit
    1138             :     ) {
    1139           0 :         if(base64Counter==-1) {
    1140             :             /* & at the very end of the input */
    1141             :             /* make the ampersand the reported sequence */
    1142           0 :             bytes[0]=AMPERSAND;
    1143           0 :             byteIndex=1;
    1144             :         }
    1145             :         /* else if(base64Counter!=-1) byteIndex remains 0 because there is no particular byte sequence */
    1146             : 
    1147           0 :         inDirectMode=TRUE; /* avoid looping */
    1148           0 :         *pErrorCode=U_TRUNCATED_CHAR_FOUND;
    1149             :     }
    1150             : 
    1151             :     /* set the converter state back into UConverter */
    1152           0 :     cnv->toUnicodeStatus=((uint32_t)inDirectMode<<24)|((uint32_t)((uint8_t)base64Counter)<<16)|(uint32_t)bits;
    1153           0 :     cnv->toULength=byteIndex;
    1154             : 
    1155             :     /* write back the updated pointers */
    1156           0 :     pArgs->source=(const char *)source;
    1157           0 :     pArgs->target=target;
    1158           0 :     pArgs->offsets=offsets;
    1159           0 :     return;
    1160             : }
    1161             : 
    1162             : static void U_CALLCONV /* fromUnicode for IMAP-mailbox-name: UTF-16 code units in, modified UTF-7 bytes out */
    1163           0 : _IMAPFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
    1164             :                             UErrorCode *pErrorCode) {
    1165             :     UConverter *cnv;
    1166             :     const UChar *source, *sourceLimit;
    1167             :     uint8_t *target, *targetLimit;
    1168             :     int32_t *offsets;
    1169             :
    1170             :     int32_t length, targetCapacity, sourceIndex;
    1171             :     UChar c;
    1172             :     uint8_t b;
    1173             :
    1174             :     /* UTF-7 state, loaded from and stored back into cnv->fromUnicodeStatus */
    1175             :     uint8_t bits; /* leftover bits of the previous UChar, left-aligned in a 6-bit field */
    1176             :     int8_t base64Counter; /* 0..2: which UChar of a 3-UChar (8 base64 bytes) group comes next */
    1177             :     UBool inDirectMode; /* TRUE: writing characters directly; FALSE: inside a base64 (Unicode Mode) section */
    1178             :
    1179             :     /* get the converter object */
    1180           0 :     cnv=pArgs->converter;
    1181             :
    1182             :     /* set up the local pointers */
    1183           0 :     source=pArgs->source;
    1184           0 :     sourceLimit=pArgs->sourceLimit;
    1185           0 :     target=(uint8_t *)pArgs->target;
    1186           0 :     targetLimit=(uint8_t *)pArgs->targetLimit;
    1187           0 :     offsets=pArgs->offsets;
    1188             :
    1189             :     /* get the state machine state: bit 24=inDirectMode, bits 23..16=base64Counter, bits 7..0=bits */
    1190             :     {
    1191           0 :         uint32_t status=cnv->fromUnicodeStatus;
    1192           0 :         inDirectMode=(UBool)((status>>24)&1);
    1193           0 :         base64Counter=(int8_t)(status>>16);
    1194           0 :         bits=(uint8_t)status;
    1195             :     }
    1196             :
    1197             :     /* UTF-7 always encodes UTF-16 code units, therefore we need only a simple sourceIndex */
    1198           0 :     sourceIndex=0;
    1199             :
    1200           0 :     if(inDirectMode) {
    1201             : directMode:
    1202           0 :         length=(int32_t)(sourceLimit-source); /* handle at most min(source units left, target bytes left) per pass */
    1203           0 :         targetCapacity=(int32_t)(targetLimit-target);
    1204           0 :         if(length>targetCapacity) {
    1205           0 :             length=targetCapacity;
    1206             :         }
    1207           0 :         while(length>0) {
    1208           0 :             c=*source++;
    1209             :             /* encode 0x20..0x7e except '&' directly */
    1210           0 :             if(inSetDIMAP(c)) {
    1211             :                 /* encode directly */
    1212           0 :                 *target++=(uint8_t)c;
    1213           0 :                 if(offsets!=NULL) {
    1214           0 :                     *offsets++=sourceIndex++;
    1215             :                 }
    1216           0 :             } else if(c==AMPERSAND) {
    1217             :                 /* output &- for & */
    1218           0 :                 *target++=AMPERSAND;
    1219           0 :                 if(target<targetLimit) {
    1220           0 :                     *target++=MINUS;
    1221           0 :                     if(offsets!=NULL) {
    1222           0 :                         *offsets++=sourceIndex;
    1223           0 :                         *offsets++=sourceIndex++;
    1224             :                     }
    1225             :                     /* realign length and targetCapacity: two bytes were written for one source unit */
    1226           0 :                     goto directMode;
    1227             :                 } else {
    1228           0 :                     if(offsets!=NULL) {
    1229           0 :                         *offsets++=sourceIndex++;
    1230             :                     }
    1231           0 :                     cnv->charErrorBuffer[0]=MINUS; /* the minus did not fit: store it in the overflow buffer */
    1232           0 :                     cnv->charErrorBufferLength=1;
    1233           0 :                     *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    1234           0 :                     break;
    1235             :                 }
    1236             :             } else {
    1237             :                 /* un-read this character and switch to Unicode Mode */
    1238           0 :                 --source;
    1239           0 :                 *target++=AMPERSAND;
    1240           0 :                 if(offsets!=NULL) {
    1241           0 :                     *offsets++=sourceIndex;
    1242             :                 }
    1243           0 :                 inDirectMode=FALSE;
    1244           0 :                 base64Counter=0;
    1245           0 :                 goto unicodeMode;
    1246             :             }
    1247           0 :             --length;
    1248             :         }
    1249           0 :         if(source<sourceLimit && target>=targetLimit) {
    1250             :             /* target is full */
    1251           0 :             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    1252             :         }
    1253             :     } else {
    1254             : unicodeMode:
    1255           0 :         while(source<sourceLimit) {
    1256           0 :             if(target<targetLimit) {
    1257           0 :                 c=*source++;
    1258           0 :                 if(isLegalIMAP(c)) {
    1259             :                     /* this character can be written directly: leave Unicode Mode */
    1260           0 :                     inDirectMode=TRUE;
    1261             :
    1262             :                     /* trick: back out this character to make this easier */
    1263           0 :                     --source;
    1264             :
    1265             :                     /* terminate the base64 sequence */
    1266           0 :                     if(base64Counter!=0) {
    1267             :                         /* write remaining bits for the previous character */
    1268           0 :                         *target++=TO_BASE64_IMAP(bits);
    1269           0 :                         if(offsets!=NULL) {
    1270           0 :                             *offsets++=sourceIndex-1;
    1271             :                         }
    1272             :                     }
    1273             :                     /* need to terminate with a minus */
    1274           0 :                     if(target<targetLimit) {
    1275           0 :                         *target++=MINUS;
    1276           0 :                         if(offsets!=NULL) {
    1277           0 :                             *offsets++=sourceIndex-1;
    1278             :                         }
    1279             :                     } else {
    1280           0 :                         cnv->charErrorBuffer[0]=MINUS;
    1281           0 :                         cnv->charErrorBufferLength=1;
    1282           0 :                         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    1283           0 :                         break;
    1284             :                     }
    1285           0 :                     goto directMode;
    1286             :                 } else {
    1287             :                     /*
    1288             :                      * base64 this character:
    1289             :                      * Output 2 or 3 base64 bytes for the remaining bits of the previous character
    1290             :                      * and the bits of this character, each implicitly in UTF-16BE.
    1291             :                      *
    1292             :                      * Here, bits is an 8-bit variable because only 6 bits need to be kept from one
    1293             :                      * character to the next. The actual 2 or 4 bits are shifted to the left edge
    1294             :                      * of the 6-bits field 5..0 to make the termination of the base64 sequence easier.
    1295             :                      */
    1296           0 :                     switch(base64Counter) {
    1297             :                     case 0:
    1298           0 :                         b=(uint8_t)(c>>10);
    1299           0 :                         *target++=TO_BASE64_IMAP(b);
    1300           0 :                         if(target<targetLimit) {
    1301           0 :                             b=(uint8_t)((c>>4)&0x3f);
    1302           0 :                             *target++=TO_BASE64_IMAP(b);
    1303           0 :                             if(offsets!=NULL) {
    1304           0 :                                 *offsets++=sourceIndex;
    1305           0 :                                 *offsets++=sourceIndex++;
    1306             :                             }
    1307             :                         } else { /* target full after the first byte: the second byte goes to charErrorBuffer */
    1308           0 :                             if(offsets!=NULL) {
    1309           0 :                                 *offsets++=sourceIndex++;
    1310             :                             }
    1311           0 :                             b=(uint8_t)((c>>4)&0x3f);
    1312           0 :                             cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
    1313           0 :                             cnv->charErrorBufferLength=1;
    1314           0 :                             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    1315             :                         }
    1316           0 :                         bits=(uint8_t)((c&15)<<2); /* keep the low 4 bits, left-aligned in the 6-bit field */
    1317           0 :                         base64Counter=1;
    1318           0 :                         break;
    1319             :                     case 1:
    1320           0 :                         b=(uint8_t)(bits|(c>>14));
    1321           0 :                         *target++=TO_BASE64_IMAP(b);
    1322           0 :                         if(target<targetLimit) {
    1323           0 :                             b=(uint8_t)((c>>8)&0x3f);
    1324           0 :                             *target++=TO_BASE64_IMAP(b);
    1325           0 :                             if(target<targetLimit) {
    1326           0 :                                 b=(uint8_t)((c>>2)&0x3f);
    1327           0 :                                 *target++=TO_BASE64_IMAP(b);
    1328           0 :                                 if(offsets!=NULL) {
    1329           0 :                                     *offsets++=sourceIndex;
    1330           0 :                                     *offsets++=sourceIndex;
    1331           0 :                                     *offsets++=sourceIndex++;
    1332             :                                 }
    1333             :                             } else { /* target full after two bytes: the third byte goes to charErrorBuffer */
    1334           0 :                                 if(offsets!=NULL) {
    1335           0 :                                     *offsets++=sourceIndex;
    1336           0 :                                     *offsets++=sourceIndex++;
    1337             :                                 }
    1338           0 :                                 b=(uint8_t)((c>>2)&0x3f);
    1339           0 :                                 cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
    1340           0 :                                 cnv->charErrorBufferLength=1;
    1341           0 :                                 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    1342             :                             }
    1343             :                         } else { /* target full after the first byte: the remaining two bytes go to charErrorBuffer */
    1344           0 :                             if(offsets!=NULL) {
    1345           0 :                                 *offsets++=sourceIndex++;
    1346             :                             }
    1347           0 :                             b=(uint8_t)((c>>8)&0x3f);
    1348           0 :                             cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
    1349           0 :                             b=(uint8_t)((c>>2)&0x3f);
    1350           0 :                             cnv->charErrorBuffer[1]=TO_BASE64_IMAP(b);
    1351           0 :                             cnv->charErrorBufferLength=2;
    1352           0 :                             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    1353             :                         }
    1354           0 :                         bits=(uint8_t)((c&3)<<4); /* keep the low 2 bits, left-aligned in the 6-bit field */
    1355           0 :                         base64Counter=2;
    1356           0 :                         break;
    1357             :                     case 2:
    1358           0 :                         b=(uint8_t)(bits|(c>>12));
    1359           0 :                         *target++=TO_BASE64_IMAP(b);
    1360           0 :                         if(target<targetLimit) {
    1361           0 :                             b=(uint8_t)((c>>6)&0x3f);
    1362           0 :                             *target++=TO_BASE64_IMAP(b);
    1363           0 :                             if(target<targetLimit) {
    1364           0 :                                 b=(uint8_t)(c&0x3f);
    1365           0 :                                 *target++=TO_BASE64_IMAP(b);
    1366           0 :                                 if(offsets!=NULL) {
    1367           0 :                                     *offsets++=sourceIndex;
    1368           0 :                                     *offsets++=sourceIndex;
    1369           0 :                                     *offsets++=sourceIndex++;
    1370             :                                 }
    1371             :                             } else { /* target full after two bytes: the third byte goes to charErrorBuffer */
    1372           0 :                                 if(offsets!=NULL) {
    1373           0 :                                     *offsets++=sourceIndex;
    1374           0 :                                     *offsets++=sourceIndex++;
    1375             :                                 }
    1376           0 :                                 b=(uint8_t)(c&0x3f);
    1377           0 :                                 cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
    1378           0 :                                 cnv->charErrorBufferLength=1;
    1379           0 :                                 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    1380             :                             }
    1381             :                         } else { /* target full after the first byte: the remaining two bytes go to charErrorBuffer */
    1382           0 :                             if(offsets!=NULL) {
    1383           0 :                                 *offsets++=sourceIndex++;
    1384             :                             }
    1385           0 :                             b=(uint8_t)((c>>6)&0x3f);
    1386           0 :                             cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
    1387           0 :                             b=(uint8_t)(c&0x3f);
    1388           0 :                             cnv->charErrorBuffer[1]=TO_BASE64_IMAP(b);
    1389           0 :                             cnv->charErrorBufferLength=2;
    1390           0 :                             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    1391             :                         }
    1392           0 :                         bits=0; /* the third UChar completes the group: no bits are left over */
    1393           0 :                         base64Counter=0;
    1394           0 :                         break;
    1395             :                     default:
    1396             :                         /* will never occur */
    1397           0 :                         break;
    1398             :                     }
    1399             :                 }
    1400             :             } else {
    1401             :                 /* target is full */
    1402           0 :                 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    1403           0 :                 break;
    1404             :             }
    1405             :         }
    1406             :     }
    1407             :
    1408           0 :     if(pArgs->flush && source>=sourceLimit) {
    1409             :         /* flush remaining bits to the target */
    1410           0 :         if(!inDirectMode) {
    1411           0 :             if(base64Counter!=0) {
    1412           0 :                 if(target<targetLimit) {
    1413           0 :                     *target++=TO_BASE64_IMAP(bits);
    1414           0 :                     if(offsets!=NULL) {
    1415           0 :                         *offsets++=sourceIndex-1;
    1416             :                     }
    1417             :                 } else {
    1418           0 :                     cnv->charErrorBuffer[cnv->charErrorBufferLength++]=TO_BASE64_IMAP(bits); /* append after any bytes already stored there */
    1419           0 :                     *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    1420             :                 }
    1421             :             }
    1422             :             /* need to terminate with a minus */
    1423           0 :             if(target<targetLimit) {
    1424           0 :                 *target++=MINUS;
    1425           0 :                 if(offsets!=NULL) {
    1426           0 :                     *offsets++=sourceIndex-1;
    1427             :                 }
    1428             :             } else {
    1429           0 :                 cnv->charErrorBuffer[cnv->charErrorBufferLength++]=MINUS;
    1430           0 :                 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    1431             :             }
    1432             :         }
    1433             :         /* reset the state for the next conversion */
    1434           0 :         cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */
    1435             :     } else {
    1436             :         /* not flushing at the end of input: save the state in UConverter so the next call can resume */
    1437           0 :         cnv->fromUnicodeStatus=
    1438           0 :             (cnv->fromUnicodeStatus&0xf0000000)|    /* keep version*/
    1439           0 :             ((uint32_t)inDirectMode<<24)|((uint32_t)base64Counter<<16)|(uint32_t)bits;
    1440             :     }
    1441             :
    1442             :     /* write back the updated pointers */
    1443           0 :     pArgs->source=source;
    1444           0 :     pArgs->target=(char *)target;
    1445           0 :     pArgs->offsets=offsets;
    1446           0 :     return;
    1447             : }
    1448             : U_CDECL_END
    1449             : 
    1450             : static const UConverterImpl _IMAPImpl={ /* implementation table for the IMAP-mailbox-name converter */
    1451             :     UCNV_IMAP_MAILBOX,
    1452             :
    1453             :     NULL,
    1454             :     NULL,
    1455             :
    1456             :     _UTF7Open, /* open and reset functions shared with the UTF-7 converter, judging by their names */
    1457             :     NULL,
    1458             :     _UTF7Reset,
    1459             :
    1460             :     _IMAPToUnicodeWithOffsets, /* NOTE(review): presumably the toUnicode slot; the same function also fills the next slot */
    1461             :     _IMAPToUnicodeWithOffsets,
    1462             :     _IMAPFromUnicodeWithOffsets, /* NOTE(review): presumably the fromUnicode slot; the same function also fills the next slot */
    1463             :     _IMAPFromUnicodeWithOffsets,
    1464             :     NULL,
    1465             :
    1466             :     NULL,
    1467             :     NULL,
    1468             :     NULL, /* we don't need writeSub() because we never call a callback at fromUnicode() */
    1469             :     NULL,
    1470             :     ucnv_getCompleteUnicodeSet,
    1471             :     NULL,
    1472             :     NULL
    1473             : };
    1474             : 
    1475             : static const UConverterStaticData _IMAPStaticData={ /* constant description of the IMAP-mailbox-name converter */
    1476             :     sizeof(UConverterStaticData),
    1477             :     "IMAP-mailbox-name",
    1478             :     0, /* TODO CCSID for IMAP-mailbox-name */
    1479             :     UCNV_IBM, UCNV_IMAP_MAILBOX,
    1480             :     1, 4, /* NOTE(review): presumably min/max bytes per character; confirm against UConverterStaticData */
    1481             :     { 0x3f, 0, 0, 0 }, 1, /* the subchar is not used */
    1482             :     FALSE, FALSE,
    1483             :     0,
    1484             :     0,
    1485             :     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
    1486             : };
    1487             : 
    1488             : const UConverterSharedData _IMAPData= /* shared data object built from _IMAPStaticData and _IMAPImpl */
    1489             :         UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_IMAPStaticData, &_IMAPImpl);
    1490             : 
    1491             : #endif

Generated by: LCOV version 1.13