LCOV - code coverage report
Current view: top level - intl/icu/source/common - ucnvlat1.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 0 332 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 9 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /* 
       4             : **********************************************************************
       5             : *   Copyright (C) 2000-2015, International Business Machines
       6             : *   Corporation and others.  All Rights Reserved.
       7             : **********************************************************************
       8             : *   file name:  ucnvlat1.cpp
       9             : *   encoding:   UTF-8
      10             : *   tab size:   8 (not used)
      11             : *   indentation:4
      12             : *
      13             : *   created on: 2000feb07
      14             : *   created by: Markus W. Scherer
      15             : */
      16             : 
      17             : #include "unicode/utypes.h"
      18             : 
      19             : #if !UCONFIG_NO_CONVERSION
      20             : 
      21             : #include "unicode/ucnv.h"
      22             : #include "unicode/uset.h"
      23             : #include "unicode/utf8.h"
      24             : #include "ucnv_bld.h"
      25             : #include "ucnv_cnv.h"
      26             : 
      27             : /* control optimizations according to the platform */
      28             : #define LATIN1_UNROLL_FROM_UNICODE 1
      29             : 
      30             : /* ISO 8859-1 --------------------------------------------------------------- */
      31             : 
      32             : /* This is a table-less and callback-less version of ucnv_MBCSSingleToBMPWithOffsets().
                     :  *
                     :  * Converts ISO 8859-1 bytes to UTF-16 by copying each source byte into one UChar.
                     :  * The number of units converted is the smaller of the source length
                     :  * (pArgs->sourceLimit-pArgs->source) and the target capacity
                     :  * (pArgs->targetLimit-pArgs->target).
                     :  *
                     :  * pArgs:      source and target are read and written back advanced past the
                     :  *             converted units. offsets may be NULL; if it is not, one offset per
                     :  *             output unit is written (the consecutive source indexes 0, 1, 2, ...)
                     :  *             and the advanced offsets pointer is written back as well.
                     :  * pErrorCode: set to U_BUFFER_OVERFLOW_ERROR when the source is longer than the
                     :  *             target capacity; otherwise it is not modified.
                     :  */
      33             : U_CDECL_BEGIN
      34             : static void U_CALLCONV
      35           0 : _Latin1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
      36             :                             UErrorCode *pErrorCode) {
      37             :     const uint8_t *source;
      38             :     UChar *target;
      39             :     int32_t targetCapacity, length;
      40             :     int32_t *offsets;
      41             : 
      42             :     int32_t sourceIndex;
      43             : 
      44             :     /* set up the local pointers */
      45           0 :     source=(const uint8_t *)pArgs->source;
      46           0 :     target=pArgs->target;
      47           0 :     targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
      48           0 :     offsets=pArgs->offsets;
      49             : 
      50           0 :     sourceIndex=0;
      51             : 
      52             :     /*
      53             :      * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
      54             :      * for the minimum of the sourceLength and targetCapacity
      55             :      * (after this if/else, length and targetCapacity hold the same value)
      56             :      */
      56           0 :     length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source);
      57           0 :     if(length<=targetCapacity) {
      58           0 :         targetCapacity=length;
      59             :     } else {
      60             :         /* target will be full */
      61           0 :         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
      62           0 :         length=targetCapacity;
      63             :     }
      64             : 
      65           0 :     if(targetCapacity>=8) {
      66             :         /*
                     :          * This loop is unrolled for speed and improved pipelining.
                     :          * It copies whole groups of 8 units; targetCapacity and length are reduced
                     :          * to the remainder (0..7) that the simple loops below still have to handle.
                     :          * count drives the copy loop, loops drives the matching offsets loop.
                     :          */
      67             :         int32_t count, loops;
      68             : 
      69           0 :         loops=count=targetCapacity>>3;
      70           0 :         length=targetCapacity&=0x7;
      71           0 :         do {
      72           0 :             target[0]=source[0];
      73           0 :             target[1]=source[1];
      74           0 :             target[2]=source[2];
      75           0 :             target[3]=source[3];
      76           0 :             target[4]=source[4];
      77           0 :             target[5]=source[5];
      78           0 :             target[6]=source[6];
      79           0 :             target[7]=source[7];
      80           0 :             target+=8;
      81           0 :             source+=8;
      82             :         } while(--count>0);
      83             : 
      84           0 :         if(offsets!=NULL) {
      85           0 :             do {
      86           0 :                 offsets[0]=sourceIndex++;
      87           0 :                 offsets[1]=sourceIndex++;
      88           0 :                 offsets[2]=sourceIndex++;
      89           0 :                 offsets[3]=sourceIndex++;
      90           0 :                 offsets[4]=sourceIndex++;
      91           0 :                 offsets[5]=sourceIndex++;
      92           0 :                 offsets[6]=sourceIndex++;
      93           0 :                 offsets[7]=sourceIndex++;
      94           0 :                 offsets+=8;
      95             :             } while(--loops>0);
      96             :         }
      97             :     }
      98             : 
      99             :     /* conversion loop for the units not handled by the unrolled loop above */
     100           0 :     while(targetCapacity>0) {
     101           0 :         *target++=*source++;
     102           0 :         --targetCapacity;
     103             :     }
     104             : 
     105             :     /* write back the updated pointers */
     106           0 :     pArgs->source=(const char *)source;
     107           0 :     pArgs->target=target;
     108             : 
     109             :     /* set offsets for the units converted by the simple loop; length still holds their count */
     110           0 :     if(offsets!=NULL) {
     111           0 :         while(length>0) {
     112           0 :             *offsets++=sourceIndex++;
     113           0 :             --length;
     114             :         }
     115           0 :         pArgs->offsets=offsets;
     116             :     }
     117           0 : }
     118             : 
     119             : /* This is a table-less and callback-less version of ucnv_MBCSSingleGetNextUChar().
                     :  *
                     :  * Returns the next source byte as a code point and advances pArgs->source by one.
                     :  * If pArgs->source is not below pArgs->sourceLimit, nothing is consumed,
                     :  * *pErrorCode is set to U_INDEX_OUTOFBOUNDS_ERROR and 0xffff is returned.
                     :  */
     120             : static UChar32 U_CALLCONV
     121           0 : _Latin1GetNextUChar(UConverterToUnicodeArgs *pArgs,
     122             :                     UErrorCode *pErrorCode) {
     123           0 :     const uint8_t *source=(const uint8_t *)pArgs->source;
     124           0 :     if(source<(const uint8_t *)pArgs->sourceLimit) {
     125             :         /* every byte value maps directly to the code point with the same value */
     125           0 :         pArgs->source=(const char *)(source+1);
     126           0 :         return *source;
     127             :     }
     128             : 
     129             :     /* no output because of empty input */
     130           0 :     *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
     131           0 :     return 0xffff;
     132             : }
     133             : 
     134             : /* This is a table-less version of ucnv_MBCSSingleFromBMPWithOffsets().
                     :  *
                     :  * Converts UTF-16 code units to single bytes by truncating each unit to uint8_t.
                     :  * The highest convertible unit is 0xff when cnv->sharedData is &_Latin1Data and
                     :  * 0x7f (US-ASCII) for any other shared data.
                     :  *
                     :  * Conversion stops at the first code unit above that limit:
                     :  *  - non-surrogate, or a complete surrogate pair: U_INVALID_CHAR_FOUND
                     :  *  - unpaired lead or trail surrogate:             U_ILLEGAL_CHAR_FOUND
                     :  *  - lead surrogate at the very end of the input:  no error; the lead is kept in
                     :  *    cnv->fromUChar32 and picked up again at getTrail on the next call
                     :  * In the error cases the offending code point is stored in cnv->fromUChar32.
                     :  * If no error was set, input remains and the target is full,
                     :  * U_BUFFER_OVERFLOW_ERROR is set.
                     :  *
                     :  * pArgs->source, pArgs->target and pArgs->offsets are written back on every path.
                     :  * Offsets are only written when pArgs->offsets is not NULL.
                     :  */
     135             : static void U_CALLCONV
     136           0 : _Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
     137             :                               UErrorCode *pErrorCode) {
     138             :     UConverter *cnv;
     139             :     const UChar *source, *sourceLimit;
     140             :     uint8_t *target, *oldTarget;
     141             :     int32_t targetCapacity, length;
     142             :     int32_t *offsets;
     143             : 
     144             :     UChar32 cp;
     145             :     UChar c, max;
     146             : 
     147             :     int32_t sourceIndex;
     148             : 
     149             :     /* set up the local pointers */
     150           0 :     cnv=pArgs->converter;
     151           0 :     source=pArgs->source;
     152           0 :     sourceLimit=pArgs->sourceLimit;
     153           0 :     target=oldTarget=(uint8_t *)pArgs->target;
     154           0 :     targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
     155           0 :     offsets=pArgs->offsets;
     156             : 
     157           0 :     if(cnv->sharedData==&_Latin1Data) {
     158           0 :         max=0xff; /* Latin-1 */
     159             :     } else {
     160           0 :         max=0x7f; /* US-ASCII */
     161             :     }
     162             : 
     163             :     /* get the converter state from UConverter */
     164           0 :     cp=cnv->fromUChar32;
     165             : 
     166             :     /* sourceIndex=-1 if the current character began in the previous buffer */
     167           0 :     sourceIndex= cp==0 ? 0 : -1;
     168             : 
     169             :     /*
     170             :      * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
     171             :      * for the minimum of the sourceLength and targetCapacity
     172             :      */
     173           0 :     length=(int32_t)(sourceLimit-source);
     174           0 :     if(length<targetCapacity) {
     175           0 :         targetCapacity=length;
     176             :     }
     177             : 
     178             :     /* a code point is pending from the previous buffer: continue with its trail unit */
     179           0 :     if(cp!=0 && targetCapacity>0) {
     180           0 :         goto getTrail;
     181             :     }
     182             : 
     183             : #if LATIN1_UNROLL_FROM_UNICODE
     184             :     /*
                     :      * unroll the loop with the most common case:
                     :      * 16 units are written first and validated afterwards by OR-ing them together;
                     :      * if any unit was above max, source and target are rewound to the start of
                     :      * that group and the simple loop below finds the exact offending unit
                     :      */
     185           0 :     if(targetCapacity>=16) {
     186             :         int32_t count, loops;
     187             :         UChar u, oredChars;
     188             : 
     189           0 :         loops=count=targetCapacity>>4;
     190           0 :         do {
     191           0 :             oredChars=u=*source++;
     192           0 :             *target++=(uint8_t)u;
     193           0 :             oredChars|=u=*source++;
     194           0 :             *target++=(uint8_t)u;
     195           0 :             oredChars|=u=*source++;
     196           0 :             *target++=(uint8_t)u;
     197           0 :             oredChars|=u=*source++;
     198           0 :             *target++=(uint8_t)u;
     199           0 :             oredChars|=u=*source++;
     200           0 :             *target++=(uint8_t)u;
     201           0 :             oredChars|=u=*source++;
     202           0 :             *target++=(uint8_t)u;
     203           0 :             oredChars|=u=*source++;
     204           0 :             *target++=(uint8_t)u;
     205           0 :             oredChars|=u=*source++;
     206           0 :             *target++=(uint8_t)u;
     207           0 :             oredChars|=u=*source++;
     208           0 :             *target++=(uint8_t)u;
     209           0 :             oredChars|=u=*source++;
     210           0 :             *target++=(uint8_t)u;
     211           0 :             oredChars|=u=*source++;
     212           0 :             *target++=(uint8_t)u;
     213           0 :             oredChars|=u=*source++;
     214           0 :             *target++=(uint8_t)u;
     215           0 :             oredChars|=u=*source++;
     216           0 :             *target++=(uint8_t)u;
     217           0 :             oredChars|=u=*source++;
     218           0 :             *target++=(uint8_t)u;
     219           0 :             oredChars|=u=*source++;
     220           0 :             *target++=(uint8_t)u;
     221           0 :             oredChars|=u=*source++;
     222           0 :             *target++=(uint8_t)u;
     223             : 
     224             :             /* were all 16 entries really valid? */
     225           0 :             if(oredChars>max) {
     226             :                 /* no, return to the first of these 16 */
     227           0 :                 source-=16;
     228           0 :                 target-=16;
     229           0 :                 break;
     230             :             }
     231             :         } while(--count>0);
     232             :         /* count becomes the number of groups of 16 that were fully converted */
     232           0 :         count=loops-count;
     233           0 :         targetCapacity-=16*count;
     234             : 
     235           0 :         if(offsets!=NULL) {
     236             :             /* advance oldTarget so that the final offsets loop only covers later bytes */
     236           0 :             oldTarget+=16*count;
     237           0 :             while(count>0) {
     238           0 :                 *offsets++=sourceIndex++;
     239           0 :                 *offsets++=sourceIndex++;
     240           0 :                 *offsets++=sourceIndex++;
     241           0 :                 *offsets++=sourceIndex++;
     242           0 :                 *offsets++=sourceIndex++;
     243           0 :                 *offsets++=sourceIndex++;
     244           0 :                 *offsets++=sourceIndex++;
     245           0 :                 *offsets++=sourceIndex++;
     246           0 :                 *offsets++=sourceIndex++;
     247           0 :                 *offsets++=sourceIndex++;
     248           0 :                 *offsets++=sourceIndex++;
     249           0 :                 *offsets++=sourceIndex++;
     250           0 :                 *offsets++=sourceIndex++;
     251           0 :                 *offsets++=sourceIndex++;
     252           0 :                 *offsets++=sourceIndex++;
     253           0 :                 *offsets++=sourceIndex++;
     254           0 :                 --count;
     255             :             }
     256             :         }
     257             :     }
     258             : #endif
     259             : 
     260             :     /* conversion loop; when it stops on c>max, source already points past c */
     261           0 :     c=0;
     262           0 :     while(targetCapacity>0 && (c=*source++)<=max) {
     263             :         /* convert the Unicode code point */
     264           0 :         *target++=(uint8_t)c;
     265           0 :         --targetCapacity;
     266             :     }
     267             : 
     268           0 :     if(c>max) {
     269           0 :         cp=c;
     270           0 :         if(!U_IS_SURROGATE(cp)) {
     271             :             /* callback(unassigned) */
     272           0 :         } else if(U_IS_SURROGATE_LEAD(cp)) {
     273             : getTrail:
     274           0 :             if(source<sourceLimit) {
     275             :                 /* test the following code unit */
     276           0 :                 UChar trail=*source;
     277           0 :                 if(U16_IS_TRAIL(trail)) {
     278           0 :                     ++source;
     279           0 :                     cp=U16_GET_SUPPLEMENTARY(cp, trail);
     280             :                     /* this codepage does not map supplementary code points */
     281             :                     /* callback(unassigned) */
     282             :                 } else {
     283             :                     /* this is an unmatched lead code unit (1st surrogate) */
     284             :                     /* callback(illegal) */
     285             :                 }
     286             :             } else {
     287             :                 /* no more input: remember the lead surrogate for the next call, without an error */
     288           0 :                 cnv->fromUChar32=cp;
     289           0 :                 goto noMoreInput;
     290             :             }
     291             :         } else {
     292             :             /* this is an unmatched trail code unit (2nd surrogate) */
     293             :             /* callback(illegal) */
     294             :         }
     295             : 
     296             :         /* cp is still a surrogate only if it was unpaired; a combined pair is merely unmappable */
     296           0 :         *pErrorCode= U_IS_SURROGATE(cp) ? U_ILLEGAL_CHAR_FOUND : U_INVALID_CHAR_FOUND;
     297           0 :         cnv->fromUChar32=cp;
     298             :     }
     299             : noMoreInput:
     300             : 
     301             :     /* set offsets for the bytes written since oldTarget (one consecutive source index each) */
     302           0 :     if(offsets!=NULL) {
     303           0 :         size_t count=target-oldTarget;
     304           0 :         while(count>0) {
     305           0 :             *offsets++=sourceIndex++;
     306           0 :             --count;
     307             :         }
     308             :     }
     309             : 
     310           0 :     if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=(uint8_t *)pArgs->targetLimit) {
     311             :         /* target is full */
     312           0 :         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     313             :     }
     314             : 
     315             :     /* write back the updated pointers */
     316           0 :     pArgs->source=source;
     317           0 :     pArgs->target=(char *)target;
     318           0 :     pArgs->offsets=offsets;
     319           0 : }
     320             : 
     321             : /* Convert UTF-8 to Latin-1. Adapted from ucnv_SBCSFromUTF8().
                     :  *
                     :  * Handles two kinds of input inline: ASCII bytes, which are copied, and 2-byte
                     :  * sequences with lead byte 0xC2 or 0xC3 and a valid trail byte, which are
                     :  * combined into one byte 0x80..0xFF.
                     :  *
                     :  * Outcomes reported through *pErrorCode:
                     :  *  - U_USING_DEFAULT_WARNING: any other input was met. In the main loop the
                     :  *    pointers are written back with pToUArgs->source at the offending byte; for
                     :  *    a pending lead byte from a previous call nothing has been consumed yet.
                     :  *    Per the comments below, the caller is expected to fall back to the
                     :  *    pivoting implementation.
                     :  *  - U_BUFFER_OVERFLOW_ERROR: input remains but the target is full.
                     :  *
                     :  * A lead byte that is the last byte of the input is not converted; it is stored
                     :  * in the UTF-8 converter (toUnicodeStatus, toUBytes[0], toULength, mode) so
                     :  * that the next call can complete the sequence.
                     :  */
     322             : static void U_CALLCONV
     323           0 : ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
     324             :                     UConverterToUnicodeArgs *pToUArgs,
     325             :                     UErrorCode *pErrorCode) {
     326             :     UConverter *utf8;
     327             :     const uint8_t *source, *sourceLimit;
     328             :     uint8_t *target;
     329             :     int32_t targetCapacity;
     330             : 
     331             :     UChar32 c;
     332             :     uint8_t b, t1;
     333             : 
     334             :     /* set up the local pointers */
     335           0 :     utf8=pToUArgs->converter;
     336           0 :     source=(uint8_t *)pToUArgs->source;
     337           0 :     sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
     338           0 :     target=(uint8_t *)pFromUArgs->target;
     339           0 :     targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
     340             : 
     341             :     /* get the converter state from the UTF-8 UConverter (a lead byte saved by a previous call) */
     342           0 :     c=(UChar32)utf8->toUnicodeStatus;
     343           0 :     if(c!=0 && source<sourceLimit) {
     344           0 :         if(targetCapacity==0) {
     345           0 :             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     346           0 :             return;
     347           0 :         } else if(c>=0xc2 && c<=0xc3 && (t1=(uint8_t)(*source-0x80)) <= 0x3f) {
     348             :             /* low 2 bits of the lead become the top 2 bits, t1 supplies the low 6 bits */
     348           0 :             ++source;
     349           0 :             *target++=(uint8_t)(((c&3)<<6)|t1);
     350           0 :             --targetCapacity;
     351             : 
     352           0 :             utf8->toUnicodeStatus=0;
     353           0 :             utf8->toULength=0;
     354             :         } else {
     355             :             /* complicated, illegal or unmappable input: fall back to the pivoting implementation */
     356           0 :             *pErrorCode=U_USING_DEFAULT_WARNING;
     357           0 :             return;
     358             :         }
     359             :     }
     360             : 
     361             :     /*
     362             :      * Make sure that the last byte sequence before sourceLimit is complete
     363             :      * or runs into a lead byte.
     364             :      * In the conversion loop compare source with sourceLimit only once
     365             :      * per multi-byte character.
     366             :      * For Latin-1, adjust sourceLimit only for 1 trail byte because
     367             :      * the conversion loop handles at most 2-byte sequences.
     368             :      */
     369           0 :     if(source<sourceLimit && U8_IS_LEAD(*(sourceLimit-1))) {
     370           0 :         --sourceLimit;
     371             :     }
     372             : 
     373             :     /* conversion loop */
     374           0 :     while(source<sourceLimit) {
     375           0 :         if(targetCapacity>0) {
     376           0 :             b=*source++;
     377           0 :             if((int8_t)b>=0) {
     378             :                 /* convert ASCII */
     379           0 :                 *target++=(uint8_t)b;
     380           0 :                 --targetCapacity;
     381           0 :             } else if( /* handle U+0080..U+00FF inline */
     382           0 :                        b>=0xc2 && b<=0xc3 &&
     383           0 :                        (t1=(uint8_t)(*source-0x80)) <= 0x3f
     384             :             ) {
     385           0 :                 ++source;
     386           0 :                 *target++=(uint8_t)(((b&3)<<6)|t1);
     387           0 :                 --targetCapacity;
     388             :             } else {
     389             :                 /* complicated, illegal or unmappable input: fall back to the pivoting implementation */
     390             :                 /* source-1 undoes the read of b so that the offending byte is not consumed */
     390           0 :                 pToUArgs->source=(char *)(source-1);
     391           0 :                 pFromUArgs->target=(char *)target;
     392           0 :                 *pErrorCode=U_USING_DEFAULT_WARNING;
     393           0 :                 return;
     394             :             }
     395             :         } else {
     396             :             /* target is full */
     397           0 :             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     398           0 :             break;
     399             :         }
     400             :     }
     401             : 
     402             :     /*
     403             :      * The sourceLimit may have been adjusted before the conversion loop
     404             :      * to stop before a truncated sequence.
     405             :      * If so, then collect the truncated sequence now.
     406             :      * For Latin-1, there is at most exactly one lead byte because of the
     407             :      * smaller sourceLimit adjustment logic.
     408             :      * (The condition below also restores the local sourceLimit to the real limit.)
     409             :      */
     409           0 :     if(U_SUCCESS(*pErrorCode) && source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
     410           0 :         utf8->toUnicodeStatus=utf8->toUBytes[0]=b=*source++;
     411           0 :         utf8->toULength=1;
     412           0 :         utf8->mode=U8_COUNT_TRAIL_BYTES(b)+1;
     413             :     }
     414             : 
     415             :     /* write back the updated pointers */
     416           0 :     pToUArgs->source=(char *)source;
     417           0 :     pFromUArgs->target=(char *)target;
     418             : }
     419             : 
     420             : static void U_CALLCONV /* Reports the code points this converter handles: adds the
                     :                         * single range 0..0xff to sa->set through sa->addRange().
                     :                         * cnv, which and pErrorCode are accepted but not used. */
     421           0 : _Latin1GetUnicodeSet(const UConverter *cnv,
     422             :                      const USetAdder *sa,
     423             :                      UConverterUnicodeSet which,
     424             :                      UErrorCode *pErrorCode) {
     425             :     (void)cnv;
     426             :     (void)which;
     427             :     (void)pErrorCode;
     428           0 :     sa->addRange(sa->set, 0, 0xff);
     429           0 : }
     430             : U_CDECL_END
     431             : 
     432             : 
     433             : static const UConverterImpl _Latin1Impl={ /* Function table for the ISO 8859-1 converter.
                     :                                            * It wires up the conversion functions defined above in this file;
                     :                                            * NULL entries provide no function for that slot.
                     :                                            * NOTE(review): the meaning of each slot comes from the UConverterImpl
                     :                                            * declaration, which is not visible here. The repeated entries are
                     :                                            * presumably the without-offsets and with-offsets slots - confirm
                     :                                            * against ucnv_cnv.h. */
     434             :     UCNV_LATIN_1,
     435             : 
     436             :     NULL,
     437             :     NULL,
     438             : 
     439             :     NULL,
     440             :     NULL,
     441             :     NULL,
     442             : 
     443             :     _Latin1ToUnicodeWithOffsets,
     444             :     _Latin1ToUnicodeWithOffsets,
     445             :     _Latin1FromUnicodeWithOffsets,
     446             :     _Latin1FromUnicodeWithOffsets,
     447             :     _Latin1GetNextUChar,
     448             : 
     449             :     NULL,
     450             :     NULL,
     451             :     NULL,
     452             :     NULL,
     453             :     _Latin1GetUnicodeSet,
     454             : 
     455             :     NULL,
     456             :     ucnv_Latin1FromUTF8
     457             : };
     458             : 
     459             : static const UConverterStaticData _Latin1StaticData={ /* Static description of the "ISO-8859-1" converter.
                     :                                                        * NOTE(review): field meanings come from UConverterStaticData, which is
                     :                                                        * not visible here. 819 with UCNV_IBM is presumably the IBM codepage
                     :                                                        * number, the two 1s the min/max bytes per character, and { 0x1a, ... }
                     :                                                        * with the following 1 the substitution byte and its length - confirm
                     :                                                        * against ucnv_bld.h. */
     460             :     sizeof(UConverterStaticData),
     461             :     "ISO-8859-1",
     462             :     819, UCNV_IBM, UCNV_LATIN_1, 1, 1,
     463             :     { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE,
     464             :     0,
     465             :     0,
     466             :     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
     467             : };
     468             : 
     469             : const UConverterSharedData _Latin1Data= /* Shared data object for ISO 8859-1, built from _Latin1StaticData and
                     :                                          * _Latin1Impl. _Latin1FromUnicodeWithOffsets() compares cnv->sharedData
                     :                                          * with the address of this object to choose the 0xff limit over 0x7f. */
     470             :         UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_Latin1StaticData, &_Latin1Impl);
     471             : 
     472             : /* US-ASCII ----------------------------------------------------------------- */
     473             : 
     474             : U_CDECL_BEGIN
     475             : /* This is a table-less version of ucnv_MBCSSingleToBMPWithOffsets().
                     :  *
                     :  * Converts US-ASCII bytes to UTF-16 by copying each byte up to 0x7f into one UChar.
                     :  * Conversion stops at the first byte above 0x7f: that byte is consumed, stored in
                     :  * cnv->toUBytes[0] with cnv->toULength=1, and *pErrorCode is set to
                     :  * U_ILLEGAL_CHAR_FOUND. Otherwise, if input remains and the target is full,
                     :  * *pErrorCode is set to U_BUFFER_OVERFLOW_ERROR.
                     :  *
                     :  * pArgs->source, pArgs->target and pArgs->offsets are written back on every path.
                     :  * If pArgs->offsets is not NULL, one offset per output unit is written
                     :  * (the consecutive source indexes 0, 1, 2, ...).
                     :  */
     476             : static void U_CALLCONV
     477           0 : _ASCIIToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
     478             :                            UErrorCode *pErrorCode) {
     479             :     const uint8_t *source, *sourceLimit;
     480             :     UChar *target, *oldTarget;
     481             :     int32_t targetCapacity, length;
     482             :     int32_t *offsets;
     483             : 
     484             :     int32_t sourceIndex;
     485             : 
     486             :     uint8_t c;
     487             : 
     488             :     /* set up the local pointers */
     489           0 :     source=(const uint8_t *)pArgs->source;
     490           0 :     sourceLimit=(const uint8_t *)pArgs->sourceLimit;
     491           0 :     target=oldTarget=pArgs->target;
     492           0 :     targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
     493           0 :     offsets=pArgs->offsets;
     494             : 
     495             :     /* every byte is a complete character here, so the first source index is always 0 */
     496           0 :     sourceIndex=0;
     497             : 
     498             :     /*
     499             :      * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
     500             :      * for the minimum of the sourceLength and targetCapacity
     501             :      */
     502           0 :     length=(int32_t)(sourceLimit-source);
     503           0 :     if(length<targetCapacity) {
     504           0 :         targetCapacity=length;
     505             :     }
     506             : 
     507           0 :     if(targetCapacity>=8) {
     508             :         /*
                     :          * This loop is unrolled for speed and improved pipelining.
                     :          * 8 units are written first and validated afterwards by OR-ing them together.
                     :          */
     509             :         int32_t count, loops;
     510             :         UChar oredChars;
     511             : 
     512           0 :         loops=count=targetCapacity>>3;
     513           0 :         do {
     514           0 :             oredChars=target[0]=source[0];
     515           0 :             oredChars|=target[1]=source[1];
     516           0 :             oredChars|=target[2]=source[2];
     517           0 :             oredChars|=target[3]=source[3];
     518           0 :             oredChars|=target[4]=source[4];
     519           0 :             oredChars|=target[5]=source[5];
     520           0 :             oredChars|=target[6]=source[6];
     521           0 :             oredChars|=target[7]=source[7];
     522             : 
     523             :             /* were all 8 entries really valid? */
     524           0 :             if(oredChars>0x7f) {
     525             :                 /* no: stop before advancing, so the simple loop below re-reads these 8 */
     526           0 :                 break;
     527             :             }
     528           0 :             source+=8;
     529           0 :             target+=8;
     530             :         } while(--count>0);
     531             :         /* count becomes the number of groups of 8 that were fully converted */
     531           0 :         count=loops-count;
     532           0 :         targetCapacity-=count*8;
     533             : 
     534           0 :         if(offsets!=NULL) {
     535             :             /* advance oldTarget so that the final offsets loop only covers later units */
     535           0 :             oldTarget+=count*8;
     536           0 :             while(count>0) {
     537           0 :                 offsets[0]=sourceIndex++;
     538           0 :                 offsets[1]=sourceIndex++;
     539           0 :                 offsets[2]=sourceIndex++;
     540           0 :                 offsets[3]=sourceIndex++;
     541           0 :                 offsets[4]=sourceIndex++;
     542           0 :                 offsets[5]=sourceIndex++;
     543           0 :                 offsets[6]=sourceIndex++;
     544           0 :                 offsets[7]=sourceIndex++;
     545           0 :                 offsets+=8;
     546           0 :                 --count;
     547             :             }
     548             :         }
     549             :     }
     550             : 
     551             :     /* conversion loop; when it stops on c>0x7f, source already points past c */
     552           0 :     c=0;
     553           0 :     while(targetCapacity>0 && (c=*source++)<=0x7f) {
     554           0 :         *target++=c;
     555           0 :         --targetCapacity;
     556             :     }
     557             : 
     558           0 :     if(c>0x7f) {
     559             :         /* callback(illegal); copy the current bytes to toUBytes[] */
     560           0 :         UConverter *cnv=pArgs->converter;
     561           0 :         cnv->toUBytes[0]=c;
     562           0 :         cnv->toULength=1;
     563           0 :         *pErrorCode=U_ILLEGAL_CHAR_FOUND;
     564           0 :     } else if(source<sourceLimit && target>=pArgs->targetLimit) {
     565             :         /* target is full */
     566           0 :         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     567             :     }
     568             : 
     569             :     /* set offsets for the units written since oldTarget (one consecutive source index each) */
     570           0 :     if(offsets!=NULL) {
     571           0 :         size_t count=target-oldTarget;
     572           0 :         while(count>0) {
     573           0 :             *offsets++=sourceIndex++;
     574           0 :             --count;
     575             :         }
     576             :     }
     577             : 
     578             :     /* write back the updated pointers */
     579           0 :     pArgs->source=(const char *)source;
     580           0 :     pArgs->target=target;
     581           0 :     pArgs->offsets=offsets;
     582           0 : }
     583             : 
     584             : /* This is a table-less version of ucnv_MBCSSingleGetNextUChar().
                     :  *
                     :  * Consumes one byte from pArgs->source and returns it if it is at most 0x7f.
                     :  * A byte above 0x7f is also consumed, but it is stored in cnv->toUBytes[0] with
                     :  * cnv->toULength=1, *pErrorCode is set to U_ILLEGAL_CHAR_FOUND and 0xffff is returned.
                     :  * If pArgs->source is not below pArgs->sourceLimit, nothing is consumed,
                     :  * *pErrorCode is set to U_INDEX_OUTOFBOUNDS_ERROR and 0xffff is returned.
                     :  */
     585             : static UChar32 U_CALLCONV
     586           0 : _ASCIIGetNextUChar(UConverterToUnicodeArgs *pArgs,
     587             :                    UErrorCode *pErrorCode) {
     588             :     const uint8_t *source;
     589             :     uint8_t b;
     590             : 
     591           0 :     source=(const uint8_t *)pArgs->source;
     592           0 :     if(source<(const uint8_t *)pArgs->sourceLimit) {
     593           0 :         b=*source++;
     594             :         /* the byte counts as consumed whether or not it turns out to be valid */
     594           0 :         pArgs->source=(const char *)source;
     595           0 :         if(b<=0x7f) {
     596           0 :             return b;
     597             :         } else {
     598           0 :             UConverter *cnv=pArgs->converter;
     599           0 :             cnv->toUBytes[0]=b;
     600           0 :             cnv->toULength=1;
     601           0 :             *pErrorCode=U_ILLEGAL_CHAR_FOUND;
     602           0 :             return 0xffff;
     603             :         }
     604             :     }
     605             : 
     606             :     /* no output because of empty input */
     607           0 :     *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
     608           0 :     return 0xffff;
     609             : }
     610             : 
     611             : /* "Convert" UTF-8 to US-ASCII: Validate and copy. Bytes <=0x7f are copied from pToUArgs->source to pFromUArgs->target; the first byte >0x7f stops the copy with U_USING_DEFAULT_WARNING, and a full target with input left yields U_BUFFER_OVERFLOW_ERROR. */
     612             : static void U_CALLCONV
     613           0 : ucnv_ASCIIFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
     614             :                    UConverterToUnicodeArgs *pToUArgs,
     615             :                    UErrorCode *pErrorCode) {
     616             :     const uint8_t *source, *sourceLimit;
     617             :     uint8_t *target;
     618             :     int32_t targetCapacity, length;
     619             : 
     620             :     uint8_t c;
     621             : 
     622           0 :     if(pToUArgs->converter->toUnicodeStatus!=0) {
     623             :         /* no handling of partial UTF-8 characters here, fall back to pivoting */
     624           0 :         *pErrorCode=U_USING_DEFAULT_WARNING;
     625           0 :         return; /* neither source nor target pointer has been touched */
     626             :     }
     627             : 
     628             :     /* set up the local pointers */
     629           0 :     source=(const uint8_t *)pToUArgs->source;
     630           0 :     sourceLimit=(const uint8_t *)pToUArgs->sourceLimit;
     631           0 :     target=(uint8_t *)pFromUArgs->target;
     632           0 :     targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
     633             : 
     634             :     /*
     635             :      * since the conversion here is 1:1 uint8_t:uint8_t, we need only one counter
     636             :      * for the minimum of the sourceLength and targetCapacity
     637             :      */
     638           0 :     length=(int32_t)(sourceLimit-source);
     639           0 :     if(length<targetCapacity) {
     640           0 :         targetCapacity=length; /* from here on targetCapacity is min(source length, target room) */
     641             :     }
     642             : 
     643             :     /* unroll the loop with the most common case */
     644           0 :     if(targetCapacity>=16) {
     645             :         int32_t count, loops;
     646             :         uint8_t oredChars;
     647             : 
     648           0 :         loops=count=targetCapacity>>4; /* number of complete 16-byte groups */
     649           0 :         do {
     650           0 :             oredChars=*target++=*source++; /* plain assignment restarts the OR accumulator for each group */
     651           0 :             oredChars|=*target++=*source++;
     652           0 :             oredChars|=*target++=*source++;
     653           0 :             oredChars|=*target++=*source++;
     654           0 :             oredChars|=*target++=*source++;
     655           0 :             oredChars|=*target++=*source++;
     656           0 :             oredChars|=*target++=*source++;
     657           0 :             oredChars|=*target++=*source++;
     658           0 :             oredChars|=*target++=*source++;
     659           0 :             oredChars|=*target++=*source++;
     660           0 :             oredChars|=*target++=*source++;
     661           0 :             oredChars|=*target++=*source++;
     662           0 :             oredChars|=*target++=*source++;
     663           0 :             oredChars|=*target++=*source++;
     664           0 :             oredChars|=*target++=*source++;
     665           0 :             oredChars|=*target++=*source++;
     666             : 
     667             :             /* were all 16 entries really valid? (any byte with the top bit set pushes the OR above 0x7f) */
     668           0 :             if(oredChars>0x7f) {
     669             :                 /* no, return to the first of these 16 */
     670           0 :                 source-=16; /* the 16 bytes already stored in the target are not undone; only the pointers move back */
     671           0 :                 target-=16;
     672           0 :                 break; /* leaves count undecremented for this rejected group */
     673             :             }
     674             :         } while(--count>0);
     675           0 :         count=loops-count; /* groups that were copied without any byte >0x7f */
     676           0 :         targetCapacity-=16*count; /* what remains is handled one byte at a time below */
     677             :     }
     678             : 
     679             :     /* conversion loop */
     680           0 :     c=0; /* gives the c>0x7f test below a defined value when the loop reads no byte */
     681           0 :     while(targetCapacity>0 && (c=*source)<=0x7f) {
     682           0 :         ++source;
     683           0 :         *target++=c;
     684           0 :         --targetCapacity;
     685             :     }
     686             : 
     687           0 :     if(c>0x7f) {
     688             :         /* non-ASCII character, handle in standard converter (source still points at that byte) */
     689           0 :         *pErrorCode=U_USING_DEFAULT_WARNING;
     690           0 :     } else if(source<sourceLimit && target>=(const uint8_t *)pFromUArgs->targetLimit) {
     691             :         /* target is full */
     692           0 :         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     693             :     }
     694             : 
     695             :     /* write back the updated pointers */
     696           0 :     pToUArgs->source=(const char *)source;
     697           0 :     pFromUArgs->target=(char *)target;
     698             : }
     699             : 
     700             : static void U_CALLCONV /* Adds the range 0..0x7f to the set behind sa; the remaining parameters are not used. */
     701           0 : _ASCIIGetUnicodeSet(const UConverter *cnv,
     702             :                     const USetAdder *sa,
     703             :                     UConverterUnicodeSet which,
     704             :                     UErrorCode *pErrorCode) {
     705             :     (void)cnv; /* the void casts mark the parameters as intentionally unused */
     706             :     (void)which;
     707             :     (void)pErrorCode; /* never written here: the function reports no errors */
     708           0 :     sa->addRange(sa->set, 0, 0x7f); /* both bounds are passed to the adder's addRange callback */
     709           0 : }
     710             : U_CDECL_END
     711             : 
     712             : static const UConverterImpl _ASCIIImpl={ /* function table for US-ASCII; NOTE(review): slot names are defined by UConverterImpl, which is declared outside this file - confirm there */
     713             :     UCNV_US_ASCII,
     714             :     /* NULL entries throughout this table: no function is supplied for that slot */
     715             :     NULL,
     716             :     NULL,
     717             : 
     718             :     NULL,
     719             :     NULL,
     720             :     NULL,
     721             :     /* each conversion direction uses one function for two adjacent slots; the from-Unicode direction reuses the Latin-1 function */
     722             :     _ASCIIToUnicodeWithOffsets,
     723             :     _ASCIIToUnicodeWithOffsets,
     724             :     _Latin1FromUnicodeWithOffsets,
     725             :     _Latin1FromUnicodeWithOffsets,
     726             :     _ASCIIGetNextUChar,
     727             : 
     728             :     NULL,
     729             :     NULL,
     730             :     NULL,
     731             :     NULL,
     732             :     _ASCIIGetUnicodeSet,
     733             : 
     734             :     NULL,
     735             :     ucnv_ASCIIFromUTF8 /* the UTF-8 validate-and-copy function fills the last slot */
     736             : };
     737             : 
     738             : static const UConverterStaticData _ASCIIStaticData={ /* constant descriptor for the US-ASCII converter; NOTE(review): field meanings below are presumed from the values - confirm against UConverterStaticData */
     739             :     sizeof(UConverterStaticData), /* the structure records its own size */
     740             :     "US-ASCII", /* converter name */
     741             :     367, UCNV_IBM, UCNV_US_ASCII, 1, 1, /* presumably codepage 367 on the IBM platform, with 1 as both min and max bytes per char - TODO confirm */
     742             :     { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE, /* presumably the substitution byte 0x1a and its length 1 - TODO confirm */
     743             :     0,
     744             :     0,
     745             :     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
     746             : };
     747             : 
     748             : const UConverterSharedData _ASCIIData= /* shared-data object for US-ASCII, built from the static descriptor and the function table defined above */
     749             :         UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ASCIIStaticData, &_ASCIIImpl);
     750             : 
     751             : #endif

Generated by: LCOV version 1.13