LCOV - code coverage report
Current view: top level - intl/icu/source/common - uiter.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 0 452 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 51 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : *******************************************************************************
       5             : *
       6             : *   Copyright (C) 2002-2012, International Business Machines
       7             : *   Corporation and others.  All Rights Reserved.
       8             : *
       9             : *******************************************************************************
      10             : *   file name:  uiter.cpp
      11             : *   encoding:   UTF-8
      12             : *   tab size:   8 (not used)
      13             : *   indentation:4
      14             : *
      15             : *   created on: 2002jan18
      16             : *   created by: Markus W. Scherer
      17             : */
      18             : 
      19             : #include "unicode/utypes.h"
      20             : #include "unicode/ustring.h"
      21             : #include "unicode/chariter.h"
      22             : #include "unicode/rep.h"
      23             : #include "unicode/uiter.h"
      24             : #include "unicode/utf.h"
      25             : #include "unicode/utf8.h"
      26             : #include "unicode/utf16.h"
      27             : #include "cstring.h"
      28             : 
      29             : U_NAMESPACE_USE
      30             : 
      31             : #define IS_EVEN(n) (((n)&1)==0)
      32             : #define IS_POINTER_EVEN(p) IS_EVEN((size_t)p)
      33             : 
      34             : U_CDECL_BEGIN
      35             : 
      36             : /* No-Op UCharIterator implementation for illegal input --------------------- */
      37             : 
      38             : static int32_t U_CALLCONV
      39           0 : noopGetIndex(UCharIterator * /*iter*/, UCharIteratorOrigin /*origin*/) {
      40           0 :     return 0;
      41             : }
      42             : 
      43             : static int32_t U_CALLCONV
      44           0 : noopMove(UCharIterator * /*iter*/, int32_t /*delta*/, UCharIteratorOrigin /*origin*/) {
      45           0 :     return 0;
      46             : }
      47             : 
      48             : static UBool U_CALLCONV
      49           0 : noopHasNext(UCharIterator * /*iter*/) {
      50           0 :     return FALSE;
      51             : }
      52             : 
      53             : static UChar32 U_CALLCONV
      54           0 : noopCurrent(UCharIterator * /*iter*/) {
      55           0 :     return U_SENTINEL;
      56             : }
      57             : 
      58             : static uint32_t U_CALLCONV
      59           0 : noopGetState(const UCharIterator * /*iter*/) {
      60           0 :     return UITER_NO_STATE;
      61             : }
      62             : 
      63             : static void U_CALLCONV
      64           0 : noopSetState(UCharIterator * /*iter*/, uint32_t /*state*/, UErrorCode *pErrorCode) {
      65           0 :     *pErrorCode=U_UNSUPPORTED_ERROR;
      66           0 : }
      67             : 
      68             : static const UCharIterator noopIterator={
      69             :     0, 0, 0, 0, 0, 0,
      70             :     noopGetIndex,
      71             :     noopMove,
      72             :     noopHasNext,
      73             :     noopHasNext,
      74             :     noopCurrent,
      75             :     noopCurrent,
      76             :     noopCurrent,
      77             :     NULL,
      78             :     noopGetState,
      79             :     noopSetState
      80             : };
      81             : 
      82             : /* UCharIterator implementation for simple strings -------------------------- */
      83             : 
      84             : /*
      85             :  * This is an implementation of a code unit (UChar) iterator
      86             :  * for UChar * strings.
      87             :  *
      88             :  * The UCharIterator.context field holds a pointer to the string.
      89             :  */
      90             : 
      91             : static int32_t U_CALLCONV
      92           0 : stringIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) {
      93           0 :     switch(origin) {
      94             :     case UITER_ZERO:
      95           0 :         return 0;
      96             :     case UITER_START:
      97           0 :         return iter->start;
      98             :     case UITER_CURRENT:
      99           0 :         return iter->index;
     100             :     case UITER_LIMIT:
     101           0 :         return iter->limit;
     102             :     case UITER_LENGTH:
     103           0 :         return iter->length;
     104             :     default:
     105             :         /* not a valid origin */
     106             :         /* Should never get here! */
     107           0 :         return -1;
     108             :     }
     109             : }
     110             : 
     111             : static int32_t U_CALLCONV
     112           0 : stringIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin) {
     113             :     int32_t pos;
     114             : 
     115           0 :     switch(origin) {
     116             :     case UITER_ZERO:
     117           0 :         pos=delta;
     118           0 :         break;
     119             :     case UITER_START:
     120           0 :         pos=iter->start+delta;
     121           0 :         break;
     122             :     case UITER_CURRENT:
     123           0 :         pos=iter->index+delta;
     124           0 :         break;
     125             :     case UITER_LIMIT:
     126           0 :         pos=iter->limit+delta;
     127           0 :         break;
     128             :     case UITER_LENGTH:
     129           0 :         pos=iter->length+delta;
     130           0 :         break;
     131             :     default:
     132           0 :         return -1;  /* Error */
     133             :     }
     134             : 
     135           0 :     if(pos<iter->start) {
     136           0 :         pos=iter->start;
     137           0 :     } else if(pos>iter->limit) {
     138           0 :         pos=iter->limit;
     139             :     }
     140             : 
     141           0 :     return iter->index=pos;
     142             : }
     143             : 
     144             : static UBool U_CALLCONV
     145           0 : stringIteratorHasNext(UCharIterator *iter) {
     146           0 :     return iter->index<iter->limit;
     147             : }
     148             : 
     149             : static UBool U_CALLCONV
     150           0 : stringIteratorHasPrevious(UCharIterator *iter) {
     151           0 :     return iter->index>iter->start;
     152             : }
     153             : 
     154             : static UChar32 U_CALLCONV
     155           0 : stringIteratorCurrent(UCharIterator *iter) {
     156           0 :     if(iter->index<iter->limit) {
     157           0 :         return ((const UChar *)(iter->context))[iter->index];
     158             :     } else {
     159           0 :         return U_SENTINEL;
     160             :     }
     161             : }
     162             : 
     163             : static UChar32 U_CALLCONV
     164           0 : stringIteratorNext(UCharIterator *iter) {
     165           0 :     if(iter->index<iter->limit) {
     166           0 :         return ((const UChar *)(iter->context))[iter->index++];
     167             :     } else {
     168           0 :         return U_SENTINEL;
     169             :     }
     170             : }
     171             : 
     172             : static UChar32 U_CALLCONV
     173           0 : stringIteratorPrevious(UCharIterator *iter) {
     174           0 :     if(iter->index>iter->start) {
     175           0 :         return ((const UChar *)(iter->context))[--iter->index];
     176             :     } else {
     177           0 :         return U_SENTINEL;
     178             :     }
     179             : }
     180             : 
     181             : static uint32_t U_CALLCONV
     182           0 : stringIteratorGetState(const UCharIterator *iter) {
     183           0 :     return (uint32_t)iter->index;
     184             : }
     185             : 
     186             : static void U_CALLCONV
     187           0 : stringIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode) {
     188           0 :     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
     189             :         /* do nothing */
     190           0 :     } else if(iter==NULL) {
     191           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
     192           0 :     } else if((int32_t)state<iter->start || iter->limit<(int32_t)state) {
     193           0 :         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
     194             :     } else {
     195           0 :         iter->index=(int32_t)state;
     196             :     }
     197           0 : }
     198             : 
     199             : static const UCharIterator stringIterator={
     200             :     0, 0, 0, 0, 0, 0,
     201             :     stringIteratorGetIndex,
     202             :     stringIteratorMove,
     203             :     stringIteratorHasNext,
     204             :     stringIteratorHasPrevious,
     205             :     stringIteratorCurrent,
     206             :     stringIteratorNext,
     207             :     stringIteratorPrevious,
     208             :     NULL,
     209             :     stringIteratorGetState,
     210             :     stringIteratorSetState
     211             : };
     212             : 
     213             : U_CAPI void U_EXPORT2
     214           0 : uiter_setString(UCharIterator *iter, const UChar *s, int32_t length) {
     215           0 :     if(iter!=0) {
     216           0 :         if(s!=0 && length>=-1) {
     217           0 :             *iter=stringIterator;
     218           0 :             iter->context=s;
     219           0 :             if(length>=0) {
     220           0 :                 iter->length=length;
     221             :             } else {
     222           0 :                 iter->length=u_strlen(s);
     223             :             }
     224           0 :             iter->limit=iter->length;
     225             :         } else {
     226           0 :             *iter=noopIterator;
     227             :         }
     228             :     }
     229           0 : }
     230             : 
     231             : /* UCharIterator implementation for UTF-16BE strings ------------------------ */
     232             : 
     233             : /*
     234             :  * This is an implementation of a code unit (UChar) iterator
     235             :  * for UTF-16BE strings, i.e., strings in byte-vectors where
     236             :  * each UChar is stored as a big-endian pair of bytes.
     237             :  *
     238             :  * The UCharIterator.context field holds a pointer to the string.
     239             :  * Everything works just like with a normal UChar iterator (uiter_setString),
     240             :  * except that UChars are assembled from byte pairs.
     241             :  */
     242             : 
     243             : /* internal helper function */
     244             : static inline UChar32
     245           0 : utf16BEIteratorGet(UCharIterator *iter, int32_t index) {
     246           0 :     const uint8_t *p=(const uint8_t *)iter->context;
     247           0 :     return ((UChar)p[2*index]<<8)|(UChar)p[2*index+1];
     248             : }
     249             : 
     250             : static UChar32 U_CALLCONV
     251           0 : utf16BEIteratorCurrent(UCharIterator *iter) {
     252             :     int32_t index;
     253             : 
     254           0 :     if((index=iter->index)<iter->limit) {
     255           0 :         return utf16BEIteratorGet(iter, index);
     256             :     } else {
     257           0 :         return U_SENTINEL;
     258             :     }
     259             : }
     260             : 
     261             : static UChar32 U_CALLCONV
     262           0 : utf16BEIteratorNext(UCharIterator *iter) {
     263             :     int32_t index;
     264             : 
     265           0 :     if((index=iter->index)<iter->limit) {
     266           0 :         iter->index=index+1;
     267           0 :         return utf16BEIteratorGet(iter, index);
     268             :     } else {
     269           0 :         return U_SENTINEL;
     270             :     }
     271             : }
     272             : 
     273             : static UChar32 U_CALLCONV
     274           0 : utf16BEIteratorPrevious(UCharIterator *iter) {
     275             :     int32_t index;
     276             : 
     277           0 :     if((index=iter->index)>iter->start) {
     278           0 :         iter->index=--index;
     279           0 :         return utf16BEIteratorGet(iter, index);
     280             :     } else {
     281           0 :         return U_SENTINEL;
     282             :     }
     283             : }
     284             : 
     285             : static const UCharIterator utf16BEIterator={
     286             :     0, 0, 0, 0, 0, 0,
     287             :     stringIteratorGetIndex,
     288             :     stringIteratorMove,
     289             :     stringIteratorHasNext,
     290             :     stringIteratorHasPrevious,
     291             :     utf16BEIteratorCurrent,
     292             :     utf16BEIteratorNext,
     293             :     utf16BEIteratorPrevious,
     294             :     NULL,
     295             :     stringIteratorGetState,
     296             :     stringIteratorSetState
     297             : };
     298             : 
     299             : /*
     300             :  * Count the number of UChars in a UTF-16BE string before a terminating UChar NUL,
     301             :  * i.e., before a pair of 0 bytes where the first 0 byte is at an even
     302             :  * offset from s.
     303             :  */
     304             : static int32_t
     305           0 : utf16BE_strlen(const char *s) {
     306           0 :     if(IS_POINTER_EVEN(s)) {
     307             :         /*
     308             :          * even-aligned, call u_strlen(s)
     309             :          * we are probably on a little-endian machine, but searching for UChar NUL
     310             :          * does not care about endianness
     311             :          */
     312           0 :         return u_strlen((const UChar *)s);
     313             :     } else {
     314             :         /* odd-aligned, search for pair of 0 bytes */
     315           0 :         const char *p=s;
     316             : 
     317           0 :         while(!(*p==0 && p[1]==0)) {
     318           0 :             p+=2;
     319             :         }
     320           0 :         return (int32_t)((p-s)/2);
     321             :     }
     322             : }
     323             : 
     324             : U_CAPI void U_EXPORT2
     325           0 : uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length) {
     326           0 :     if(iter!=NULL) {
     327             :         /* allow only even-length strings (the input length counts bytes) */
     328           0 :         if(s!=NULL && (length==-1 || (length>=0 && IS_EVEN(length)))) {
     329             :             /* length/=2, except that >>=1 also works for -1 (-1/2==0, -1>>1==-1) */
     330           0 :             length>>=1;
     331             : 
     332             :             if(U_IS_BIG_ENDIAN && IS_POINTER_EVEN(s)) {
     333             :                 /* big-endian machine and 2-aligned UTF-16BE string: use normal UChar iterator */
     334             :                 uiter_setString(iter, (const UChar *)s, length);
     335             :                 return;
     336             :             }
     337             : 
     338           0 :             *iter=utf16BEIterator;
     339           0 :             iter->context=s;
     340           0 :             if(length>=0) {
     341           0 :                 iter->length=length;
     342             :             } else {
     343           0 :                 iter->length=utf16BE_strlen(s);
     344             :             }
     345           0 :             iter->limit=iter->length;
     346             :         } else {
     347           0 :             *iter=noopIterator;
     348             :         }
     349             :     }
     350             : }
     351             : 
     352             : /* UCharIterator wrapper around CharacterIterator --------------------------- */
     353             : 
     354             : /*
     355             :  * This is wrapper code around a C++ CharacterIterator to
     356             :  * look like a C UCharIterator.
     357             :  *
     358             :  * The UCharIterator.context field holds a pointer to the CharacterIterator.
     359             :  */
     360             : 
     361             : static int32_t U_CALLCONV
     362           0 : characterIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) {
     363           0 :     switch(origin) {
     364             :     case UITER_ZERO:
     365           0 :         return 0;
     366             :     case UITER_START:
     367           0 :         return ((CharacterIterator *)(iter->context))->startIndex();
     368             :     case UITER_CURRENT:
     369           0 :         return ((CharacterIterator *)(iter->context))->getIndex();
     370             :     case UITER_LIMIT:
     371           0 :         return ((CharacterIterator *)(iter->context))->endIndex();
     372             :     case UITER_LENGTH:
     373           0 :         return ((CharacterIterator *)(iter->context))->getLength();
     374             :     default:
     375             :         /* not a valid origin */
     376             :         /* Should never get here! */
     377           0 :         return -1;
     378             :     }
     379             : }
     380             : 
     381             : static int32_t U_CALLCONV
     382           0 : characterIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin) {
     383           0 :     switch(origin) {
     384             :     case UITER_ZERO:
     385           0 :         ((CharacterIterator *)(iter->context))->setIndex(delta);
     386           0 :         return ((CharacterIterator *)(iter->context))->getIndex();
     387             :     case UITER_START:
     388             :     case UITER_CURRENT:
     389             :     case UITER_LIMIT:
     390           0 :         return ((CharacterIterator *)(iter->context))->move(delta, (CharacterIterator::EOrigin)origin);
     391             :     case UITER_LENGTH:
     392           0 :         ((CharacterIterator *)(iter->context))->setIndex(((CharacterIterator *)(iter->context))->getLength()+delta);
     393           0 :         return ((CharacterIterator *)(iter->context))->getIndex();
     394             :     default:
     395             :         /* not a valid origin */
     396             :         /* Should never get here! */
     397           0 :         return -1;
     398             :     }
     399             : }
     400             : 
     401             : static UBool U_CALLCONV
     402           0 : characterIteratorHasNext(UCharIterator *iter) {
     403           0 :     return ((CharacterIterator *)(iter->context))->hasNext();
     404             : }
     405             : 
     406             : static UBool U_CALLCONV
     407           0 : characterIteratorHasPrevious(UCharIterator *iter) {
     408           0 :     return ((CharacterIterator *)(iter->context))->hasPrevious();
     409             : }
     410             : 
     411             : static UChar32 U_CALLCONV
     412           0 : characterIteratorCurrent(UCharIterator *iter) {
     413             :     UChar32 c;
     414             : 
     415           0 :     c=((CharacterIterator *)(iter->context))->current();
     416           0 :     if(c!=0xffff || ((CharacterIterator *)(iter->context))->hasNext()) {
     417           0 :         return c;
     418             :     } else {
     419           0 :         return U_SENTINEL;
     420             :     }
     421             : }
     422             : 
     423             : static UChar32 U_CALLCONV
     424           0 : characterIteratorNext(UCharIterator *iter) {
     425           0 :     if(((CharacterIterator *)(iter->context))->hasNext()) {
     426           0 :         return ((CharacterIterator *)(iter->context))->nextPostInc();
     427             :     } else {
     428           0 :         return U_SENTINEL;
     429             :     }
     430             : }
     431             : 
     432             : static UChar32 U_CALLCONV
     433           0 : characterIteratorPrevious(UCharIterator *iter) {
     434           0 :     if(((CharacterIterator *)(iter->context))->hasPrevious()) {
     435           0 :         return ((CharacterIterator *)(iter->context))->previous();
     436             :     } else {
     437           0 :         return U_SENTINEL;
     438             :     }
     439             : }
     440             : 
     441             : static uint32_t U_CALLCONV
     442           0 : characterIteratorGetState(const UCharIterator *iter) {
     443           0 :     return ((CharacterIterator *)(iter->context))->getIndex();
     444             : }
     445             : 
     446             : static void U_CALLCONV
     447           0 : characterIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode) {
     448           0 :     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
     449             :         /* do nothing */
     450           0 :     } else if(iter==NULL || iter->context==NULL) {
     451           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
     452           0 :     } else if((int32_t)state<((CharacterIterator *)(iter->context))->startIndex() || ((CharacterIterator *)(iter->context))->endIndex()<(int32_t)state) {
     453           0 :         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
     454             :     } else {
     455           0 :         ((CharacterIterator *)(iter->context))->setIndex((int32_t)state);
     456             :     }
     457           0 : }
     458             : 
     459             : static const UCharIterator characterIteratorWrapper={
     460             :     0, 0, 0, 0, 0, 0,
     461             :     characterIteratorGetIndex,
     462             :     characterIteratorMove,
     463             :     characterIteratorHasNext,
     464             :     characterIteratorHasPrevious,
     465             :     characterIteratorCurrent,
     466             :     characterIteratorNext,
     467             :     characterIteratorPrevious,
     468             :     NULL,
     469             :     characterIteratorGetState,
     470             :     characterIteratorSetState
     471             : };
     472             : 
     473             : U_CAPI void U_EXPORT2
     474           0 : uiter_setCharacterIterator(UCharIterator *iter, CharacterIterator *charIter) {
     475           0 :     if(iter!=0) {
     476           0 :         if(charIter!=0) {
     477           0 :             *iter=characterIteratorWrapper;
     478           0 :             iter->context=charIter;
     479             :         } else {
     480           0 :             *iter=noopIterator;
     481             :         }
     482             :     }
     483           0 : }
     484             : 
     485             : /* UCharIterator wrapper around Replaceable --------------------------------- */
     486             : 
     487             : /*
     488             :  * This is an implementation of a code unit (UChar) iterator
     489             :  * based on a Replaceable object.
     490             :  *
     491             :  * The UCharIterator.context field holds a pointer to the Replaceable.
     492             :  * UCharIterator.length and UCharIterator.index hold Replaceable.length()
     493             :  * and the iteration index.
     494             :  */
     495             : 
     496             : static UChar32 U_CALLCONV
     497           0 : replaceableIteratorCurrent(UCharIterator *iter) {
     498           0 :     if(iter->index<iter->limit) {
     499           0 :         return ((Replaceable *)(iter->context))->charAt(iter->index);
     500             :     } else {
     501           0 :         return U_SENTINEL;
     502             :     }
     503             : }
     504             : 
     505             : static UChar32 U_CALLCONV
     506           0 : replaceableIteratorNext(UCharIterator *iter) {
     507           0 :     if(iter->index<iter->limit) {
     508           0 :         return ((Replaceable *)(iter->context))->charAt(iter->index++);
     509             :     } else {
     510           0 :         return U_SENTINEL;
     511             :     }
     512             : }
     513             : 
     514             : static UChar32 U_CALLCONV
     515           0 : replaceableIteratorPrevious(UCharIterator *iter) {
     516           0 :     if(iter->index>iter->start) {
     517           0 :         return ((Replaceable *)(iter->context))->charAt(--iter->index);
     518             :     } else {
     519           0 :         return U_SENTINEL;
     520             :     }
     521             : }
     522             : 
     523             : static const UCharIterator replaceableIterator={
     524             :     0, 0, 0, 0, 0, 0,
     525             :     stringIteratorGetIndex,
     526             :     stringIteratorMove,
     527             :     stringIteratorHasNext,
     528             :     stringIteratorHasPrevious,
     529             :     replaceableIteratorCurrent,
     530             :     replaceableIteratorNext,
     531             :     replaceableIteratorPrevious,
     532             :     NULL,
     533             :     stringIteratorGetState,
     534             :     stringIteratorSetState
     535             : };
     536             : 
     537             : U_CAPI void U_EXPORT2
     538           0 : uiter_setReplaceable(UCharIterator *iter, const Replaceable *rep) {
     539           0 :     if(iter!=0) {
     540           0 :         if(rep!=0) {
     541           0 :             *iter=replaceableIterator;
     542           0 :             iter->context=rep;
     543           0 :             iter->limit=iter->length=rep->length();
     544             :         } else {
     545           0 :             *iter=noopIterator;
     546             :         }
     547             :     }
     548           0 : }
     549             : 
     550             : /* UCharIterator implementation for UTF-8 strings --------------------------- */
     551             : 
     552             : /*
     553             :  * Possible, probably necessary only for an implementation for arbitrary
     554             :  * converters:
     555             :  * Maintain a buffer (ring buffer?) for a piece of converted 16-bit text.
     556             :  * This would require to turn reservedFn into a close function and
     557             :  * to introduce a uiter_close(iter).
     558             :  */
     559             : 
     560             : #define UITER_CNV_CAPACITY 16
     561             : 
     562             : /*
     563             :  * Minimal implementation:
     564             :  * Maintain a single-UChar buffer for an additional surrogate.
     565             :  * The caller must not modify start and limit because they are used internally.
     566             :  *
     567             :  * Use UCharIterator fields as follows:
     568             :  *   context        pointer to UTF-8 string
     569             :  *   length         UTF-16 length of the string; -1 until lazy evaluation
     570             :  *   start          current UTF-8 index
     571             :  *   index          current UTF-16 index; may be -1="unknown" after setState()
     572             :  *   limit          UTF-8 length of the string
     573             :  *   reservedField  supplementary code point
     574             :  *
     575             :  * Since UCharIterator delivers 16-bit code units, the iteration can be
     576             :  * currently in the middle of the byte sequence for a supplementary code point.
     577             :  * In this case, reservedField will contain that code point and start will
     578             :  * point to after the corresponding byte sequence. The UTF-16 index will be
     579             :  * one less than what it would otherwise be corresponding to the UTF-8 index.
     580             :  * Otherwise, reservedField will be 0.
     581             :  */
     582             : 
     583             : /*
     584             :  * Possible optimization for NUL-terminated UTF-8 and UTF-16 strings:
     585             :  * Add implementations that do not call strlen() for iteration but check for NUL.
     586             :  */
     587             : 
     588             : static int32_t U_CALLCONV
     589           0 : utf8IteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) {
                           /*
                            * Reports a UTF-16 code unit index of this UTF-8 iterator.
                            *
                            * origin UITER_ZERO or UITER_START: returns 0.
                            * origin UITER_CURRENT: returns iter->index. If it is unknown (<0), the
                            *   bytes [0, iter->start) are decoded, the UTF-16 lengths of the code
                            *   points are summed, and the result is cached in iter->index.
                            * origin UITER_LIMIT or UITER_LENGTH: returns iter->length. If it is
                            *   unknown (<0), it is counted up to iter->limit (continuing from a
                            *   known iter->index where there is one) and cached in iter->length.
                            * any other origin: returns -1.
                            *
                            * iter->start and iter->limit are UTF-8 byte offsets into iter->context;
                            * iter->index and iter->length are UTF-16 offsets. A nonzero
                            * iter->reservedField means the position is in the middle of a
                            * supplementary code point (between its two surrogates).
                            */
     590           0 :     switch(origin) {
     591             :     case UITER_ZERO:
     592             :     case UITER_START:
     593           0 :         return 0;
     594             :     case UITER_CURRENT:
     595           0 :         if(iter->index<0) {
     596             :             /* the current UTF-16 index is unknown after setState(), count from the beginning */
     597             :             const uint8_t *s;
     598             :             UChar32 c;
     599             :             int32_t i, limit, index;
     600             : 
     601           0 :             s=(const uint8_t *)iter->context;
     602           0 :             i=index=0;
     603           0 :             limit=iter->start; /* count up to the UTF-8 index */
     604           0 :             while(i<limit) {
     605           0 :                 U8_NEXT_OR_FFFD(s, i, limit, c);
     606           0 :                 index+=U16_LENGTH(c);
     607             :             }
     608             : 
     609           0 :             iter->start=i; /* just in case setState() did not get us to a code point boundary */
                                   /* if the count ran all the way to the end, it is also the total UTF-16 length */
     610           0 :             if(i==iter->limit) {
     611           0 :                 iter->length=index; /* in case it was <0 or wrong */
     612             :             }
     613           0 :             if(iter->reservedField!=0) {
     614           0 :                 --index; /* we are in the middle of a supplementary code point */
     615             :             }
     616           0 :             iter->index=index;
     617             :         }
     618           0 :         return iter->index;
     619             :     case UITER_LIMIT:
     620             :     case UITER_LENGTH:
     621           0 :         if(iter->length<0) {
     622             :             const uint8_t *s;
     623             :             UChar32 c;
     624             :             int32_t i, limit, length;
     625             : 
     626           0 :             s=(const uint8_t *)iter->context;
     627           0 :             if(iter->index<0) {
     628             :                 /*
     629             :                  * the current UTF-16 index is unknown after setState(),
     630             :                  * we must first count from the beginning to here
     631             :                  */
     632           0 :                 i=length=0;
     633           0 :                 limit=iter->start;
     634             : 
     635             :                 /* count from the beginning to the current index */
     636           0 :                 while(i<limit) {
     637           0 :                     U8_NEXT_OR_FFFD(s, i, limit, c);
     638           0 :                     length+=U16_LENGTH(c);
     639             :                 }
     640             : 
     641             :                 /* assume i==limit==iter->start, set the UTF-16 index */
     642           0 :                 iter->start=i; /* just in case setState() did not get us to a code point boundary */
     643           0 :                 iter->index= iter->reservedField!=0 ? length-1 : length;
     644             :             } else {
                                       /*
                                        * the UTF-16 index is known: continue counting from here;
                                        * in the middle of a supplementary code point iter->start is
                                        * already behind the whole code point, so count one more unit
                                        */
     645           0 :                 i=iter->start;
     646           0 :                 length=iter->index;
     647           0 :                 if(iter->reservedField!=0) {
     648           0 :                     ++length;
     649             :                 }
     650             :             }
     651             : 
     652             :             /* count from the current index to the end */
     653           0 :             limit=iter->limit;
     654           0 :             while(i<limit) {
     655           0 :                 U8_NEXT_OR_FFFD(s, i, limit, c);
     656           0 :                 length+=U16_LENGTH(c);
     657             :             }
     658           0 :             iter->length=length;
     659             :         }
     660           0 :         return iter->length;
     661             :     default:
     662             :         /* not a valid origin */
     663             :         /* Should never get here! */
     664           0 :         return -1;
     665             :     }
     666             : }
     667             : 
     668             : static int32_t U_CALLCONV
     669           0 : utf8IteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin) {
                           /*
                            * Moves the iterator by delta UTF-16 code units relative to origin and
                            * pins the result to the edges of the string.
                            *
                            * Returns the new UTF-16 index, UITER_UNKNOWN_INDEX when the resulting
                            * UTF-16 index is not known, or -1 for an invalid origin.
                            *
                            * Outline:
                            * 1. The switch turns (origin, delta) into an absolute target "pos" when
                            *    the reference index is known (havePos); otherwise only the relative
                            *    delta can be used.
                            * 2. With a known target, the walk may be restarted from the beginning
                            *    or from the end if that is closer, and delta is recomputed.
                            * 3. The UTF-8 bytes are then walked forward or backward one code point
                            *    at a time; a supplementary code point counts as two units, and
                            *    stopping between its surrogates is recorded in iter->reservedField.
                            */
     670             :     const uint8_t *s;
     671             :     UChar32 c;
     672             :     int32_t pos; /* requested UTF-16 index */
     673             :     int32_t i; /* UTF-8 index */
     674             :     UBool havePos;
     675             : 
     676             :     /* calculate the requested UTF-16 index */
     677           0 :     switch(origin) {
     678             :     case UITER_ZERO:
     679             :     case UITER_START:
     680           0 :         pos=delta;
     681           0 :         havePos=TRUE;
     682             :         /* iter->index<0 (unknown) is possible */
     683           0 :         break;
     684             :     case UITER_CURRENT:
     685           0 :         if(iter->index>=0) {
     686           0 :             pos=iter->index+delta;
     687           0 :             havePos=TRUE;
     688             :         } else {
     689             :             /* the current UTF-16 index is unknown after setState(), use only delta */
     690           0 :             pos=0;
     691           0 :             havePos=FALSE;
     692             :         }
     693           0 :         break;
     694             :     case UITER_LIMIT:
     695             :     case UITER_LENGTH:
     696           0 :         if(iter->length>=0) {
     697           0 :             pos=iter->length+delta;
     698           0 :             havePos=TRUE;
     699             :         } else {
     700             :             /* pin to the end, avoid counting the length */
     701           0 :             iter->index=-1;
     702           0 :             iter->start=iter->limit;
     703           0 :             iter->reservedField=0;
     704           0 :             if(delta>=0) {
     705           0 :                 return UITER_UNKNOWN_INDEX;
     706             :             } else {
     707             :                 /* the current UTF-16 index is unknown, use only delta */
     708           0 :                 pos=0;
     709           0 :                 havePos=FALSE;
     710             :             }
     711             :         }
     712           0 :         break;
     713             :     default:
     714           0 :         return -1;  /* Error */
     715             :     }
     716             : 
     717           0 :     if(havePos) {
     718             :         /* shortcuts: pinning to the edges of the string */
     719           0 :         if(pos<=0) {
     720           0 :             iter->index=iter->start=iter->reservedField=0;
     721           0 :             return 0;
     722           0 :         } else if(iter->length>=0 && pos>=iter->length) {
     723           0 :             iter->index=iter->length;
     724           0 :             iter->start=iter->limit;
     725           0 :             iter->reservedField=0;
     726           0 :             return iter->index;
     727             :         }
     728             : 
     729             :         /* minimize the number of U8_NEXT/PREV operations */
     730           0 :         if(iter->index<0 || pos<iter->index/2) {
     731             :             /* go forward from the start instead of backward from the current index */
     732           0 :             iter->index=iter->start=iter->reservedField=0;
     733           0 :         } else if(iter->length>=0 && (iter->length-pos)<(pos-iter->index)) {
     734             :             /*
     735             :              * if we have the UTF-16 index and length and the new position is
     736             :              * closer to the end than the current index,
     737             :              * then go backward from the end instead of forward from the current index
     738             :              */
     739           0 :             iter->index=iter->length;
     740           0 :             iter->start=iter->limit;
     741           0 :             iter->reservedField=0;
     742             :         }
     743             : 
                               /* from here on delta is relative to the (possibly reset) current index */
     744           0 :         delta=pos-iter->index;
     745           0 :         if(delta==0) {
     746           0 :             return iter->index; /* nothing to do */
     747             :         }
     748             :     } else {
     749             :         /* move relative to unknown UTF-16 index */
     750           0 :         if(delta==0) {
     751           0 :             return UITER_UNKNOWN_INDEX; /* nothing to do */
     752           0 :         } else if(-delta>=iter->start) {
     753             :             /* moving backwards by more UChars than there are UTF-8 bytes, pin to 0 */
     754           0 :             iter->index=iter->start=iter->reservedField=0;
     755           0 :             return 0;
     756           0 :         } else if(delta>=(iter->limit-iter->start)) {
     757             :             /* moving forward by more UChars than the remaining UTF-8 bytes, pin to the end */
     758           0 :             iter->index=iter->length; /* may or may not be <0 (unknown) */
     759           0 :             iter->start=iter->limit;
     760           0 :             iter->reservedField=0;
     761           0 :             return iter->index>=0 ? iter->index : (int32_t)UITER_UNKNOWN_INDEX;
     762             :         }
     763             :     }
     764             : 
     765             :     /* delta!=0 */
     766             : 
     767             :     /* move towards the requested position, pin to the edges of the string */
     768           0 :     s=(const uint8_t *)iter->context;
     769           0 :     pos=iter->index; /* could be <0 (unknown) */
     770           0 :     i=iter->start;
     771           0 :     if(delta>0) {
     772             :         /* go forward */
     773           0 :         int32_t limit=iter->limit;
                               /* between surrogates: finishing the pair uses up one unit without moving the byte index */
     774           0 :         if(iter->reservedField!=0) {
     775           0 :             iter->reservedField=0;
     776           0 :             ++pos;
     777           0 :             --delta;
     778             :         }
     779           0 :         while(delta>0 && i<limit) {
     780           0 :             U8_NEXT_OR_FFFD(s, i, limit, c);
     781           0 :             if(c<=0xffff) {
     782           0 :                 ++pos;
     783           0 :                 --delta;
     784           0 :             } else if(delta>=2) {
     785           0 :                 pos+=2;
     786           0 :                 delta-=2;
     787             :             } else /* delta==1 */ {
     788             :                 /* stop in the middle of a supplementary code point */
     789           0 :                 iter->reservedField=c;
     790           0 :                 ++pos;
     791           0 :                 break; /* delta=0; */
     792             :             }
     793             :         }
                               /* at the end of the string: a known index yields the length, a known length yields the index */
     794           0 :         if(i==limit) {
     795           0 :             if(iter->length<0 && iter->index>=0) {
     796           0 :                 iter->length= iter->reservedField==0 ? pos : pos+1;
     797           0 :             } else if(iter->index<0 && iter->length>=0) {
     798           0 :                 iter->index= iter->reservedField==0 ? iter->length : iter->length-1;
     799             :             }
     800             :         }
     801             :     } else /* delta<0 */ {
     802             :         /* go backward */
     803           0 :         if(iter->reservedField!=0) {
     804           0 :             iter->reservedField=0;
     805           0 :             i-=4; /* we stayed behind the supplementary code point; go before it now */
     806           0 :             --pos;
     807           0 :             ++delta;
     808             :         }
     809           0 :         while(delta<0 && i>0) {
     810           0 :             U8_PREV_OR_FFFD(s, 0, i, c);
     811           0 :             if(c<=0xffff) {
     812           0 :                 --pos;
     813           0 :                 ++delta;
     814           0 :             } else if(delta<=-2) {
     815           0 :                 pos-=2;
     816           0 :                 delta+=2;
     817             :             } else /* delta==-1 */ {
     818             :                 /* stop in the middle of a supplementary code point */
     819           0 :                 i+=4; /* back to behind this supplementary code point for consistent state */
     820           0 :                 iter->reservedField=c;
     821           0 :                 --pos;
     822           0 :                 break; /* delta=0; */
     823             :             }
     824             :         }
     825             :     }
     826             : 
                           /* commit the new UTF-8 byte index; the UTF-16 index is only stored if it was known before the walk */
     827           0 :     iter->start=i;
     828           0 :     if(iter->index>=0) {
     829           0 :         return iter->index=pos;
     830             :     } else {
     831             :         /* we started with index<0 (unknown) so pos is bogus */
     832           0 :         if(i<=1) {
                                   /* a byte index of 0 or 1 is used directly as the UTF-16 index */
     833           0 :             return iter->index=i; /* reached the beginning */
     834             :         } else {
     835             :             /* we still don't know the UTF-16 index */
     836           0 :             return UITER_UNKNOWN_INDEX;
     837             :         }
     838             :     }
     839             : }
     840             : 
     841             : static UBool U_CALLCONV
     842           0 : utf8IteratorHasNext(UCharIterator *iter) {
                           /*
                            * True if next() can deliver another code unit: either bytes remain
                            * before iter->limit, or a trail surrogate is still pending
                            * (iter->reservedField!=0).
                            */
     843           0 :     return iter->start<iter->limit || iter->reservedField!=0;
     844             : }
     845             : 
     846             : static UBool U_CALLCONV
     847           0 : utf8IteratorHasPrevious(UCharIterator *iter) {
                           /* True if the UTF-8 byte index iter->start is not at the beginning of the string. */
     848           0 :     return iter->start>0;
     849             : }
     850             : 
     851             : static UChar32 U_CALLCONV
     852           0 : utf8IteratorCurrent(UCharIterator *iter) {
                           /*
                            * Returns the UTF-16 code unit at the current position without moving
                            * the iterator:
                            * - the trail surrogate of iter->reservedField when the position is in
                            *   the middle of a supplementary code point,
                            * - otherwise the code point decoded at iter->start if it is <=0xffff,
                            *   or its lead surrogate if it is supplementary,
                            * - U_SENTINEL at the end of the string.
                            */
     853           0 :     if(iter->reservedField!=0) {
     854           0 :         return U16_TRAIL(iter->reservedField);
     855           0 :     } else if(iter->start<iter->limit) {
     856           0 :         const uint8_t *s=(const uint8_t *)iter->context;
     857             :         UChar32 c;
                               /* decode with a local copy of the byte index so that iter->start stays unchanged */
     858           0 :         int32_t i=iter->start;
     859             : 
     860           0 :         U8_NEXT_OR_FFFD(s, i, iter->limit, c);
     861           0 :         if(c<=0xffff) {
     862           0 :             return c;
     863             :         } else {
     864           0 :             return U16_LEAD(c);
     865             :         }
     866             :     } else {
     867           0 :         return U_SENTINEL;
     868             :     }
     869             : }
     870             : 
     871             : static UChar32 U_CALLCONV
     872           0 : utf8IteratorNext(UCharIterator *iter) {
                           /*
                            * Returns the UTF-16 code unit at the current position and advances by
                            * one code unit; returns U_SENTINEL at the end of the string.
                            *
                            * A supplementary code point is delivered in two calls: the first call
                            * consumes its UTF-8 bytes, stores the code point in
                            * iter->reservedField and returns the lead surrogate; the second call
                            * returns the trail surrogate and clears iter->reservedField.
                            *
                            * iter->index is incremented only while it is known (>=0). When the
                            * end of the string is reached, a missing iter->length or iter->index
                            * is derived from the one that is known.
                            */
     873             :     int32_t index;
     874             : 
     875           0 :     if(iter->reservedField!=0) {
     876           0 :         UChar trail=U16_TRAIL(iter->reservedField);
     877           0 :         iter->reservedField=0;
     878           0 :         if((index=iter->index)>=0) {
     879           0 :             iter->index=index+1;
     880             :         }
     881           0 :         return trail;
     882           0 :     } else if(iter->start<iter->limit) {
     883           0 :         const uint8_t *s=(const uint8_t *)iter->context;
     884             :         UChar32 c;
     885             : 
     886           0 :         U8_NEXT_OR_FFFD(s, iter->start, iter->limit, c);
     887           0 :         if((index=iter->index)>=0) {
     888           0 :             iter->index=++index;
                                   /* at the end: the length is the new index, plus the still pending trail surrogate */
     889           0 :             if(iter->length<0 && iter->start==iter->limit) {
     890           0 :                 iter->length= c<=0xffff ? index : index+1;
     891             :             }
     892           0 :         } else if(iter->start==iter->limit && iter->length>=0) {
                                   /* at the end with a known length: the index follows from it */
     893           0 :             iter->index= c<=0xffff ? iter->length : iter->length-1;
     894             :         }
     895           0 :         if(c<=0xffff) {
     896           0 :             return c;
     897             :         } else {
     898           0 :             iter->reservedField=c;
     899           0 :             return U16_LEAD(c);
     900             :         }
     901             :     } else {
     902           0 :         return U_SENTINEL;
     903             :     }
     904             : }
     905             : 
     906             : static UChar32 U_CALLCONV
     907           0 : utf8IteratorPrevious(UCharIterator *iter) {
                           /*
                            * Moves back by one UTF-16 code unit and returns that unit; returns
                            * U_SENTINEL at the beginning of the string.
                            *
                            * A supplementary code point is delivered in two calls: the first call
                            * returns the trail surrogate, stores the code point in
                            * iter->reservedField and leaves iter->start behind the code point;
                            * the second call returns the lead surrogate, clears
                            * iter->reservedField and moves iter->start in front of the 4 bytes.
                            *
                            * iter->index is decremented only while it is >0; once iter->start is
                            * back at byte 0 or 1 the index is set from the byte index.
                            */
     908             :     int32_t index;
     909             : 
     910           0 :     if(iter->reservedField!=0) {
     911           0 :         UChar lead=U16_LEAD(iter->reservedField);
     912           0 :         iter->reservedField=0;
     913           0 :         iter->start-=4; /* we stayed behind the supplementary code point; go before it now */
     914           0 :         if((index=iter->index)>0) {
     915           0 :             iter->index=index-1;
     916             :         }
     917           0 :         return lead;
     918           0 :     } else if(iter->start>0) {
     919           0 :         const uint8_t *s=(const uint8_t *)iter->context;
     920             :         UChar32 c;
     921             : 
     922           0 :         U8_PREV_OR_FFFD(s, 0, iter->start, c);
     923           0 :         if((index=iter->index)>0) {
     924           0 :             iter->index=index-1;
     925           0 :         } else if(iter->start<=1) {
                                   /* near the beginning: take the index from the byte index, plus one for a pending lead surrogate */
     926           0 :             iter->index= c<=0xffff ? iter->start : iter->start+1;
     927             :         }
     928           0 :         if(c<=0xffff) {
     929           0 :             return c;
     930             :         } else {
     931           0 :             iter->start+=4; /* back to behind this supplementary code point for consistent state */
     932           0 :             iter->reservedField=c;
     933           0 :             return U16_TRAIL(c);
     934             :         }
     935             :     } else {
     936           0 :         return U_SENTINEL;
     937             :     }
     938             : }
     939             : 
     940             : static uint32_t U_CALLCONV
     941           0 : utf8IteratorGetState(const UCharIterator *iter) {
                           /*
                            * Packs the position into one 32-bit value: the UTF-8 byte index
                            * iter->start in bits 31..1, and bit 0 set when the position is in the
                            * middle of a supplementary code point (iter->reservedField!=0).
                            *
                            * NOTE(review): the shift is applied to iter->start before the cast to
                            * uint32_t; presumably start is a non-negative int32_t, in which case
                            * values >=0x40000000 shift into the sign bit - consider casting first.
                            */
     942           0 :     uint32_t state=(uint32_t)(iter->start<<1);
     943           0 :     if(iter->reservedField!=0) {
     944           0 :         state|=1;
     945             :     }
     946           0 :     return state;
     947             : }
     948             : 
     949             : static void U_CALLCONV
     950           0 : utf8IteratorSetState(UCharIterator *iter,
     951             :                      uint32_t state,
     952             :                      UErrorCode *pErrorCode)
     953             : {
                           /*
                            * Restores a position that was packed by utf8IteratorGetState():
                            * state>>1 is the UTF-8 byte index, bit 0 marks "in the middle of a
                            * supplementary code point".
                            *
                            * Does nothing if pErrorCode is NULL or already indicates a failure,
                            * or if state equals the current state.
                            * Sets U_ILLEGAL_ARGUMENT_ERROR if iter is NULL.
                            * Sets U_INDEX_OUTOFBOUNDS_ERROR if the byte index is beyond
                            * iter->limit, if bit 0 is set with a byte index below 4, or if bit 0
                            * is set but the code point before the byte index is not
                            * supplementary.
                            *
                            * The UTF-16 index becomes unknown (-1) unless the byte index is 0 or 1.
                            *
                            * NOTE(review): when the "not supplementary" error is reported,
                            * iter->start and iter->index have already been overwritten and
                            * iter->reservedField keeps its previous value.
                            */
     954           0 :     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
     955             :         /* do nothing */
     956           0 :     } else if(iter==NULL) {
     957           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
     958           0 :     } else if(state==utf8IteratorGetState(iter)) {
     959             :         /* setting to the current state: no-op */
     960             :     } else {
     961           0 :         int32_t index=(int32_t)(state>>1); /* UTF-8 index */
     962           0 :         state&=1; /* 1 if in surrogate pair, must be index>=4 */
     963             : 
     964           0 :         if((state==0 ? index<0 : index<4) || iter->limit<index) {
     965           0 :             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
     966             :         } else {
     967           0 :             iter->start=index; /* restore UTF-8 byte index */
     968           0 :             if(index<=1) {
     969           0 :                 iter->index=index;
     970             :             } else {
     971           0 :                 iter->index=-1; /* unknown UTF-16 index */
     972             :             }
     973           0 :             if(state==0) {
     974           0 :                 iter->reservedField=0;
     975             :             } else {
     976             :                 /* verified index>=4 above */
     977             :                 UChar32 c;
                                       /* re-read the code point that ends at the byte index; this moves only the local index */
     978           0 :                 U8_PREV_OR_FFFD((const uint8_t *)iter->context, 0, index, c);
     979           0 :                 if(c<=0xffff) {
     980           0 :                     *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
     981             :                 } else {
     982           0 :                     iter->reservedField=c;
     983             :                 }
     984             :             }
     985             :         }
     986             :     }
     987           0 : }
     988             : 
     989             : static const UCharIterator utf8Iterator={
                           /*
                            * Template that uiter_setUTF8() copies into the caller's iterator
                            * before filling in context, limit and length.
                            * The six zeros initialize the leading data members; presumably these
                            * are context, length, start, index, limit and reservedField - confirm
                            * the order against the UCharIterator declaration.
                            * The NULL entry leaves one function slot unused (presumably a
                            * reserved function pointer - confirm against the declaration).
                            */
     990             :     0, 0, 0, 0, 0, 0,
     991             :     utf8IteratorGetIndex,
     992             :     utf8IteratorMove,
     993             :     utf8IteratorHasNext,
     994             :     utf8IteratorHasPrevious,
     995             :     utf8IteratorCurrent,
     996             :     utf8IteratorNext,
     997             :     utf8IteratorPrevious,
     998             :     NULL,
     999             :     utf8IteratorGetState,
    1000             :     utf8IteratorSetState
    1001             : };
    1002             : 
    1003             : U_CAPI void U_EXPORT2
    1004           0 : uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length) {
                           /*
                            * Sets up iter to iterate over the UTF-8 string s.
                            *
                            * iter:   iterator structure to fill in; nothing happens if it is NULL.
                            * s:      UTF-8 bytes; stored as iter->context.
                            * length: number of bytes, or -1 to measure s with uprv_strlen().
                            *
                            * If s is NULL or length<-1, iter is set to noopIterator instead.
                            * The UTF-16 length is recorded right away only for strings of at
                            * most one byte; otherwise it is left unknown (-1).
                            */
    1005           0 :     if(iter!=0) {
    1006           0 :         if(s!=0 && length>=-1) {
    1007           0 :             *iter=utf8Iterator;
    1008           0 :             iter->context=s;
    1009           0 :             if(length>=0) {
    1010           0 :                 iter->limit=length;
    1011             :             } else {
    1012           0 :                 iter->limit=(int32_t)uprv_strlen(s);
    1013             :             }
    1014           0 :             iter->length= iter->limit<=1 ? iter->limit : -1;
    1015             :         } else {
    1016           0 :             *iter=noopIterator;
    1017             :         }
    1018             :     }
    1019           0 : }
    1020             : 
    1021             : /* Helper functions --------------------------------------------------------- */
    1022             : 
    1023             : U_CAPI UChar32 U_EXPORT2
    1024           0 : uiter_current32(UCharIterator *iter) {
                           /*
                            * Returns the code point at the current position. Works through the
                            * iterator's function pointers and undoes every move it makes.
                            *
                            * If the current code unit is a lead surrogate followed by a trail
                            * surrogate, or a trail surrogate preceded by a lead surrogate, the
                            * combined supplementary code point is returned. Otherwise the result
                            * of iter->current() is returned unchanged (an unpaired surrogate, a
                            * BMP code unit, or whatever current() returns at the end).
                            *
                            * iter is dereferenced without a NULL check.
                            */
    1025             :     UChar32 c, c2;
    1026             : 
    1027           0 :     c=iter->current(iter);
    1028           0 :     if(U16_IS_SURROGATE(c)) {
    1029           0 :         if(U16_IS_SURROGATE_LEAD(c)) {
    1030             :             /*
    1031             :              * go to the next code unit
    1032             :              * we know that we are not at the limit because c!=U_SENTINEL
    1033             :              */
    1034           0 :             iter->move(iter, 1, UITER_CURRENT);
    1035           0 :             if(U16_IS_TRAIL(c2=iter->current(iter))) {
    1036           0 :                 c=U16_GET_SUPPLEMENTARY(c, c2);
    1037             :             }
    1038             : 
    1039             :             /* undo index movement */
    1040           0 :             iter->move(iter, -1, UITER_CURRENT);
    1041             :         } else {
                                   /* c is a trail surrogate: look at the code unit before it */
    1042           0 :             if(U16_IS_LEAD(c2=iter->previous(iter))) {
    1043           0 :                 c=U16_GET_SUPPLEMENTARY(c2, c);
    1044             :             }
                                   /* a negative c2 means previous() returned no code unit, so there is nothing to undo */
    1045           0 :             if(c2>=0) {
    1046             :                 /* undo index movement */
    1047           0 :                 iter->move(iter, 1, UITER_CURRENT);
    1048             :             }
    1049             :         }
    1050             :     }
    1051           0 :     return c;
    1052             : }
    1053             : 
    1054             : U_CAPI UChar32 U_EXPORT2
    1055           0 : uiter_next32(UCharIterator *iter) {
                           /*
                            * Returns the code point at the current position and moves behind it.
                            *
                            * Reads one code unit with iter->next(); if it is a lead surrogate, a
                            * second unit is read. A following trail surrogate is combined into a
                            * supplementary code point; any other second unit is pushed back by
                            * moving one step backward, and the unpaired lead surrogate is
                            * returned. Otherwise the result of the first next() is returned.
                            *
                            * iter is dereferenced without a NULL check.
                            */
    1056             :     UChar32 c, c2;
    1057             : 
    1058           0 :     c=iter->next(iter);
    1059           0 :     if(U16_IS_LEAD(c)) {
    1060           0 :         if(U16_IS_TRAIL(c2=iter->next(iter))) {
    1061           0 :             c=U16_GET_SUPPLEMENTARY(c, c2);
    1062           0 :         } else if(c2>=0) {
    1063             :             /* unmatched first surrogate, undo index movement */
    1064           0 :             iter->move(iter, -1, UITER_CURRENT);
    1065             :         }
    1066             :     }
    1067           0 :     return c;
    1068             : }
    1069             : 
    1070             : U_CAPI UChar32 U_EXPORT2
    1071           0 : uiter_previous32(UCharIterator *iter) {
                           /*
                            * Moves in front of the code point before the current position and
                            * returns that code point.
                            *
                            * Reads one code unit with iter->previous(); if it is a trail
                            * surrogate, a second unit is read. A preceding lead surrogate is
                            * combined into a supplementary code point; any other second unit is
                            * pushed back by moving one step forward, and the unpaired trail
                            * surrogate is returned. Otherwise the result of the first previous()
                            * is returned.
                            *
                            * iter is dereferenced without a NULL check.
                            */
    1072             :     UChar32 c, c2;
    1073             : 
    1074           0 :     c=iter->previous(iter);
    1075           0 :     if(U16_IS_TRAIL(c)) {
    1076           0 :         if(U16_IS_LEAD(c2=iter->previous(iter))) {
    1077           0 :             c=U16_GET_SUPPLEMENTARY(c2, c);
    1078           0 :         } else if(c2>=0) {
    1079             :             /* unmatched second surrogate, undo index movement */
    1080           0 :             iter->move(iter, 1, UITER_CURRENT);
    1081             :         }
    1082             :     }
    1083           0 :     return c;
    1084             : }
    1085             : 
    1086             : U_CAPI uint32_t U_EXPORT2
    1087           0 : uiter_getState(const UCharIterator *iter) {
                           /*
                            * Returns the iterator's state value by calling its getState function,
                            * or UITER_NO_STATE if iter is NULL or has no getState function.
                            */
    1088           0 :     if(iter==NULL || iter->getState==NULL) {
    1089           0 :         return UITER_NO_STATE;
    1090             :     } else {
    1091           0 :         return iter->getState(iter);
    1092             :     }
    1093             : }
    1094             : 
    1095             : U_CAPI void U_EXPORT2
    1096           0 : uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode) {
                           /*
                            * Restores a state value by calling the iterator's setState function.
                            *
                            * Does nothing if pErrorCode is NULL or already indicates a failure.
                            * Sets U_ILLEGAL_ARGUMENT_ERROR if iter is NULL and
                            * U_UNSUPPORTED_ERROR if the iterator has no setState function.
                            */
    1097           0 :     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    1098             :         /* do nothing */
    1099           0 :     } else if(iter==NULL) {
    1100           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    1101           0 :     } else if(iter->setState==NULL) {
    1102           0 :         *pErrorCode=U_UNSUPPORTED_ERROR;
    1103             :     } else {
    1104           0 :         iter->setState(iter, state, pErrorCode);
    1105             :     }
    1106           0 : }
    1107             : 
    1108             : U_CDECL_END

Generated by: LCOV version 1.13