LCOV - code coverage report
Current view: top level - intl/icu/source/common - ustring.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 10 626 1.6 %
Date: 2017-07-14 16:53:18 Functions: 3 50 6.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : ******************************************************************************
       5             : *
       6             : *   Copyright (C) 1998-2016, International Business Machines
       7             : *   Corporation and others.  All Rights Reserved.
       8             : *
       9             : ******************************************************************************
      10             : *
      11             : * File ustring.cpp
      12             : *
      13             : * Modification History:
      14             : *
      15             : *   Date        Name        Description
      16             : *   12/07/98    bertrand    Creation.
      17             : ******************************************************************************
      18             : */
      19             : 
      20             : #include "unicode/utypes.h"
      21             : #include "unicode/putil.h"
      22             : #include "unicode/uchar.h"
      23             : #include "unicode/ustring.h"
      24             : #include "unicode/utf16.h"
      25             : #include "cstring.h"
      26             : #include "cwchar.h"
      27             : #include "cmemory.h"
      28             : #include "ustr_imp.h"
      29             : 
      30             : /* ANSI string.h - style functions ------------------------------------------ */
      31             : 
      32             : /* U+ffff is the highest BMP code point and the largest value that fits into a single 16-bit UChar; the u_...32() search functions in this file look for larger code points as surrogate pairs */
      33             : #define U_BMP_MAX 0xffff
      34             : 
      35             : /* Forward binary string search functions ----------------------------------- */
      36             : 
      37             : /*
      38             :  * Test if a substring match [match, matchLimit) inside a string starts and ends at code point boundaries.
      39             :  * All pointers refer to the same buffer: start is its first unit, limit (if not NULL) is one past its last unit.
      40             :  * The limit pointer may be NULL (callers pass NULL for NUL-terminated text), all others must be real pointers.
      41             :  */
      42             : static inline UBool
      43           0 : isMatchAtCPBoundary(const UChar *start, const UChar *match, const UChar *matchLimit, const UChar *limit) {
      44           0 :     if(U16_IS_TRAIL(*match) && start!=match && U16_IS_LEAD(*(match-1))) {
      45             :         /* the leading edge of the match is in the middle of a surrogate pair */
      46           0 :         return FALSE;
      47             :     }
      48           0 :     if(U16_IS_LEAD(*(matchLimit-1)) && matchLimit!=limit && U16_IS_TRAIL(*matchLimit)) {
      49             :         /* the trailing edge of the match is in the middle of a surrogate pair; *matchLimit is read only when matchLimit is not the end of the buffer */
      50           0 :         return FALSE;
      51             :     }
      52           0 :     return TRUE;
      53             : }
      54             : /* Find the first occurrence of sub in s that does not split a surrogate pair; returns a pointer into s, or NULL if there is none. A length of -1 means NUL-terminated. */
      55             : U_CAPI UChar * U_EXPORT2
      56           0 : u_strFindFirst(const UChar *s, int32_t length,
      57             :                const UChar *sub, int32_t subLength) {
      58             :     const UChar *start, *p, *q, *subLimit;
      59             :     UChar c, cs, cq;
      60             : /* degenerate arguments: a NULL sub or subLength below -1 returns s itself; a NULL s or length below -1 returns NULL */
      61           0 :     if(sub==NULL || subLength<-1) {
      62           0 :         return (UChar *)s;
      63             :     }
      64           0 :     if(s==NULL || length<-1) {
      65           0 :         return NULL;
      66             :     }
      67             : 
      68           0 :     start=s; /* kept for the surrogate boundary check, because s advances during the search */
      69             : 
      70           0 :     if(length<0 && subLength<0) {
      71             :         /* both strings are NUL-terminated */
      72           0 :         if((cs=*sub++)==0) {
      73           0 :             return (UChar *)s; /* an empty substring matches at the start of s */
      74             :         }
      75           0 :         if(*sub==0 && !U16_IS_SURROGATE(cs)) {
      76             :             /* the substring consists of a single, non-surrogate BMP code point */
      77           0 :             return u_strchr(s, cs);
      78             :         }
      79             : 
      80           0 :         while((c=*s++)!=0) {
      81           0 :             if(c==cs) {
      82             :                 /* found first substring UChar, compare rest */
      83           0 :                 p=s;
      84           0 :                 q=sub;
      85             :                 for(;;) {
      86           0 :                     if((cq=*q)==0) {
      87           0 :                         if(isMatchAtCPBoundary(start, s-1, p, NULL)) {
      88           0 :                             return (UChar *)(s-1); /* well-formed match */
      89             :                         } else {
      90           0 :                             break; /* no match because surrogate pair is split */
      91             :                         }
      92             :                     }
      93           0 :                     if((c=*p)==0) {
      94           0 :                         return NULL; /* no match, and none possible after s */
      95             :                     }
      96           0 :                     if(c!=cq) {
      97           0 :                         break; /* no match */
      98             :                     }
      99           0 :                     ++p;
     100           0 :                     ++q;
     101             :                 }
     102             :             }
     103             :         }
     104             : 
     105             :         /* not found */
     106           0 :         return NULL;
     107             :     }
     108             : /* from here on at least one of the two lengths was given explicitly */
     109           0 :     if(subLength<0) {
     110           0 :         subLength=u_strlen(sub);
     111             :     }
     112           0 :     if(subLength==0) {
     113           0 :         return (UChar *)s;
     114             :     }
     115             : 
     116             :     /* get sub[0] to search for it fast */
     117           0 :     cs=*sub++;
     118           0 :     --subLength; /* from now on subLength counts only the units after sub[0] */
     119           0 :     subLimit=sub+subLength;
     120             : 
     121           0 :     if(subLength==0 && !U16_IS_SURROGATE(cs)) {
     122             :         /* the substring consists of a single, non-surrogate BMP code point */
     123           0 :         return length<0 ? u_strchr(s, cs) : u_memchr(s, cs, length);
     124             :     }
     125             : 
     126           0 :     if(length<0) {
     127             :         /* s is NUL-terminated */
     128           0 :         while((c=*s++)!=0) {
     129           0 :             if(c==cs) {
     130             :                 /* found first substring UChar, compare rest */
     131           0 :                 p=s;
     132           0 :                 q=sub;
     133             :                 for(;;) {
     134           0 :                     if(q==subLimit) {
     135           0 :                         if(isMatchAtCPBoundary(start, s-1, p, NULL)) {
     136           0 :                             return (UChar *)(s-1); /* well-formed match */
     137             :                         } else {
     138           0 :                             break; /* no match because surrogate pair is split */
     139             :                         }
     140             :                     }
     141           0 :                     if((c=*p)==0) {
     142           0 :                         return NULL; /* no match, and none possible after s */
     143             :                     }
     144           0 :                     if(c!=*q) {
     145           0 :                         break; /* no match */
     146             :                     }
     147           0 :                     ++p;
     148           0 :                     ++q;
     149             :                 }
     150             :             }
     151             :         }
     152             :     } else {
     153             :         const UChar *limit, *preLimit;
     154             : 
     155             :         /* subLength was decremented above */
     156           0 :         if(length<=subLength) {
     157           0 :             return NULL; /* s is shorter than sub */
     158             :         }
     159             : 
     160           0 :         limit=s+length;
     161             : 
     162             :         /* the substring must start before preLimit */
     163           0 :         preLimit=limit-subLength;
     164             : 
     165           0 :         while(s!=preLimit) {
     166           0 :             c=*s++;
     167           0 :             if(c==cs) {
     168             :                 /* found first substring UChar, compare rest */
     169           0 :                 p=s;
     170           0 :                 q=sub;
     171             :                 for(;;) {
     172           0 :                     if(q==subLimit) {
     173           0 :                         if(isMatchAtCPBoundary(start, s-1, p, limit)) {
     174           0 :                             return (UChar *)(s-1); /* well-formed match */
     175             :                         } else {
     176           0 :                             break; /* no match because surrogate pair is split */
     177             :                         }
     178             :                     }
     179           0 :                     if(*p!=*q) {
     180           0 :                         break; /* no match */
     181             :                     }
     182           0 :                     ++p;
     183           0 :                     ++q;
     184             :                 }
     185             :             }
     186             :         }
     187             :     }
     188             : 
     189             :     /* not found */
     190           0 :     return NULL;
     191             : }
     192             : /* Find the first occurrence of NUL-terminated substring in NUL-terminated s; forwards to u_strFindFirst() with both lengths set to -1. */
     193             : U_CAPI UChar * U_EXPORT2
     194           0 : u_strstr(const UChar *s, const UChar *substring) {
     195           0 :     return u_strFindFirst(s, -1, substring, -1);
     196             : }
     197             : /* Find the first occurrence of the code unit c in NUL-terminated s; returns a pointer into s, or NULL if c does not occur. */
     198             : U_CAPI UChar * U_EXPORT2
     199           0 : u_strchr(const UChar *s, UChar c) {
     200           0 :     if(U16_IS_SURROGATE(c)) {
     201             :         /* make sure to not find half of a surrogate pair */
     202           0 :         return u_strFindFirst(s, -1, &c, 1);
     203             :     } else {
     204             :         UChar cs;
     205             : 
     206             :         /* trivial search for a BMP code point; c is tested before the NUL check, so c==0 finds the terminator itself */
     207             :         for(;;) {
     208           0 :             if((cs=*s)==c) {
     209           0 :                 return (UChar *)s;
     210             :             }
     211           0 :             if(cs==0) {
     212           0 :                 return NULL;
     213             :             }
     214           0 :             ++s;
     215             :         }
     216             :     }
     217             : }
     218             : /* Find the first occurrence of the code point c in NUL-terminated s; returns a pointer to its first code unit, or NULL. */
     219             : U_CAPI UChar * U_EXPORT2
     220           0 : u_strchr32(const UChar *s, UChar32 c) {
     221           0 :     if((uint32_t)c<=U_BMP_MAX) {
     222             :         /* find BMP code point */
     223           0 :         return u_strchr(s, (UChar)c);
     224           0 :     } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
     225             :         /* find supplementary code point as surrogate pair */
     226           0 :         UChar cs, lead=U16_LEAD(c), trail=U16_TRAIL(c);
     227             : 
     228           0 :         while((cs=*s++)!=0) {
     229           0 :             if(cs==lead && *s==trail) { /* s was already advanced, so *s is the unit after cs (at worst the NUL) */
     230           0 :                 return (UChar *)(s-1);
     231             :             }
     232             :         }
     233           0 :         return NULL;
     234             :     } else {
     235             :         /* not a Unicode code point, not findable (the unsigned casts above send negative c here as well) */
     236           0 :         return NULL;
     237             :     }
     238             : }
     239             : /* Find the first occurrence of the code unit c within the first count units of s; returns a pointer into s, or NULL (also when count<=0). */
     240             : U_CAPI UChar * U_EXPORT2
     241           0 : u_memchr(const UChar *s, UChar c, int32_t count) {
     242           0 :     if(count<=0) {
     243           0 :         return NULL; /* no string */
     244           0 :     } else if(U16_IS_SURROGATE(c)) {
     245             :         /* make sure to not find half of a surrogate pair */
     246           0 :         return u_strFindFirst(s, count, &c, 1);
     247             :     } else {
     248             :         /* trivial search for a BMP code point; count>0 here, so the do-while may read *s before testing the limit */
     249           0 :         const UChar *limit=s+count;
     250           0 :         do {
     251           0 :             if(*s==c) {
     252           0 :                 return (UChar *)s;
     253             :             }
     254             :         } while(++s!=limit);
     255           0 :         return NULL;
     256             :     }
     257             : }
     258             : /* Find the first occurrence of the code point c within the first count units of s; returns a pointer to its first code unit, or NULL. */
     259             : U_CAPI UChar * U_EXPORT2
     260           0 : u_memchr32(const UChar *s, UChar32 c, int32_t count) {
     261           0 :     if((uint32_t)c<=U_BMP_MAX) {
     262             :         /* find BMP code point */
     263           0 :         return u_memchr(s, (UChar)c, count);
     264           0 :     } else if(count<2) {
     265             :         /* too short for a surrogate pair */
     266           0 :         return NULL;
     267           0 :     } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
     268             :         /* find supplementary code point as surrogate pair */
     269           0 :         const UChar *limit=s+count-1; /* -1 so that we do not need a separate check for the trail unit */
     270           0 :         UChar lead=U16_LEAD(c), trail=U16_TRAIL(c);
     271             : 
     272           0 :         do {
     273           0 :             if(*s==lead && *(s+1)==trail) {
     274           0 :                 return (UChar *)s;
     275             :             }
     276             :         } while(++s!=limit);
     277           0 :         return NULL;
     278             :     } else {
     279             :         /* not a Unicode code point, not findable */
     280           0 :         return NULL;
     281             :     }
     282             : }
     283             : 
     284             : /* Backward binary string search functions ---------------------------------- */
     285             : /* Find the last occurrence of sub in s that does not split a surrogate pair; returns a pointer into s, or NULL if there is none. A length of -1 means NUL-terminated. */
     286             : U_CAPI UChar * U_EXPORT2
     287           0 : u_strFindLast(const UChar *s, int32_t length,
     288             :               const UChar *sub, int32_t subLength) {
     289             :     const UChar *start, *limit, *p, *q, *subLimit;
     290             :     UChar c, cs;
     291             : 
     292           0 :     if(sub==NULL || subLength<-1) {
     293           0 :         return (UChar *)s;
     294             :     }
     295           0 :     if(s==NULL || length<-1) {
     296           0 :         return NULL;
     297             :     }
     298             : 
     299             :     /*
     300             :      * This implementation is more lazy than the one for u_strFindFirst():
     301             :      * There is no special search code for NUL-terminated strings.
     302             :      * It does not seem to be worth it for searching substrings to
     303             :      * search forward and find all matches like in u_strrchr() and similar.
     304             :      * Therefore, we simply get both string lengths and search backward.
     305             :      *
     306             :      * markus 2002oct23
     307             :      */
     308             : 
     309           0 :     if(subLength<0) {
     310           0 :         subLength=u_strlen(sub);
     311             :     }
     312           0 :     if(subLength==0) {
     313           0 :         return (UChar *)s;
     314             :     }
     315             : 
     316             :     /* get sub[subLength-1] to search for it fast */
     317           0 :     subLimit=sub+subLength;
     318           0 :     cs=*(--subLimit);
     319           0 :     --subLength;
     320             : 
     321           0 :     if(subLength==0 && !U16_IS_SURROGATE(cs)) {
     322             :         /* the substring consists of a single, non-surrogate BMP code point */
     323           0 :         return length<0 ? u_strrchr(s, cs) : u_memrchr(s, cs, length);
     324             :     }
     325             : 
     326           0 :     if(length<0) {
     327           0 :         length=u_strlen(s);
     328             :     }
     329             : 
     330             :     /* subLength was decremented above */
     331           0 :     if(length<=subLength) {
     332           0 :         return NULL; /* s is shorter than sub */
     333             :     }
     334             : 
     335           0 :     start=s;
     336           0 :     limit=s+length;
     337             : 
     338             :     /* the last unit of a match cannot lie before start+subLength (subLength was decremented above), so the backward scan stops there */
     339           0 :     s+=subLength;
     340             : 
     341           0 :     while(s!=limit) {
     342           0 :         c=*(--limit);
     343           0 :         if(c==cs) {
     344             :             /* found last substring UChar, compare rest */
     345           0 :             p=limit;
     346           0 :             q=subLimit;
     347             :             for(;;) {
     348           0 :                 if(q==sub) {
     349           0 :                     if(isMatchAtCPBoundary(start, p, limit+1, start+length)) {
     350           0 :                         return (UChar *)p; /* well-formed match */
     351             :                     } else {
     352           0 :                         break; /* no match because surrogate pair is split */
     353             :                     }
     354             :                 }
     355           0 :                 if(*(--p)!=*(--q)) {
     356           0 :                     break; /* no match */
     357             :                 }
     358             :             }
     359             :         }
     360             :     }
     361             : 
     362             :     /* not found */
     363           0 :     return NULL;
     364             : }
     365             : /* Find the last occurrence of NUL-terminated substring in NUL-terminated s; forwards to u_strFindLast() with both lengths set to -1. */
     366             : U_CAPI UChar * U_EXPORT2
     367           0 : u_strrstr(const UChar *s, const UChar *substring) {
     368           0 :     return u_strFindLast(s, -1, substring, -1);
     369             : }
     370             : /* Find the last occurrence of the code unit c in NUL-terminated s; returns a pointer into s, or NULL if c does not occur. */
     371             : U_CAPI UChar * U_EXPORT2
     372           0 : u_strrchr(const UChar *s, UChar c) {
     373           0 :     if(U16_IS_SURROGATE(c)) {
     374             :         /* make sure to not find half of a surrogate pair */
     375           0 :         return u_strFindLast(s, -1, &c, 1);
     376             :     } else {
     377           0 :         const UChar *result=NULL;
     378             :         UChar cs;
     379             : 
     380             :         /* trivial search for a BMP code point: scan forward to the NUL and remember the most recent hit */
     381             :         for(;;) {
     382           0 :             if((cs=*s)==c) {
     383           0 :                 result=s;
     384             :             }
     385           0 :             if(cs==0) {
     386           0 :                 return (UChar *)result;
     387             :             }
     388           0 :             ++s;
     389             :         }
     390             :     }
     391             : }
     392             : /* Find the last occurrence of the code point c in NUL-terminated s; returns a pointer to its first code unit, or NULL. */
     393             : U_CAPI UChar * U_EXPORT2
     394           0 : u_strrchr32(const UChar *s, UChar32 c) {
     395           0 :     if((uint32_t)c<=U_BMP_MAX) {
     396             :         /* find BMP code point */
     397           0 :         return u_strrchr(s, (UChar)c);
     398           0 :     } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
     399             :         /* find supplementary code point as surrogate pair */
     400           0 :         const UChar *result=NULL;
     401           0 :         UChar cs, lead=U16_LEAD(c), trail=U16_TRAIL(c);
     402             : 
     403           0 :         while((cs=*s++)!=0) {
     404           0 :             if(cs==lead && *s==trail) {
     405           0 :                 result=s-1; /* remember this pair and keep scanning for a later one */
     406             :             }
     407             :         }
     408           0 :         return (UChar *)result;
     409             :     } else {
     410             :         /* not a Unicode code point, not findable */
     411           0 :         return NULL;
     412             :     }
     413             : }
     414             : /* Find the last occurrence of the code unit c within the first count units of s; returns a pointer into s, or NULL (also when count<=0). */
     415             : U_CAPI UChar * U_EXPORT2
     416           0 : u_memrchr(const UChar *s, UChar c, int32_t count) {
     417           0 :     if(count<=0) {
     418           0 :         return NULL; /* no string */
     419           0 :     } else if(U16_IS_SURROGATE(c)) {
     420             :         /* make sure to not find half of a surrogate pair */
     421           0 :         return u_strFindLast(s, count, &c, 1);
     422             :     } else {
     423             :         /* trivial search for a BMP code point, walking backward from the end */
     424           0 :         const UChar *limit=s+count;
     425           0 :         do {
     426           0 :             if(*(--limit)==c) {
     427           0 :                 return (UChar *)limit;
     428             :             }
     429           0 :         } while(s!=limit);
     430           0 :         return NULL;
     431             :     }
     432             : }
     433             : /* Find the last occurrence of the code point c within the first count units of s; returns a pointer to its first code unit, or NULL. */
     434             : U_CAPI UChar * U_EXPORT2
     435           0 : u_memrchr32(const UChar *s, UChar32 c, int32_t count) {
     436           0 :     if((uint32_t)c<=U_BMP_MAX) {
     437             :         /* find BMP code point */
     438           0 :         return u_memrchr(s, (UChar)c, count);
     439           0 :     } else if(count<2) {
     440             :         /* too short for a surrogate pair */
     441           0 :         return NULL;
     442           0 :     } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
     443             :         /* find supplementary code point as surrogate pair */
     444           0 :         const UChar *limit=s+count-1; /* points at the last unit; count>=2 here, so limit-1 is still inside the buffer */
     445           0 :         UChar lead=U16_LEAD(c), trail=U16_TRAIL(c);
     446             : 
     447           0 :         do {
     448           0 :             if(*limit==trail && *(limit-1)==lead) {
     449           0 :                 return (UChar *)(limit-1);
     450             :             }
     451             :         } while(s!=--limit);
     452           0 :         return NULL;
     453             :     } else {
     454             :         /* not a Unicode code point, not findable */
     455           0 :         return NULL;
     456             :     }
     457             : }
     458             : 
     459             : /* Tokenization functions --------------------------------------------------- */
     460             : 
     461             : /*
     462             :  * Match each code point in a NUL-terminated string against each code point in the NUL-terminated matchSet.
     463             :  * Return the code unit index of the first string code point that
     464             :  * is (polarity==TRUE) or is not (FALSE) contained in the matchSet.
     465             :  * Return -(string length)-1 if there is no such code point, so that callers can recover the length as -result-1.
     466             :  */
     467             : static int32_t
     468           0 : _matchFromSet(const UChar *string, const UChar *matchSet, UBool polarity) {
     469             :     int32_t matchLen, matchBMPLen, strItr, matchItr;
     470             :     UChar32 stringCh, matchCh;
     471             :     UChar c, c2;
     472             : 
     473             :     /* first part of matchSet contains only BMP code points */
     474           0 :     matchBMPLen = 0;
     475           0 :     while((c = matchSet[matchBMPLen]) != 0 && U16_IS_SINGLE(c)) {
     476           0 :         ++matchBMPLen;
     477             :     }
     478             : 
     479             :     /* second part of matchSet contains BMP and supplementary code points */
     480           0 :     matchLen = matchBMPLen;
     481           0 :     while(matchSet[matchLen] != 0) {
     482           0 :         ++matchLen;
     483             :     }
     484             : 
     485           0 :     for(strItr = 0; (c = string[strItr]) != 0;) {
     486           0 :         ++strItr;
     487           0 :         if(U16_IS_SINGLE(c)) {
     488           0 :             if(polarity) {
     489           0 :                 for(matchItr = 0; matchItr < matchLen; ++matchItr) {
     490           0 :                     if(c == matchSet[matchItr]) {
     491           0 :                         return strItr - 1; /* one matches */
     492             :                     }
     493             :                 }
     494             :             } else {
     495           0 :                 for(matchItr = 0; matchItr < matchLen; ++matchItr) {
     496           0 :                     if(c == matchSet[matchItr]) {
     497           0 :                         goto endloop;
     498             :                     }
     499             :                 }
     500           0 :                 return strItr - 1; /* none matches */
     501             :             }
     502             :         } else {
     503             :             /*
     504             :              * No need to check for string length before U16_IS_TRAIL
     505             :              * because c2 could at worst be the terminating NUL.
     506             :              */
     507           0 :             if(U16_IS_SURROGATE_LEAD(c) && U16_IS_TRAIL(c2 = string[strItr])) {
     508           0 :                 ++strItr;
     509           0 :                 stringCh = U16_GET_SUPPLEMENTARY(c, c2);
     510             :             } else {
     511           0 :                 stringCh = c; /* unpaired surrogate: a lone trail, or a lead that is not followed by a trail */
     512             :             }
     513             : 
     514           0 :             if(polarity) {
     515           0 :                 for(matchItr = matchBMPLen; matchItr < matchLen;) {
     516           0 :                     U16_NEXT(matchSet, matchItr, matchLen, matchCh);
     517           0 :                     if(stringCh == matchCh) {
     518           0 :                         return strItr - U16_LENGTH(stringCh); /* one matches */
     519             :                     }
     520             :                 }
     521             :             } else {
     522           0 :                 for(matchItr = matchBMPLen; matchItr < matchLen;) {
     523           0 :                     U16_NEXT(matchSet, matchItr, matchLen, matchCh);
     524           0 :                     if(stringCh == matchCh) {
     525           0 :                         goto endloop;
     526             :                     }
     527             :                 }
     528           0 :                 return strItr - U16_LENGTH(stringCh); /* none matches */
     529             :             }
     530             :         }
     531             : endloop:
     532             :         /* wish C had continue with labels like Java... */;
     533             :     }
     534             : 
     535             :     /* Didn't find it. */
     536           0 :     return -strItr-1;
     537             : }
     538             : 
     539             : /* Return a pointer to the first code point in string that is contained in matchSet, or NULL if there is none. */
     540             : U_CAPI UChar * U_EXPORT2
     541           0 : u_strpbrk(const UChar *string, const UChar *matchSet)
     542             : {
     543           0 :     int32_t idx = _matchFromSet(string, matchSet, TRUE);
     544           0 :     if(idx >= 0) {
     545           0 :         return (UChar *)string + idx;
     546             :     } else {
     547           0 :         return NULL;
     548             :     }
     549             : }
     550             : 
     551             : /* Return the number of code units in string before the first code point that is contained in matchSet (the whole string length if there is none). */
     552             : U_CAPI int32_t U_EXPORT2
     553           0 : u_strcspn(const UChar *string, const UChar *matchSet)
     554             : {
     555           0 :     int32_t idx = _matchFromSet(string, matchSet, TRUE);
     556           0 :     if(idx >= 0) {
     557           0 :         return idx;
     558             :     } else {
     559           0 :         return -idx - 1; /* == u_strlen(string) */
     560             :     }
     561             : }
     562             : 
     563             : /* Return the number of code units in string before the first code point that is not contained in matchSet (the whole string length if there is none). */
     564             : U_CAPI int32_t U_EXPORT2
     565           0 : u_strspn(const UChar *string, const UChar *matchSet)
     566             : {
     567           0 :     int32_t idx = _matchFromSet(string, matchSet, FALSE);
     568           0 :     if(idx >= 0) {
     569           0 :         return idx;
     570             :     } else {
     571           0 :         return -idx - 1; /* == u_strlen(string) */
     572             :     }
     573             : }
     574             : 
     575             : /* ----- Text manipulation functions --- */
     576             : /* Return the next token of src (or of *saveState when src is NULL), NUL-terminating it in place and storing the resume position in *saveState; returns NULL when no token is left. */
     577             : U_CAPI UChar* U_EXPORT2
     578           0 : u_strtok_r(UChar    *src, 
     579             :      const UChar    *delim,
     580             :            UChar   **saveState)
     581             : {
     582             :     UChar *tokSource;
     583             :     UChar *nextToken;
     584             :     uint32_t nonDelimIdx;
     585             : 
     586             :     /* saveState itself must not be NULL: it is dereferenced below without a check. */
     587           0 :     if (src != NULL) {
     588           0 :         tokSource = src;
     589           0 :         *saveState = src; /* Set to "src" in case there are no delimiters */
     590             :     }
     591           0 :     else if (*saveState) {
     592           0 :         tokSource = *saveState;
     593             :     }
     594             :     else {
     595             :         /* src == NULL && *saveState == NULL */
     596             :         /* This shouldn't happen. We already finished tokenizing. */
     597           0 :         return NULL;
     598             :     }
     599             : 
     600             :     /* Skip initial delimiters */
     601           0 :     nonDelimIdx = u_strspn(tokSource, delim);
     602           0 :     tokSource = &tokSource[nonDelimIdx];
     603             : 
     604           0 :     if (*tokSource) {
     605           0 :         nextToken = u_strpbrk(tokSource, delim);
     606           0 :         if (nextToken != NULL) {
     607             :             /* Create a token: overwrite the first unit of the delimiter with NUL and resume right after it */
     608           0 :             *(nextToken++) = 0;
     609           0 :             *saveState = nextToken;
     610           0 :             return tokSource;
     611             :         }
     612           0 :         else if (*saveState) {
     613             :             /* Return the last token; clearing *saveState makes the next call with src==NULL return NULL */
     614           0 :             *saveState = NULL;
     615           0 :             return tokSource;
     616             :         }
     617             :     }
     618             :     else {
     619             :         /* No tokens were found. Only delimiters were left. */
     620           0 :         *saveState = NULL;
     621             :     }
     622           0 :     return NULL;
     623             : }
     624             : 
     625             : /* Miscellaneous functions -------------------------------------------------- */
     626             : 
     627             : U_CAPI UChar* U_EXPORT2
     628           0 : u_strcat(UChar     *dst, 
     629             :     const UChar     *src)
     630             : {
     631           0 :     UChar *anchor = dst;            /* save a pointer to start of dst */
     632             : 
     633           0 :     while(*dst != 0) {              /* To end of first string          */
     634           0 :         ++dst;
     635             :     }
     636           0 :     while((*(dst++) = *(src++)) != 0) {     /* copy string 2 over              */
     637             :     }
     638             : 
     639           0 :     return anchor;
     640             : }
     641             : 
     642             : U_CAPI UChar*  U_EXPORT2
     643           0 : u_strncat(UChar     *dst, 
     644             :      const UChar     *src, 
     645             :      int32_t     n ) 
     646             : {
     647           0 :     if(n > 0) {
     648           0 :         UChar *anchor = dst;            /* save a pointer to start of dst */
     649             : 
     650           0 :         while(*dst != 0) {              /* To end of first string          */
     651           0 :             ++dst;
     652             :         }
     653           0 :         while((*dst = *src) != 0) {     /* copy string 2 over              */
     654           0 :             ++dst;
     655           0 :             if(--n == 0) {
     656           0 :                 *dst = 0;
     657           0 :                 break;
     658             :             }
     659           0 :             ++src;
     660             :         }
     661             : 
     662           0 :         return anchor;
     663             :     } else {
     664           0 :         return dst;
     665             :     }
     666             : }
     667             : 
     668             : /* ----- Text property functions --- */
     669             : 
     670             : U_CAPI int32_t   U_EXPORT2
     671           0 : u_strcmp(const UChar *s1, 
     672             :     const UChar *s2) 
     673             : {
     674             :     UChar  c1, c2;
     675             : 
     676             :     for(;;) {
     677           0 :         c1=*s1++;
     678           0 :         c2=*s2++;
     679           0 :         if (c1 != c2 || c1 == 0) {
     680             :             break;
     681             :         }
     682             :     }
     683           0 :     return (int32_t)c1 - (int32_t)c2;
     684             : }
     685             : 
     686             : U_CFUNC int32_t U_EXPORT2
     687           0 : uprv_strCompare(const UChar *s1, int32_t length1,
     688             :                 const UChar *s2, int32_t length2,
     689             :                 UBool strncmpStyle, UBool codePointOrder) {
     690             :     const UChar *start1, *start2, *limit1, *limit2;
     691             :     UChar c1, c2;
     692             : 
     693             :     /* setup for fix-up */
     694           0 :     start1=s1;
     695           0 :     start2=s2;
     696             : 
     697             :     /* compare identical prefixes - they do not need to be fixed up */
     698           0 :     if(length1<0 && length2<0) {
     699             :         /* strcmp style, both NUL-terminated */
     700           0 :         if(s1==s2) {
     701           0 :             return 0;
     702             :         }
     703             : 
     704             :         for(;;) {
     705           0 :             c1=*s1;
     706           0 :             c2=*s2;
     707           0 :             if(c1!=c2) {
     708           0 :                 break;
     709             :             }
     710           0 :             if(c1==0) {
     711           0 :                 return 0;
     712             :             }
     713           0 :             ++s1;
     714           0 :             ++s2;
     715             :         }
     716             : 
     717             :         /* setup for fix-up */
     718           0 :         limit1=limit2=NULL;
     719           0 :     } else if(strncmpStyle) {
     720             :         /* special handling for strncmp, assume length1==length2>=0 but also check for NUL */
     721           0 :         if(s1==s2) {
     722           0 :             return 0;
     723             :         }
     724             : 
     725           0 :         limit1=start1+length1;
     726             : 
     727             :         for(;;) {
     728             :             /* both lengths are same, check only one limit */
     729           0 :             if(s1==limit1) {
     730           0 :                 return 0;
     731             :             }
     732             : 
     733           0 :             c1=*s1;
     734           0 :             c2=*s2;
     735           0 :             if(c1!=c2) {
     736           0 :                 break;
     737             :             }
     738           0 :             if(c1==0) {
     739           0 :                 return 0;
     740             :             }
     741           0 :             ++s1;
     742           0 :             ++s2;
     743             :         }
     744             : 
     745             :         /* setup for fix-up */
     746           0 :         limit2=start2+length1; /* use length1 here, too, to enforce assumption */
     747             :     } else {
     748             :         /* memcmp/UnicodeString style, both length-specified */
     749             :         int32_t lengthResult;
     750             : 
     751           0 :         if(length1<0) {
     752           0 :             length1=u_strlen(s1);
     753             :         }
     754           0 :         if(length2<0) {
     755           0 :             length2=u_strlen(s2);
     756             :         }
     757             : 
     758             :         /* limit1=start1+min(length1, length2) */
     759           0 :         if(length1<length2) {
     760           0 :             lengthResult=-1;
     761           0 :             limit1=start1+length1;
     762           0 :         } else if(length1==length2) {
     763           0 :             lengthResult=0;
     764           0 :             limit1=start1+length1;
     765             :         } else /* length1>length2 */ {
     766           0 :             lengthResult=1;
     767           0 :             limit1=start1+length2;
     768             :         }
     769             : 
     770           0 :         if(s1==s2) {
     771           0 :             return lengthResult;
     772             :         }
     773             : 
     774             :         for(;;) {
     775             :             /* check pseudo-limit */
     776           0 :             if(s1==limit1) {
     777           0 :                 return lengthResult;
     778             :             }
     779             : 
     780           0 :             c1=*s1;
     781           0 :             c2=*s2;
     782           0 :             if(c1!=c2) {
     783           0 :                 break;
     784             :             }
     785           0 :             ++s1;
     786           0 :             ++s2;
     787             :         }
     788             : 
     789             :         /* setup for fix-up */
     790           0 :         limit1=start1+length1;
     791           0 :         limit2=start2+length2;
     792             :     }
     793             : 
     794             :     /* if both values are in or above the surrogate range, fix them up */
     795           0 :     if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
     796             :         /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
     797           0 :         if(
     798           0 :             (c1<=0xdbff && (s1+1)!=limit1 && U16_IS_TRAIL(*(s1+1))) ||
     799           0 :             (U16_IS_TRAIL(c1) && start1!=s1 && U16_IS_LEAD(*(s1-1)))
     800             :         ) {
     801             :             /* part of a surrogate pair, leave >=d800 */
     802             :         } else {
     803             :             /* BMP code point - may be surrogate code point - make <d800 */
     804           0 :             c1-=0x2800;
     805             :         }
     806             : 
     807           0 :         if(
     808           0 :             (c2<=0xdbff && (s2+1)!=limit2 && U16_IS_TRAIL(*(s2+1))) ||
     809           0 :             (U16_IS_TRAIL(c2) && start2!=s2 && U16_IS_LEAD(*(s2-1)))
     810             :         ) {
     811             :             /* part of a surrogate pair, leave >=d800 */
     812             :         } else {
     813             :             /* BMP code point - may be surrogate code point - make <d800 */
     814           0 :             c2-=0x2800;
     815             :         }
     816             :     }
     817             : 
     818             :     /* now c1 and c2 are in the requested (code unit or code point) order */
     819           0 :     return (int32_t)c1-(int32_t)c2;
     820             : }
     821             : 
     822             : /*
     823             :  * Compare two strings as presented by UCharIterators.
     824             :  * Use code unit or code point order.
     825             :  * When the function returns, it is undefined where the iterators
     826             :  * have stopped.
     827             :  */
     828             : U_CAPI int32_t U_EXPORT2
     829           0 : u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder) {
     830             :     UChar32 c1, c2;
     831             : 
     832             :     /* argument checking */
     833           0 :     if(iter1==NULL || iter2==NULL) {
     834           0 :         return 0; /* bad arguments */
     835             :     }
     836           0 :     if(iter1==iter2) {
     837           0 :         return 0; /* identical iterators */
     838             :     }
     839             : 
     840             :     /* reset iterators to start? */
     841           0 :     iter1->move(iter1, 0, UITER_START);
     842           0 :     iter2->move(iter2, 0, UITER_START);
     843             : 
     844             :     /* compare identical prefixes - they do not need to be fixed up */
     845             :     for(;;) {
     846           0 :         c1=iter1->next(iter1);
     847           0 :         c2=iter2->next(iter2);
     848           0 :         if(c1!=c2) {
     849           0 :             break;
     850             :         }
     851           0 :         if(c1==-1) {
     852           0 :             return 0;
     853             :         }
     854             :     }
     855             : 
     856             :     /* if both values are in or above the surrogate range, fix them up */
     857           0 :     if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
     858             :         /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
     859           0 :         if(
     860           0 :             (c1<=0xdbff && U16_IS_TRAIL(iter1->current(iter1))) ||
     861           0 :             (U16_IS_TRAIL(c1) && (iter1->previous(iter1), U16_IS_LEAD(iter1->previous(iter1))))
     862             :         ) {
     863             :             /* part of a surrogate pair, leave >=d800 */
     864             :         } else {
     865             :             /* BMP code point - may be surrogate code point - make <d800 */
     866           0 :             c1-=0x2800;
     867             :         }
     868             : 
     869           0 :         if(
     870           0 :             (c2<=0xdbff && U16_IS_TRAIL(iter2->current(iter2))) ||
     871           0 :             (U16_IS_TRAIL(c2) && (iter2->previous(iter2), U16_IS_LEAD(iter2->previous(iter2))))
     872             :         ) {
     873             :             /* part of a surrogate pair, leave >=d800 */
     874             :         } else {
     875             :             /* BMP code point - may be surrogate code point - make <d800 */
     876           0 :             c2-=0x2800;
     877             :         }
     878             :     }
     879             : 
     880             :     /* now c1 and c2 are in the requested (code unit or code point) order */
     881           0 :     return (int32_t)c1-(int32_t)c2;
     882             : }
     883             : 
     884             : #if 0
     885             : /*
     886             :  * u_strCompareIter() does not leave the iterators _on_ the different units.
     887             :  * This is possible but would cost a few extra indirect function calls to back
     888             :  * up if the last unit (c1 or c2 respectively) was >=0.
     889             :  *
     890             :  * Consistently leaving them _behind_ the different units is not an option
     891             :  * because the current "unit" is the end of the string if that is reached,
     892             :  * and in such a case the iterator does not move.
     893             :  * For example, when comparing "ab" with "abc", both iterators rest _on_ the end
     894             :  * of their strings. Calling previous() on each does not move them to where
     895             :  * the comparison fails.
     896             :  *
     897             :  * So the simplest semantics is to not define where the iterators end up.
     898             :  *
     899             :  * The following fragment is part of what would need to be done for backing up.
     900             :  */
     901             : void fragment {
     902             :         /* iff a surrogate is part of a surrogate pair, leave >=d800 */
     903             :         if(c1<=0xdbff) {
     904             :             if(!U16_IS_TRAIL(iter1->current(iter1))) {
     905             :                 /* lead surrogate code point - make <d800 */
     906             :                 c1-=0x2800;
     907             :             }
     908             :         } else if(c1<=0xdfff) {
     909             :             int32_t idx=iter1->getIndex(iter1, UITER_CURRENT);
     910             :             iter1->previous(iter1); /* ==c1 */
     911             :             if(!U16_IS_LEAD(iter1->previous(iter1))) {
     912             :                 /* trail surrogate code point - make <d800 */
     913             :                 c1-=0x2800;
     914             :             }
     915             :             /* go back to behind where the difference is */
     916             :             iter1->move(iter1, idx, UITER_ZERO);
     917             :         } else /* 0xe000<=c1<=0xffff */ {
     918             :             /* BMP code point - make <d800 */
     919             :             c1-=0x2800;
     920             :         }
     921             : }
     922             : #endif
     923             : 
     924             : U_CAPI int32_t U_EXPORT2
     925           0 : u_strCompare(const UChar *s1, int32_t length1,
     926             :              const UChar *s2, int32_t length2,
     927             :              UBool codePointOrder) {
     928             :     /* argument checking */
     929           0 :     if(s1==NULL || length1<-1 || s2==NULL || length2<-1) {
     930           0 :         return 0;
     931             :     }
     932           0 :     return uprv_strCompare(s1, length1, s2, length2, FALSE, codePointOrder);
     933             : }
     934             : 
     935             : /* String compare in code point order - u_strcmp() compares in code unit order. */
     936             : U_CAPI int32_t U_EXPORT2
     937           0 : u_strcmpCodePointOrder(const UChar *s1, const UChar *s2) {
     938           0 :     return uprv_strCompare(s1, -1, s2, -1, FALSE, TRUE);
     939             : }
     940             : 
     941             : U_CAPI int32_t   U_EXPORT2
     942           0 : u_strncmp(const UChar     *s1, 
     943             :      const UChar     *s2, 
     944             :      int32_t     n) 
     945             : {
     946           0 :     if(n > 0) {
     947             :         int32_t rc;
     948             :         for(;;) {
     949           0 :             rc = (int32_t)*s1 - (int32_t)*s2;
     950           0 :             if(rc != 0 || *s1 == 0 || --n == 0) {
     951           0 :                 return rc;
     952             :             }
     953           0 :             ++s1;
     954           0 :             ++s2;
     955             :         }
     956             :     } else {
     957           0 :         return 0;
     958             :     }
     959             : }
     960             : 
     961             : U_CAPI int32_t U_EXPORT2
     962           0 : u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n) {
     963           0 :     return uprv_strCompare(s1, n, s2, n, TRUE, TRUE);
     964             : }
     965             : 
     966             : U_CAPI UChar* U_EXPORT2
     967           0 : u_strcpy(UChar     *dst, 
     968             :     const UChar     *src) 
     969             : {
     970           0 :     UChar *anchor = dst;            /* save a pointer to start of dst */
     971             : 
     972           0 :     while((*(dst++) = *(src++)) != 0) {     /* copy string 2 over              */
     973             :     }
     974             : 
     975           0 :     return anchor;
     976             : }
     977             : 
     978             : U_CAPI UChar*  U_EXPORT2
     979           0 : u_strncpy(UChar     *dst, 
     980             :      const UChar     *src, 
     981             :      int32_t     n) 
     982             : {
     983           0 :     UChar *anchor = dst;            /* save a pointer to start of dst */
     984             : 
     985             :     /* copy string 2 over */
     986           0 :     while(n > 0 && (*(dst++) = *(src++)) != 0) {
     987           0 :         --n;
     988             :     }
     989             : 
     990           0 :     return anchor;
     991             : }
     992             : 
     993             : U_CAPI int32_t   U_EXPORT2
     994           2 : u_strlen(const UChar *s) 
     995             : {
     996             : #if U_SIZEOF_WCHAR_T == U_SIZEOF_UCHAR
     997             :     return (int32_t)uprv_wcslen((const wchar_t *)s);
     998             : #else
     999           2 :     const UChar *t = s;
    1000          42 :     while(*t != 0) {
    1001          20 :       ++t;
    1002             :     }
    1003           2 :     return t - s;
    1004             : #endif
    1005             : }
    1006             : 
    1007             : U_CAPI int32_t U_EXPORT2
    1008           0 : u_countChar32(const UChar *s, int32_t length) {
    1009             :     int32_t count;
    1010             : 
    1011           0 :     if(s==NULL || length<-1) {
    1012           0 :         return 0;
    1013             :     }
    1014             : 
    1015           0 :     count=0;
    1016           0 :     if(length>=0) {
    1017           0 :         while(length>0) {
    1018           0 :             ++count;
    1019           0 :             if(U16_IS_LEAD(*s) && length>=2 && U16_IS_TRAIL(*(s+1))) {
    1020           0 :                 s+=2;
    1021           0 :                 length-=2;
    1022             :             } else {
    1023           0 :                 ++s;
    1024           0 :                 --length;
    1025             :             }
    1026             :         }
    1027             :     } else /* length==-1 */ {
    1028             :         UChar c;
    1029             : 
    1030             :         for(;;) {
    1031           0 :             if((c=*s++)==0) {
    1032           0 :                 break;
    1033             :             }
    1034           0 :             ++count;
    1035             : 
    1036             :             /*
    1037             :              * sufficient to look ahead one because of UTF-16;
    1038             :              * safe to look ahead one because at worst that would be the terminating NUL
    1039             :              */
    1040           0 :             if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) {
    1041           0 :                 ++s;
    1042             :             }
    1043             :         }
    1044             :     }
    1045           0 :     return count;
    1046             : }
    1047             : 
    1048             : U_CAPI UBool U_EXPORT2
    1049           0 : u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number) {
    1050             : 
    1051           0 :     if(number<0) {
    1052           0 :         return TRUE;
    1053             :     }
    1054           0 :     if(s==NULL || length<-1) {
    1055           0 :         return FALSE;
    1056             :     }
    1057             : 
    1058           0 :     if(length==-1) {
    1059             :         /* s is NUL-terminated */
    1060             :         UChar c;
    1061             : 
    1062             :         /* count code points until they exceed */
    1063             :         for(;;) {
    1064           0 :             if((c=*s++)==0) {
    1065           0 :                 return FALSE;
    1066             :             }
    1067           0 :             if(number==0) {
    1068           0 :                 return TRUE;
    1069             :             }
    1070           0 :             if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) {
    1071           0 :                 ++s;
    1072             :             }
    1073           0 :             --number;
    1074             :         }
    1075             :     } else {
    1076             :         /* length>=0 known */
    1077             :         const UChar *limit;
    1078             :         int32_t maxSupplementary;
    1079             : 
    1080             :         /* s contains at least (length+1)/2 code points: <=2 UChars per cp */
    1081           0 :         if(((length+1)/2)>number) {
    1082           0 :             return TRUE;
    1083             :         }
    1084             : 
    1085             :         /* check if s does not even contain enough UChars */
    1086           0 :         maxSupplementary=length-number;
    1087           0 :         if(maxSupplementary<=0) {
    1088           0 :             return FALSE;
    1089             :         }
    1090             :         /* there are maxSupplementary=length-number more UChars than asked-for code points */
    1091             : 
    1092             :         /*
    1093             :          * count code points until they exceed and also check that there are
    1094             :          * no more than maxSupplementary supplementary code points (UChar pairs)
    1095             :          */
    1096           0 :         limit=s+length;
    1097             :         for(;;) {
    1098           0 :             if(s==limit) {
    1099           0 :                 return FALSE;
    1100             :             }
    1101           0 :             if(number==0) {
    1102           0 :                 return TRUE;
    1103             :             }
    1104           0 :             if(U16_IS_LEAD(*s++) && s!=limit && U16_IS_TRAIL(*s)) {
    1105           0 :                 ++s;
    1106           0 :                 if(--maxSupplementary<=0) {
    1107             :                     /* too many pairs - too few code points */
    1108           0 :                     return FALSE;
    1109             :                 }
    1110             :             }
    1111           0 :             --number;
    1112             :         }
    1113             :     }
    1114             : }
    1115             : 
    1116             : U_CAPI UChar * U_EXPORT2
    1117           0 : u_memcpy(UChar *dest, const UChar *src, int32_t count) {
    1118           0 :     if(count > 0) {
    1119           0 :         uprv_memcpy(dest, src, (size_t)count*U_SIZEOF_UCHAR);
    1120             :     }
    1121           0 :     return dest;
    1122             : }
    1123             : 
    1124             : U_CAPI UChar * U_EXPORT2
    1125           0 : u_memmove(UChar *dest, const UChar *src, int32_t count) {
    1126           0 :     if(count > 0) {
    1127           0 :         uprv_memmove(dest, src, (size_t)count*U_SIZEOF_UCHAR);
    1128             :     }
    1129           0 :     return dest;
    1130             : }
    1131             : 
    1132             : U_CAPI UChar * U_EXPORT2
    1133           0 : u_memset(UChar *dest, UChar c, int32_t count) {
    1134           0 :     if(count > 0) {
    1135           0 :         UChar *ptr = dest;
    1136           0 :         UChar *limit = dest + count;
    1137             : 
    1138           0 :         while (ptr < limit) {
    1139           0 :             *(ptr++) = c;
    1140             :         }
    1141             :     }
    1142           0 :     return dest;
    1143             : }
    1144             : 
    1145             : U_CAPI int32_t U_EXPORT2
    1146           0 : u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count) {
    1147           0 :     if(count > 0) {
    1148           0 :         const UChar *limit = buf1 + count;
    1149             :         int32_t result;
    1150             : 
    1151           0 :         while (buf1 < limit) {
    1152           0 :             result = (int32_t)(uint16_t)*buf1 - (int32_t)(uint16_t)*buf2;
    1153           0 :             if (result != 0) {
    1154           0 :                 return result;
    1155             :             }
    1156           0 :             buf1++;
    1157           0 :             buf2++;
    1158             :         }
    1159             :     }
    1160           0 :     return 0;
    1161             : }
    1162             : 
    1163             : U_CAPI int32_t U_EXPORT2
    1164           0 : u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count) {
    1165           0 :     return uprv_strCompare(s1, count, s2, count, FALSE, TRUE);
    1166             : }
    1167             : 
    1168             : /* u_unescape & support fns ------------------------------------------------- */
    1169             : 
    1170             : /* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */
    1171             : static const UChar UNESCAPE_MAP[] = {
    1172             :     /*"   0x22, 0x22 */
    1173             :     /*'   0x27, 0x27 */
    1174             :     /*?   0x3F, 0x3F */
    1175             :     /*\   0x5C, 0x5C */
    1176             :     /*a*/ 0x61, 0x07,
    1177             :     /*b*/ 0x62, 0x08,
    1178             :     /*e*/ 0x65, 0x1b,
    1179             :     /*f*/ 0x66, 0x0c,
    1180             :     /*n*/ 0x6E, 0x0a,
    1181             :     /*r*/ 0x72, 0x0d,
    1182             :     /*t*/ 0x74, 0x09,
    1183             :     /*v*/ 0x76, 0x0b
    1184             : };
    1185             : enum { UNESCAPE_MAP_LENGTH = UPRV_LENGTHOF(UNESCAPE_MAP) };
    1186             : 
    1187             : /* Convert one octal digit to a numeric value 0..7, or -1 on failure */
    1188           0 : static int8_t _digit8(UChar c) {
    1189           0 :     if (c >= 0x0030 && c <= 0x0037) {
    1190           0 :         return (int8_t)(c - 0x0030);
    1191             :     }
    1192           0 :     return -1;
    1193             : }
    1194             : 
    1195             : /* Convert one hex digit to a numeric value 0..F, or -1 on failure */
    1196           0 : static int8_t _digit16(UChar c) {
    1197           0 :     if (c >= 0x0030 && c <= 0x0039) {
    1198           0 :         return (int8_t)(c - 0x0030);
    1199             :     }
    1200           0 :     if (c >= 0x0041 && c <= 0x0046) {
    1201           0 :         return (int8_t)(c - (0x0041 - 10));
    1202             :     }
    1203           0 :     if (c >= 0x0061 && c <= 0x0066) {
    1204           0 :         return (int8_t)(c - (0x0061 - 10));
    1205             :     }
    1206           0 :     return -1;
    1207             : }
    1208             : 
    1209             : /* Parse a single escape sequence.  Although this method deals in
    1210             :  * UChars, it does not use C++ or UnicodeString.  This allows it to
    1211             :  * be used from C contexts. */
    1212             : U_CAPI UChar32 U_EXPORT2
    1213           0 : u_unescapeAt(UNESCAPE_CHAR_AT charAt,
    1214             :              int32_t *offset,
    1215             :              int32_t length,
    1216             :              void *context) {
    1217             : 
    1218           0 :     int32_t start = *offset;
    1219             :     UChar c;
    1220           0 :     UChar32 result = 0;
    1221           0 :     int8_t n = 0;
    1222           0 :     int8_t minDig = 0;
    1223           0 :     int8_t maxDig = 0;
    1224           0 :     int8_t bitsPerDigit = 4; 
    1225             :     int8_t dig;
    1226             :     int32_t i;
    1227           0 :     UBool braces = FALSE;
    1228             : 
    1229             :     /* Check that offset is in range */
    1230           0 :     if (*offset < 0 || *offset >= length) {
    1231             :         goto err;
    1232             :     }
    1233             : 
    1234             :     /* Fetch first UChar after '\\' */
    1235           0 :     c = charAt((*offset)++, context);
    1236             : 
    1237             :     /* Convert hexadecimal and octal escapes */
    1238           0 :     switch (c) {
    1239             :     case 0x0075 /*'u'*/:
    1240           0 :         minDig = maxDig = 4;
    1241           0 :         break;
    1242             :     case 0x0055 /*'U'*/:
    1243           0 :         minDig = maxDig = 8;
    1244           0 :         break;
    1245             :     case 0x0078 /*'x'*/:
    1246           0 :         minDig = 1;
    1247           0 :         if (*offset < length && charAt(*offset, context) == 0x7B /*{*/) {
    1248           0 :             ++(*offset);
    1249           0 :             braces = TRUE;
    1250           0 :             maxDig = 8;
    1251             :         } else {
    1252           0 :             maxDig = 2;
    1253             :         }
    1254           0 :         break;
    1255             :     default:
    1256           0 :         dig = _digit8(c);
    1257           0 :         if (dig >= 0) {
    1258           0 :             minDig = 1;
    1259           0 :             maxDig = 3;
    1260           0 :             n = 1; /* Already have first octal digit */
    1261           0 :             bitsPerDigit = 3;
    1262           0 :             result = dig;
    1263             :         }
    1264           0 :         break;
    1265             :     }
    1266           0 :     if (minDig != 0) {
    1267           0 :         while (*offset < length && n < maxDig) {
    1268           0 :             c = charAt(*offset, context);
    1269           0 :             dig = (int8_t)((bitsPerDigit == 3) ? _digit8(c) : _digit16(c));
    1270           0 :             if (dig < 0) {
    1271           0 :                 break;
    1272             :             }
    1273           0 :             result = (result << bitsPerDigit) | dig;
    1274           0 :             ++(*offset);
    1275           0 :             ++n;
    1276             :         }
    1277           0 :         if (n < minDig) {
    1278           0 :             goto err;
    1279             :         }
    1280           0 :         if (braces) {
    1281           0 :             if (c != 0x7D /*}*/) {
    1282           0 :                 goto err;
    1283             :             }
    1284           0 :             ++(*offset);
    1285             :         }
    1286           0 :         if (result < 0 || result >= 0x110000) {
    1287             :             goto err;
    1288             :         }
    1289             :         /* If an escape sequence specifies a lead surrogate, see if
    1290             :          * there is a trail surrogate after it, either as an escape or
    1291             :          * as a literal.  If so, join them up into a supplementary.
    1292             :          */
    1293           0 :         if (*offset < length && U16_IS_LEAD(result)) {
    1294           0 :             int32_t ahead = *offset + 1;
    1295           0 :             c = charAt(*offset, context);
    1296           0 :             if (c == 0x5C /*'\\'*/ && ahead < length) {
    1297           0 :                 c = (UChar) u_unescapeAt(charAt, &ahead, length, context);
    1298             :             }
    1299           0 :             if (U16_IS_TRAIL(c)) {
    1300           0 :                 *offset = ahead;
    1301           0 :                 result = U16_GET_SUPPLEMENTARY(result, c);
    1302             :             }
    1303             :         }
    1304           0 :         return result;
    1305             :     }
    1306             : 
    1307             :     /* Convert C-style escapes in table */
    1308           0 :     for (i=0; i<UNESCAPE_MAP_LENGTH; i+=2) {
    1309           0 :         if (c == UNESCAPE_MAP[i]) {
    1310           0 :             return UNESCAPE_MAP[i+1];
    1311           0 :         } else if (c < UNESCAPE_MAP[i]) {
    1312           0 :             break;
    1313             :         }
    1314             :     }
    1315             : 
    1316             :     /* Map \cX to control-X: X & 0x1F */
    1317           0 :     if (c == 0x0063 /*'c'*/ && *offset < length) {
    1318           0 :         c = charAt((*offset)++, context);
    1319           0 :         if (U16_IS_LEAD(c) && *offset < length) {
    1320           0 :             UChar c2 = charAt(*offset, context);
    1321           0 :             if (U16_IS_TRAIL(c2)) {
    1322           0 :                 ++(*offset);
    1323           0 :                 c = (UChar) U16_GET_SUPPLEMENTARY(c, c2); /* [sic] */
    1324             :             }
    1325             :         }
    1326           0 :         return 0x1F & c;
    1327             :     }
    1328             : 
    1329             :     /* If no special forms are recognized, then consider
    1330             :      * the backslash to generically escape the next character.
    1331             :      * Deal with surrogate pairs. */
    1332           0 :     if (U16_IS_LEAD(c) && *offset < length) {
    1333           0 :         UChar c2 = charAt(*offset, context);
    1334           0 :         if (U16_IS_TRAIL(c2)) {
    1335           0 :             ++(*offset);
    1336           0 :             return U16_GET_SUPPLEMENTARY(c, c2);
    1337             :         }
    1338             :     }
    1339           0 :     return c;
    1340             : 
    1341             :  err:
    1342             :     /* Invalid escape sequence */
    1343           0 :     *offset = start; /* Reset to initial value */
    1344           0 :     return (UChar32)0xFFFFFFFF;
    1345             : }
    1346             : 
    1347             : /* u_unescapeAt() callback to return a UChar from a char*.
                     :  *
                     :  * context is cast to the char* base of the text and offset is added to
                     :  * it; exactly one char at that position is converted with
                     :  * u_charsToUChars() and the resulting UChar is returned. There is no
                     :  * bounds check in this function, so the caller has to keep offset inside
                     :  * the text (u_unescape() passes this callback together with the
                     :  * uprv_strlen() of the remaining input as the length limit). */
    1348             : static UChar U_CALLCONV
    1349           0 : _charPtr_charAt(int32_t offset, void *context) {
    1350             :     UChar c16; /* receives the single converted code unit */
    1351             :     /* It would be more efficient to access the invariant tables
    1352             :      * directly but there is no API for that. */
    1353           0 :     u_charsToUChars(((char*) context) + offset, &c16, 1);
    1354           0 :     return c16;
    1355             : }
    1356             : 
    1357             : /* Append an escape-free segment of the text; used by u_unescape().
                     :  *
                     :  * Converts up to srcLen chars from src into dest with u_charsToUChars(),
                     :  * clamped to the room that is left: a negative destCapacity is treated
                     :  * as 0, and srcLen is cut down to destCapacity when it is larger. Input
                     :  * that does not fit is dropped without any error indication; the caller
                     :  * adds the full segment length to its own running count. */
    1358           0 : static void _appendUChars(UChar *dest, int32_t destCapacity,
    1359             :                           const char *src, int32_t srcLen) {
    1360           0 :     if (destCapacity < 0) {
    1361           0 :         destCapacity = 0; /* the caller passes destCapacity - i, which is negative once the output has overflowed */
    1362             :     }
    1363           0 :     if (srcLen > destCapacity) {
    1364           0 :         srcLen = destCapacity; /* truncate to what still fits */
    1365             :     }
    1366           0 :     u_charsToUChars(src, dest, srcLen);
    1367           0 : }
    1368             : 
    1369             : /* Do an invariant conversion of char* -> UChar*, with escape parsing.
                     :  *
                     :  * Walks the NUL-terminated src. Runs of text without a backslash are
                     :  * copied with _appendUChars(); each backslash sequence is decoded by
                     :  * u_unescapeAt() (reading chars through _charPtr_charAt) and appended
                     :  * with U16_APPEND_UNSAFE.
                     :  *
                     :  * dest may be NULL, in which case nothing is written and only the length
                     :  * is computed. Output that does not fit into destCapacity is not written
                     :  * but is still counted. A terminating NUL is stored only when the result
                     :  * is shorter than destCapacity.
                     :  *
                     :  * Returns the number of UChars of the complete result (this can exceed
                     :  * destCapacity), or 0 if an escape sequence could not be parsed; in that
                     :  * error case dest[0] is set to 0 when dest is non-NULL and
                     :  * destCapacity > 0. */
    1370             : U_CAPI int32_t U_EXPORT2
    1371           0 : u_unescape(const char *src, UChar *dest, int32_t destCapacity) {
    1372           0 :     const char *segment = src; /* start of the pending escape-free run */
    1373           0 :     int32_t i = 0; /* UChars written, or needed, so far */
    1374             :     char c;
    1375             : 
    1376           0 :     while ((c=*src) != 0) {
    1377             :         /* '\\' intentionally written as compiler-specific
    1378             :          * character constant to correspond to compiler-specific
    1379             :          * char* constants. */
    1380           0 :         if (c == '\\') {
    1381           0 :             int32_t lenParsed = 0; /* in/out offset for u_unescapeAt(), relative to the char after the backslash */
    1382             :             UChar32 c32;
    1383           0 :             if (src != segment) { /* flush the literal run that precedes this escape */
    1384           0 :                 if (dest != NULL) {
    1385           0 :                     _appendUChars(dest + i, destCapacity - i,
    1386           0 :                                   segment, (int32_t)(src - segment));
    1387             :                 }
    1388           0 :                 i += (int32_t)(src - segment);
    1389             :             }
    1390           0 :             ++src; /* advance past '\\' */
    1391           0 :             c32 = (UChar32)u_unescapeAt(_charPtr_charAt, &lenParsed, (int32_t)uprv_strlen(src), (void*)src);
    1392           0 :             if (lenParsed == 0) { /* u_unescapeAt() puts the offset back to its start value on failure */
    1393           0 :                 goto err;
    1394             :             }
    1395           0 :             src += lenParsed; /* advance past escape seq. */
    1396           0 :             if (dest != NULL && U16_LENGTH(c32) <= (destCapacity - i)) {
    1397           0 :                 U16_APPEND_UNSAFE(dest, i, c32);
    1398             :             } else {
    1399           0 :                 i += U16_LENGTH(c32); /* no dest or no room: count only */
    1400             :             }
    1401           0 :             segment = src; /* the next literal run begins after the escape */
    1402             :         } else {
    1403           0 :             ++src;
    1404             :         }
    1405             :     }
    1406           0 :     if (src != segment) { /* flush the literal run after the last escape */
    1407           0 :         if (dest != NULL) {
    1408           0 :             _appendUChars(dest + i, destCapacity - i,
    1409           0 :                           segment, (int32_t)(src - segment));
    1410             :         }
    1411           0 :         i += (int32_t)(src - segment);
    1412             :     }
    1413           0 :     if (dest != NULL && i < destCapacity) {
    1414           0 :         dest[i] = 0; /* terminate only when the NUL fits */
    1415             :     }
    1416           0 :     return i;
    1417             : 
    1418             :  err:
    1419           0 :     if (dest != NULL && destCapacity > 0) {
    1420           0 :         *dest = 0; /* hand back an empty string on a bad escape */
    1421             :     }
    1422           0 :     return 0;
    1423             : }
    1424             : 
    1425             : /* NUL-termination of strings ----------------------------------------------- */
    1426             : 
    1427             : /**
    1428             :  * NUL-terminate a string no matter what its type.
    1429             :  * Set warning and error codes accordingly.
                     :  *
                     :  * Nothing happens if pErrorCode is NULL, if *pErrorCode does not satisfy
                     :  * U_SUCCESS, or if length is negative. Otherwise:
                     :  *   length <  destCapacity: dest[length]=0 is written, and a pending
                     :  *                           U_STRING_NOT_TERMINATED_WARNING is reset to
                     :  *                           U_ZERO_ERROR (any other code is kept);
                     :  *   length == destCapacity: *pErrorCode=U_STRING_NOT_TERMINATED_WARNING;
                     :  *   length >  destCapacity: *pErrorCode=U_BUFFER_OVERFLOW_ERROR.
                     :  *
                     :  * The expansion is a bare if statement whose arguments are used
                     :  * unparenthesized and more than once, so invoke it as a complete
                     :  * statement and pass plain variables.
    1430             :  */
    1431             : #define __TERMINATE_STRING(dest, destCapacity, length, pErrorCode)      \
    1432             :     if(pErrorCode!=NULL && U_SUCCESS(*pErrorCode)) {                    \
    1433             :         /* not a public function, so no complete argument checking */   \
    1434             :                                                                         \
    1435             :         if(length<0) {                                                  \
    1436             :             /* assume that the caller handles this */                   \
    1437             :         } else if(length<destCapacity) {                                \
    1438             :             /* NUL-terminate the string, the NUL fits */                \
    1439             :             dest[length]=0;                                             \
    1440             :             /* unset the not-terminated warning but leave all others */ \
    1441             :             if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {          \
    1442             :                 *pErrorCode=U_ZERO_ERROR;                               \
    1443             :             }                                                           \
    1444             :         } else if(length==destCapacity) {                               \
    1445             :             /* unable to NUL-terminate, but the string itself fit - set a warning code */ \
    1446             :             *pErrorCode=U_STRING_NOT_TERMINATED_WARNING;                \
    1447             :         } else /* length>destCapacity */ {                              \
    1448             :             /* even the string itself did not fit - set an error code */ \
    1449             :             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;                        \
    1450             :         }                                                               \
    1451             :     }
    1452             : 
    1453             : U_CAPI int32_t U_EXPORT2
    1454           0 : u_terminateUChars(UChar *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
    1455           0 :     __TERMINATE_STRING(dest, destCapacity, length, pErrorCode); /* UChar buffer: stores dest[length]=0 when it fits, otherwise records a warning or error in *pErrorCode */
    1456           0 :     return length; /* the length argument is handed back unchanged in every case */
    1457             : }
    1458             : 
    1459             : U_CAPI int32_t U_EXPORT2
    1460         164 : u_terminateChars(char *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
    1461         164 :     __TERMINATE_STRING(dest, destCapacity, length, pErrorCode); /* char buffer: stores dest[length]=0 when it fits, otherwise records a warning or error in *pErrorCode */
    1462         164 :     return length; /* the length argument is handed back unchanged in every case */
    1463             : }
    1464             : 
    1465             : U_CAPI int32_t U_EXPORT2
    1466           0 : u_terminateUChar32s(UChar32 *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
    1467           0 :     __TERMINATE_STRING(dest, destCapacity, length, pErrorCode); /* UChar32 buffer: stores dest[length]=0 when it fits, otherwise records a warning or error in *pErrorCode */
    1468           0 :     return length; /* the length argument is handed back unchanged in every case */
    1469             : }
    1470             : 
    1471             : U_CAPI int32_t U_EXPORT2
    1472           0 : u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
    1473           0 :     __TERMINATE_STRING(dest, destCapacity, length, pErrorCode); /* wchar_t buffer: stores dest[length]=0 when it fits, otherwise records a warning or error in *pErrorCode */
    1474           0 :     return length; /* the length argument is handed back unchanged in every case */
    1475             : }
    1476             : 
    1477             : // Compute the hash code for a string -------------------------------------- ***
    1478             : 
    1479             : // Moved here from uhash.c so that UnicodeString::hashCode() does not depend
    1480             : // on UHashtable code.
    1481             : 
    1482             : /*
    1483             :   Compute the hash by iterating sparsely over about 32 (up to 63)
    1484             :   characters spaced evenly through the string.  For each character,
    1485             :   multiply the previous hash value by a prime number and add the new
    1486             :   character in, like a linear congruential random number generator,
    1487             :   producing a pseudorandom deterministic value well distributed over
    1488             :   the output range. [LIU]
                     :
                     :   Macro parameters: TYPE is the element type that STR is read as,
                     :   STRLEN is the element count, and DEREF is the expression (written
                     :   in terms of the cursor p) whose value is added for each sampled
                     :   element.  The stride is inc = ((len - 32) / 32) + 1, which is 1
                     :   for lengths 1..63, so strings that short are hashed in full.  The
                     :   multiplier is 37 and the value is accumulated in a uint32_t.  A
                     :   NULL STR skips the loop and yields 0.
                     :
                     :   The expansion declares locals (hash, p, len, inc, limit) and ends
                     :   in a return statement without a semicolon, so it is meant to form
                     :   the body of a function returning int32_t, followed by a semicolon.
    1489             : */
    1490             : 
    1491             : #define STRING_HASH(TYPE, STR, STRLEN, DEREF) \
    1492             :     uint32_t hash = 0;                        \
    1493             :     const TYPE *p = (const TYPE*) STR;        \
    1494             :     if (p != NULL) {                          \
    1495             :         int32_t len = (int32_t)(STRLEN);      \
    1496             :         int32_t inc = ((len - 32) / 32) + 1;  \
    1497             :         const TYPE *limit = p + len;          \
    1498             :         while (p<limit) {                     \
    1499             :             hash = (hash * 37) + DEREF;       \
    1500             :             p += inc;                         \
    1501             :         }                                     \
    1502             :     }                                         \
    1503             :     return static_cast<int32_t>(hash)
    1504             : 
    1505             : /* Used by UnicodeString to compute its hashcode - Not public API.
                     :  * Hashes length UChars starting at str through STRING_HASH, adding the
                     :  * value of each sampled UChar; a NULL str hashes to 0. */
    1506             : U_CAPI int32_t U_EXPORT2
    1507           0 : ustr_hashUCharsN(const UChar *str, int32_t length) {
    1508           0 :     STRING_HASH(UChar, str, length, *p); /* the macro expansion ends in the return statement */
    1509             : }
    1510             : 
    1511             : U_CAPI int32_t U_EXPORT2
    1512          28 : ustr_hashCharsN(const char *str, int32_t length) { /* hash of length chars at str; a NULL str hashes to 0 */
    1513          28 :     STRING_HASH(uint8_t, str, length, *p); /* chars are read as uint8_t, so each one contributes 0..255 regardless of the signedness of char */
    1514             : }
    1515             : 
    1516             : U_CAPI int32_t U_EXPORT2
    1517           0 : ustr_hashICharsN(const char *str, int32_t length) { /* like ustr_hashCharsN() but each char goes through uprv_tolower() first */
    1518           0 :     STRING_HASH(char, str, length, (uint8_t)uprv_tolower(*p)); /* strings that differ only in case that uprv_tolower() folds get the same hash; NOTE(review): which characters it folds is not visible here */
    1519             : }

Generated by: LCOV version 1.13