LCOV - code coverage report
Current view: top level - intl/icu/source/common - ucasemap.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 0 346 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 29 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : *******************************************************************************
       5             : *
       6             : *   Copyright (C) 2005-2016, International Business Machines
       7             : *   Corporation and others.  All Rights Reserved.
       8             : *
       9             : *******************************************************************************
      10             : *   file name:  ucasemap.cpp
      11             : *   encoding:   UTF-8
      12             : *   tab size:   8 (not used)
      13             : *   indentation:4
      14             : *
      15             : *   created on: 2005may06
      16             : *   created by: Markus W. Scherer
      17             : *
      18             : *   Case mapping service object and functions using it.
      19             : */
      20             : 
      21             : #include "unicode/utypes.h"
      22             : #include "unicode/brkiter.h"
      23             : #include "unicode/casemap.h"
      24             : #include "unicode/edits.h"
      25             : #include "unicode/ubrk.h"
      26             : #include "unicode/uloc.h"
      27             : #include "unicode/ustring.h"
      28             : #include "unicode/ucasemap.h"
      29             : #if !UCONFIG_NO_BREAK_ITERATION
      30             : #include "unicode/utext.h"
      31             : #endif
      32             : #include "unicode/utf.h"
      33             : #include "unicode/utf8.h"
      34             : #include "unicode/utf16.h"
      35             : #include "cmemory.h"
      36             : #include "cstring.h"
      37             : #include "uassert.h"
      38             : #include "ucase.h"
      39             : #include "ucasemap_imp.h"
      40             : #include "ustr_imp.h"
      41             : 
      42             : U_NAMESPACE_BEGIN
      43             : 
      44             : namespace {
      45             : 
      46             : // TODO: share with UTF-16? inline in ucasemap_imp.h?
      47           0 : int32_t checkOverflowAndEditsError(int32_t destIndex, int32_t destCapacity,
      48             :                                    Edits *edits, UErrorCode &errorCode) {
      49           0 :     if (U_SUCCESS(errorCode)) {
      50           0 :         if (destIndex > destCapacity) {
      51           0 :             errorCode = U_BUFFER_OVERFLOW_ERROR;
      52           0 :         } else if (edits != NULL) {
      53           0 :             edits->copyErrorTo(errorCode);
      54             :         }
      55             :     }
      56           0 :     return destIndex;
      57             : }
      58             : 
      59             : }  // namespace
      60             : 
      61             : U_NAMESPACE_END
      62             : 
      63             : U_NAMESPACE_USE
      64             : 
      65             : /* UCaseMap service object -------------------------------------------------- */
      66             : 
      67           0 : UCaseMap::UCaseMap(const char *localeID, uint32_t opts, UErrorCode *pErrorCode) :
      68             : #if !UCONFIG_NO_BREAK_ITERATION
      69             :         iter(NULL),
      70             : #endif
      71           0 :         caseLocale(UCASE_LOC_UNKNOWN), options(opts) {
      72           0 :     ucasemap_setLocale(this, localeID, pErrorCode);
      73           0 : }
      74             : 
      75           0 : UCaseMap::~UCaseMap() {
      76             : #if !UCONFIG_NO_BREAK_ITERATION
      77             :     delete iter;
      78             : #endif
      79           0 : }
      80             : 
      81             : U_CAPI UCaseMap * U_EXPORT2
      82           0 : ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode) {
      83           0 :     if(U_FAILURE(*pErrorCode)) {
      84           0 :         return NULL;
      85             :     }
      86           0 :     UCaseMap *csm = new UCaseMap(locale, options, pErrorCode);
      87           0 :     if(csm==NULL) {
      88           0 :         *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
      89           0 :         return NULL;
      90           0 :     } else if (U_FAILURE(*pErrorCode)) {
      91           0 :         delete csm;
      92           0 :         return NULL;
      93             :     }
      94           0 :     return csm;
      95             : }
      96             : 
      97             : U_CAPI void U_EXPORT2
      98           0 : ucasemap_close(UCaseMap *csm) {
      99           0 :     delete csm;
     100           0 : }
     101             : 
     102             : U_CAPI const char * U_EXPORT2
     103           0 : ucasemap_getLocale(const UCaseMap *csm) {
     104           0 :     return csm->locale;
     105             : }
     106             : 
     107             : U_CAPI uint32_t U_EXPORT2
     108           0 : ucasemap_getOptions(const UCaseMap *csm) {
     109           0 :     return csm->options;
     110             : }
     111             : 
     112             : U_CAPI void U_EXPORT2
     113           0 : ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) {
     114           0 :     if(U_FAILURE(*pErrorCode)) {
     115           0 :         return;
     116             :     }
     117           0 :     if (locale != NULL && *locale == 0) {
     118           0 :         csm->locale[0] = 0;
     119           0 :         csm->caseLocale = UCASE_LOC_ROOT;
     120           0 :         return;
     121             :     }
     122             : 
     123           0 :     int32_t length=uloc_getName(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode);
     124           0 :     if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || length==sizeof(csm->locale)) {
     125           0 :         *pErrorCode=U_ZERO_ERROR;
     126             :         /* we only really need the language code for case mappings */
     127           0 :         length=uloc_getLanguage(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode);
     128             :     }
     129           0 :     if(length==sizeof(csm->locale)) {
     130           0 :         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
     131             :     }
     132           0 :     if(U_SUCCESS(*pErrorCode)) {
     133           0 :         csm->caseLocale=UCASE_LOC_UNKNOWN;
     134           0 :         csm->caseLocale = ucase_getCaseLocale(csm->locale);
     135             :     } else {
     136           0 :         csm->locale[0]=0;
     137           0 :         csm->caseLocale = UCASE_LOC_ROOT;
     138             :     }
     139             : }
     140             : 
     141             : U_CAPI void U_EXPORT2
     142           0 : ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode) {
     143           0 :     if(U_FAILURE(*pErrorCode)) {
     144           0 :         return;
     145             :     }
     146           0 :     csm->options=options;
     147             : }
     148             : 
     149             : /* UTF-8 string case mappings ----------------------------------------------- */
     150             : 
     151             : /* TODO(markus): Move to a new, separate utf8case.cpp file. */
     152             : 
     153             : /* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */
     154             : static inline int32_t
     155           0 : appendResult(uint8_t *dest, int32_t destIndex, int32_t destCapacity,
     156             :              int32_t result, const UChar *s,
     157             :              int32_t cpLength, uint32_t options, icu::Edits *edits) {
     158             :     UChar32 c;
     159             :     int32_t length;
     160             :     UErrorCode errorCode;
     161             : 
     162             :     /* decode the result */
     163           0 :     if(result<0) {
     164             :         /* (not) original code point */
     165           0 :         if(edits!=NULL) {
     166           0 :             edits->addUnchanged(cpLength);
     167           0 :             if(options & UCASEMAP_OMIT_UNCHANGED_TEXT) {
     168           0 :                 return destIndex;
     169             :             }
     170             :         }
     171           0 :         c=~result;
     172           0 :         if(destIndex<destCapacity && c<=0x7f) {  // ASCII slightly-fastpath
     173           0 :             dest[destIndex++]=(uint8_t)c;
     174           0 :             return destIndex;
     175             :         }
     176           0 :         length=cpLength;
     177             :     } else {
     178           0 :         if(result<=UCASE_MAX_STRING_LENGTH) {
     179             :             // string: "result" is the UTF-16 length
     180           0 :             errorCode=U_ZERO_ERROR;
     181           0 :             if(destIndex<destCapacity) {
     182           0 :                 u_strToUTF8((char *)(dest+destIndex), destCapacity-destIndex, &length,
     183           0 :                             s, result, &errorCode);
     184             :             } else {
     185           0 :                 u_strToUTF8(NULL, 0, &length, s, result, &errorCode);
     186             :             }
     187           0 :             if(U_FAILURE(errorCode) && errorCode != U_BUFFER_OVERFLOW_ERROR) {
     188           0 :                 return -1;
     189             :             }
     190           0 :             if(length>(INT32_MAX-destIndex)) {
     191           0 :                 return -1;  // integer overflow
     192             :             }
     193           0 :             if(edits!=NULL) {
     194           0 :                 edits->addReplace(cpLength, length);
     195             :             }
     196             :             // We might have an overflow, but we know the actual length.
     197           0 :             return destIndex+length;
     198           0 :         } else if(destIndex<destCapacity && result<=0x7f) {  // ASCII slightly-fastpath
     199           0 :             dest[destIndex++]=(uint8_t)result;
     200           0 :             if(edits!=NULL) {
     201           0 :                 edits->addReplace(cpLength, 1);
     202             :             }
     203           0 :             return destIndex;
     204             :         } else {
     205           0 :             c=result;
     206           0 :             length=U8_LENGTH(c);
     207           0 :             if(edits!=NULL) {
     208           0 :                 edits->addReplace(cpLength, length);
     209             :             }
     210             :         }
     211             :     }
     212             :     // c>=0 single code point
     213           0 :     if(length>(INT32_MAX-destIndex)) {
     214           0 :         return -1;  // integer overflow
     215             :     }
     216             : 
     217           0 :     if(destIndex<destCapacity) {
     218             :         /* append the result */
     219           0 :         UBool isError=FALSE;
     220           0 :         U8_APPEND(dest, destIndex, destCapacity, c, isError);
     221           0 :         if(isError) {
     222             :             /* overflow, nothing written */
     223           0 :             destIndex+=length;
     224             :         }
     225             :     } else {
     226             :         /* preflight */
     227           0 :         destIndex+=length;
     228             :     }
     229           0 :     return destIndex;
     230             : }
     231             : 
     /**
      * Appends a single byte to the output.
      *
      * The byte is stored only if destIndex is inside the buffer; otherwise the
      * call merely counts it (preflighting).
      *
      * @return destIndex+1, or -1 if the byte does not fit and destIndex is
      *         already INT32_MAX (the increment would overflow)
      */
     static inline int32_t
     appendASCII(uint8_t *dest, int32_t destIndex, int32_t destCapacity, uint8_t c) {
         const bool fits=destIndex<destCapacity;
         if(!fits && destIndex==INT32_MAX) {
             return -1;  // integer overflow
         }
         if(fits) {
             dest[destIndex]=c;
         }
         return destIndex+1;
     }
     241             : 
     242             : // See unicode/utf8.h U8_APPEND_UNSAFE().
     243           0 : static inline uint8_t getTwoByteLead(UChar32 c) { return (uint8_t)((c >> 6) | 0xc0); }
     244           0 : static inline uint8_t getTwoByteTrail(UChar32 c) { return (uint8_t)((c & 0x3f) | 0x80); }
     245             : 
     246             : static inline int32_t
     247           0 : appendTwoBytes(uint8_t *dest, int32_t destIndex, int32_t destCapacity, UChar32 c) {
     248           0 :     U_ASSERT(0x370 <= c && c <= 0x3ff);  // 2-byte UTF-8, main Greek block
     249           0 :     if(2>(INT32_MAX-destIndex)) {
     250           0 :         return -1;  // integer overflow
     251             :     }
     252           0 :     int32_t limit=destIndex+2;
     253           0 :     if(limit<=destCapacity) {
     254           0 :         dest+=destIndex;
     255           0 :         dest[0]=getTwoByteLead(c);
     256           0 :         dest[1]=getTwoByteTrail(c);
     257             :     }
     258           0 :     return limit;
     259             : }
     260             : 
     /**
      * Appends the first two bytes of s to the output.
      *
      * The bytes are stored only if both fit into the buffer; otherwise they
      * are just counted (preflighting).
      *
      * @return destIndex+2, or -1 if that sum would exceed INT32_MAX
      */
     static inline int32_t
     appendTwoBytes(uint8_t *dest, int32_t destIndex, int32_t destCapacity, const char *s) {
         if((INT32_MAX-destIndex)<2) {
             return -1;  // integer overflow
         }
         const int32_t newIndex=destIndex+2;
         if(newIndex<=destCapacity) {
             uint8_t *out=dest+destIndex;
             out[0]=(uint8_t)s[0];
             out[1]=(uint8_t)s[1];
         }
         return newIndex;
     }
     274             : 
     275             : static inline int32_t
     276           0 : appendUnchanged(uint8_t *dest, int32_t destIndex, int32_t destCapacity,
     277             :                 const uint8_t *s, int32_t length, uint32_t options, icu::Edits *edits) {
     278           0 :     if(length>0) {
     279           0 :         if(edits!=NULL) {
     280           0 :             edits->addUnchanged(length);
     281           0 :             if(options & UCASEMAP_OMIT_UNCHANGED_TEXT) {
     282           0 :                 return destIndex;
     283             :             }
     284             :         }
     285           0 :         if(length>(INT32_MAX-destIndex)) {
     286           0 :             return -1;  // integer overflow
     287             :         }
     288           0 :         if((destIndex+length)<=destCapacity) {
     289           0 :             uprv_memcpy(dest+destIndex, s, length);
     290             :         }
     291           0 :         destIndex+=length;
     292             :     }
     293           0 :     return destIndex;
     294             : }
     295             : 
     296             : static UChar32 U_CALLCONV
     297           0 : utf8_caseContextIterator(void *context, int8_t dir) {
     298           0 :     UCaseContext *csc=(UCaseContext *)context;
     299             :     UChar32 c;
     300             : 
     301           0 :     if(dir<0) {
     302             :         /* reset for backward iteration */
     303           0 :         csc->index=csc->cpStart;
     304           0 :         csc->dir=dir;
     305           0 :     } else if(dir>0) {
     306             :         /* reset for forward iteration */
     307           0 :         csc->index=csc->cpLimit;
     308           0 :         csc->dir=dir;
     309             :     } else {
     310             :         /* continue current iteration direction */
     311           0 :         dir=csc->dir;
     312             :     }
     313             : 
     314           0 :     if(dir<0) {
     315           0 :         if(csc->start<csc->index) {
     316           0 :             U8_PREV((const uint8_t *)csc->p, csc->start, csc->index, c);
     317           0 :             return c;
     318             :         }
     319             :     } else {
     320           0 :         if(csc->index<csc->limit) {
     321           0 :             U8_NEXT((const uint8_t *)csc->p, csc->index, csc->limit, c);
     322           0 :             return c;
     323             :         }
     324             :     }
     325           0 :     return U_SENTINEL;
     326             : }
     327             : 
     328             : /*
     329             :  * Case-maps [srcStart..srcLimit[ but takes
     330             :  * context [0..srcLength[ into account.
     331             :  */
     332             : static int32_t
     333           0 : _caseMap(int32_t caseLocale, uint32_t options, UCaseMapFull *map,
     334             :          uint8_t *dest, int32_t destCapacity,
     335             :          const uint8_t *src, UCaseContext *csc,
     336             :          int32_t srcStart, int32_t srcLimit,
     337             :          icu::Edits *edits,
     338             :          UErrorCode &errorCode) {
     339             :     /* case mapping loop */
     340           0 :     int32_t srcIndex=srcStart;
     341           0 :     int32_t destIndex=0;
     342           0 :     while(srcIndex<srcLimit) {
     343             :         int32_t cpStart;
     344           0 :         csc->cpStart=cpStart=srcIndex;
     345             :         UChar32 c;
     346           0 :         U8_NEXT(src, srcIndex, srcLimit, c);
     347           0 :         csc->cpLimit=srcIndex;
     348           0 :         if(c<0) {
     349             :             // Malformed UTF-8.
     350           0 :             destIndex=appendUnchanged(dest, destIndex, destCapacity,
     351           0 :                                       src+cpStart, srcIndex-cpStart, options, edits);
     352           0 :             if(destIndex<0) {
     353           0 :                 errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
     354           0 :                 return 0;
     355             :             }
     356           0 :             continue;
     357             :         }
     358             :         const UChar *s;
     359           0 :         c=map(c, utf8_caseContextIterator, csc, &s, caseLocale);
     360           0 :         destIndex = appendResult(dest, destIndex, destCapacity, c, s,
     361           0 :                                  srcIndex - cpStart, options, edits);
     362           0 :         if (destIndex < 0) {
     363           0 :             errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
     364           0 :             return 0;
     365             :         }
     366             :     }
     367             : 
     368           0 :     return destIndex;
     369             : }
     370             : 
     371             : #if !UCONFIG_NO_BREAK_ITERATION
     372             : 
     373             : U_CFUNC int32_t U_CALLCONV
     374             : ucasemap_internalUTF8ToTitle(
     375             :         int32_t caseLocale, uint32_t options, BreakIterator *iter,
     376             :         uint8_t *dest, int32_t destCapacity,
     377             :         const uint8_t *src, int32_t srcLength,
     378             :         icu::Edits *edits,
     379             :         UErrorCode &errorCode) {
     380             :     if(U_FAILURE(errorCode)) {
     381             :         return 0;
     382             :     }
     383             : 
     384             :     /* set up local variables */
     385             :     UCaseContext csc=UCASECONTEXT_INITIALIZER;
     386             :     csc.p=(void *)src;
     387             :     csc.limit=srcLength;
     388             :     int32_t destIndex=0;
     389             :     int32_t prev=0;
     390             :     UBool isFirstIndex=TRUE;
     391             : 
     392             :     /* titlecasing loop */
     393             :     while(prev<srcLength) {
     394             :         /* find next index where to titlecase */
     395             :         int32_t index;
     396             :         if(isFirstIndex) {
     397             :             isFirstIndex=FALSE;
     398             :             index=iter->first();
     399             :         } else {
     400             :             index=iter->next();
     401             :         }
     402             :         if(index==UBRK_DONE || index>srcLength) {
     403             :             index=srcLength;
     404             :         }
     405             : 
     406             :         /*
     407             :          * Unicode 4 & 5 section 3.13 Default Case Operations:
     408             :          *
     409             :          * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
     410             :          * #29, "Text Boundaries." Between each pair of word boundaries, find the first
     411             :          * cased character F. If F exists, map F to default_title(F); then map each
     412             :          * subsequent character C to default_lower(C).
     413             :          *
     414             :          * In this implementation, segment [prev..index[ into 3 parts:
     415             :          * a) uncased characters (copy as-is) [prev..titleStart[
     416             :          * b) first case letter (titlecase)         [titleStart..titleLimit[
     417             :          * c) subsequent characters (lowercase)                 [titleLimit..index[
     418             :          */
     419             :         if(prev<index) {
     420             :             /* find and copy uncased characters [prev..titleStart[ */
     421             :             int32_t titleStart=prev;
     422             :             int32_t titleLimit=prev;
     423             :             UChar32 c;
     424             :             U8_NEXT(src, titleLimit, index, c);
     425             :             if((options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(c)) {
     426             :                 /* Adjust the titlecasing index (titleStart) to the next cased character. */
     427             :                 for(;;) {
     428             :                     titleStart=titleLimit;
     429             :                     if(titleLimit==index) {
     430             :                         /*
     431             :                          * only uncased characters in [prev..index[
     432             :                          * stop with titleStart==titleLimit==index
     433             :                          */
     434             :                         break;
     435             :                     }
     436             :                     U8_NEXT(src, titleLimit, index, c);
     437             :                     if(UCASE_NONE!=ucase_getType(c)) {
     438             :                         break; /* cased letter at [titleStart..titleLimit[ */
     439             :                     }
     440             :                 }
     441             :                 destIndex=appendUnchanged(dest, destIndex, destCapacity,
     442             :                                           src+prev, titleStart-prev, options, edits);
     443             :                 if(destIndex<0) {
     444             :                     errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
     445             :                     return 0;
     446             :                 }
     447             :             }
     448             : 
     449             :             if(titleStart<titleLimit) {
     450             :                 /* titlecase c which is from [titleStart..titleLimit[ */
     451             :                 if(c>=0) {
     452             :                     csc.cpStart=titleStart;
     453             :                     csc.cpLimit=titleLimit;
     454             :                     const UChar *s;
     455             :                     c=ucase_toFullTitle(c, utf8_caseContextIterator, &csc, &s, caseLocale);
     456             :                     destIndex=appendResult(dest, destIndex, destCapacity, c, s,
     457             :                                            titleLimit-titleStart, options, edits);
     458             :                 } else {
     459             :                     // Malformed UTF-8.
     460             :                     destIndex=appendUnchanged(dest, destIndex, destCapacity,
     461             :                                               src+titleStart, titleLimit-titleStart, options, edits);
     462             :                 }
     463             :                 if(destIndex<0) {
     464             :                     errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
     465             :                     return 0;
     466             :                 }
     467             : 
     468             :                 /* Special case Dutch IJ titlecasing */
     469             :                 if (titleStart+1 < index &&
     470             :                         caseLocale == UCASE_LOC_DUTCH &&
     471             :                         (src[titleStart] == 0x0049 || src[titleStart] == 0x0069)) {
     472             :                     if (src[titleStart+1] == 0x006A) {
     473             :                         destIndex=appendASCII(dest, destIndex, destCapacity, 0x004A);
     474             :                         if(destIndex<0) {
     475             :                             errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
     476             :                             return 0;
     477             :                         }
     478             :                         if(edits!=NULL) {
     479             :                             edits->addReplace(1, 1);
     480             :                         }
     481             :                         titleLimit++;
     482             :                     } else if (src[titleStart+1] == 0x004A) {
     483             :                         // Keep the capital J from getting lowercased.
     484             :                         destIndex=appendUnchanged(dest, destIndex, destCapacity,
     485             :                                                   src+titleStart+1, 1, options, edits);
     486             :                         if(destIndex<0) {
     487             :                             errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
     488             :                             return 0;
     489             :                         }
     490             :                         titleLimit++;
     491             :                     }
     492             :                 }
     493             : 
     494             :                 /* lowercase [titleLimit..index[ */
     495             :                 if(titleLimit<index) {
     496             :                     if((options&U_TITLECASE_NO_LOWERCASE)==0) {
     497             :                         /* Normal operation: Lowercase the rest of the word. */
     498             :                         destIndex+=
     499             :                             _caseMap(
     500             :                                 caseLocale, options, ucase_toFullLower,
     501             :                                 dest+destIndex, destCapacity-destIndex,
     502             :                                 src, &csc,
     503             :                                 titleLimit, index,
     504             :                                 edits, errorCode);
     505             :                         if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
     506             :                             errorCode=U_ZERO_ERROR;
     507             :                         }
     508             :                         if(U_FAILURE(errorCode)) {
     509             :                             return destIndex;
     510             :                         }
     511             :                     } else {
     512             :                         /* Optionally just copy the rest of the word unchanged. */
     513             :                         destIndex=appendUnchanged(dest, destIndex, destCapacity,
     514             :                                                   src+titleLimit, index-titleLimit, options, edits);
     515             :                         if(destIndex<0) {
     516             :                             errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
     517             :                             return 0;
     518             :                         }
     519             :                     }
     520             :                 }
     521             :             }
     522             :         }
     523             : 
     524             :         prev=index;
     525             :     }
     526             : 
     527             :     return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
     528             : }
     529             : 
     530             : #endif
     531             : 
     532             : U_NAMESPACE_BEGIN
     533             : namespace GreekUpper {
     534             : 
     535           0 : UBool isFollowedByCasedLetter(const uint8_t *s, int32_t i, int32_t length) {
     536           0 :     while (i < length) {
     537             :         UChar32 c;
     538           0 :         U8_NEXT(s, i, length, c);
     539           0 :         int32_t type = ucase_getTypeOrIgnorable(c);
     540           0 :         if ((type & UCASE_IGNORABLE) != 0) {
     541             :             // Case-ignorable, continue with the loop.
     542           0 :         } else if (type != UCASE_NONE) {
     543           0 :             return TRUE;  // Followed by cased letter.
     544             :         } else {
     545           0 :             return FALSE;  // Uncased and not case-ignorable.
     546             :         }
     547             :     }
     548           0 :     return FALSE;  // Not followed by cased letter.
     549             : }
     550             : 
     551             : // Keep this consistent with the UTF-16 version in ustrcase.cpp and the Java version in CaseMap.java.
     552           0 : int32_t toUpper(uint32_t options,  // Greek-specific uppercasing of UTF-8 src[0..srcLength) into dest; returns the final destIndex.
     553             :                 uint8_t *dest, int32_t destCapacity,
     554             :                 const uint8_t *src, int32_t srcLength,
     555             :                 Edits *edits,  // May be NULL; when non-NULL, Greek-letter mappings are recorded via addReplace()/addUnchanged() and it is forwarded to the append helpers.
     556             :                 UErrorCode &errorCode) {  // Set to U_INDEX_OUTOFBOUNDS_ERROR (with a return value of 0) when an append helper yields a negative index.
     557           0 :     int32_t destIndex=0;
     558           0 :     uint32_t state = 0;  // Flags carried over from the previous code point (AFTER_CASED, AFTER_VOWEL_WITH_ACCENT).
     559           0 :     for (int32_t i = 0; i < srcLength;) {
     560           0 :         int32_t nextIndex = i;
     561             :         UChar32 c;
     562           0 :         U8_NEXT(src, nextIndex, srcLength, c);  // A negative c is handled as malformed UTF-8 in the last branch below.
     563           0 :         uint32_t nextState = 0;
     564           0 :         int32_t type = ucase_getTypeOrIgnorable(c);
     565           0 :         if ((type & UCASE_IGNORABLE) != 0) {
     566             :             // c is case-ignorable: it inherits AFTER_CASED from the previous state.
     567           0 :             nextState |= (state & AFTER_CASED);
     568           0 :         } else if (type != UCASE_NONE) {
     569             :             // c is cased
     570           0 :             nextState |= AFTER_CASED;
     571             :         }
     572           0 :         uint32_t data = getLetterData(c);  // Nonzero data selects the Greek-letter path; the bits under UPPER_MASK hold the uppercase code point.
     573           0 :         if (data > 0) {
     574           0 :             uint32_t upper = data & UPPER_MASK;
     575             :             // Add a dialytika to this iota or ypsilon vowel
     576             :             // if we removed a tonos from the previous vowel,
     577             :             // and that previous vowel did not also have (or gain) a dialytika.
     578             :             // Adding one only to the final vowel in a longer sequence
     579             :             // (which does not occur in normal writing) would require lookahead.
     580             :             // Set the same flag as for preserving an existing dialytika.
     581           0 :             if ((data & HAS_VOWEL) != 0 && (state & AFTER_VOWEL_WITH_ACCENT) != 0 &&
     582           0 :                     (upper == 0x399 || upper == 0x3A5)) {
     583           0 :                 data |= HAS_DIALYTIKA;
     584             :             }
     585           0 :             int32_t numYpogegrammeni = 0;  // Map each one to a trailing, spacing, capital iota.
     586           0 :             if ((data & HAS_YPOGEGRAMMENI) != 0) {
     587           0 :                 numYpogegrammeni = 1;
     588             :             }
     589             :             // Skip combining diacritics after this Greek letter.
     590           0 :             int32_t nextNextIndex = nextIndex;
     591           0 :             while (nextIndex < srcLength) {  // nextIndex equals nextNextIndex at the top of every iteration, so testing either one is equivalent.
     592             :                 UChar32 c2;
     593           0 :                 U8_NEXT(src, nextNextIndex, srcLength, c2);
     594           0 :                 uint32_t diacriticData = getDiacriticData(c2);
     595           0 :                 if (diacriticData != 0) {
     596           0 :                     data |= diacriticData;
     597           0 :                     if ((diacriticData & HAS_YPOGEGRAMMENI) != 0) {
     598           0 :                         ++numYpogegrammeni;
     599             :                     }
     600           0 :                     nextIndex = nextNextIndex;  // Commit the lookahead: the diacritic belongs to this letter.
     601             :                 } else {
     602           0 :                     break;  // not a Greek diacritic
     603             :                 }
     604             :             }
     605           0 :             if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) {  // Accented vowel without dialytika: a following iota/ypsilon may gain one (see the state check above).
     606           0 :                 nextState |= AFTER_VOWEL_WITH_ACCENT;
     607             :             }
     608             :             // Map according to Greek rules.
     609           0 :             UBool addTonos = FALSE;
     610           0 :             if (upper == 0x397 &&
     611           0 :                     (data & HAS_ACCENT) != 0 &&
     612           0 :                     numYpogegrammeni == 0 &&
     613           0 :                     (state & AFTER_CASED) == 0 &&
     614           0 :                     !isFollowedByCasedLetter(src, nextIndex, srcLength)) {
     615             :                 // Keep disjunctive "or" with (only) a tonos.
     616             :                 // We use the same "word boundary" conditions as for the Final_Sigma test.
     617           0 :                 if (i == nextIndex) {  // NOTE(review): nextIndex was already advanced past c by U8_NEXT above, so this looks never true here - confirm against the UTF-16/Java versions.
     618           0 :                     upper = 0x389;  // Preserve the precomposed form.
     619             :                 } else {
     620           0 :                     addTonos = TRUE;
     621             :                 }
     622           0 :             } else if ((data & HAS_DIALYTIKA) != 0) {
     623             :                 // Preserve a vowel with dialytika in precomposed form if it exists.
     624           0 :                 if (upper == 0x399) {
     625           0 :                     upper = 0x3AA;
     626           0 :                     data &= ~HAS_EITHER_DIALYTIKA;
     627           0 :                 } else if (upper == 0x3A5) {
     628           0 :                     upper = 0x3AB;
     629           0 :                     data &= ~HAS_EITHER_DIALYTIKA;
     630             :                 }
     631             :             }
     632             : 
     633           0 :             UBool change = TRUE;  // Stays TRUE when edits is NULL, so the mapped letter is then always written.
     634           0 :             if (edits != NULL) {
     635             :                 // Find out first whether we are changing the text.
     636           0 :                 U_ASSERT(0x370 <= upper && upper <= 0x3ff);  // 2-byte UTF-8, main Greek block
     637           0 :                 change = (i + 2) > nextIndex ||
     638           0 :                         src[i] != getTwoByteLead(upper) || src[i + 1] != getTwoByteTrail(upper) ||
     639             :                         numYpogegrammeni > 0;
     640           0 :                 int32_t i2 = i + 2;  // i2 tracks where each expected 2-byte output unit would sit in src if the text were unchanged.
     641           0 :                 if ((data & HAS_EITHER_DIALYTIKA) != 0) {
     642           0 :                     change |= (i2 + 2) > nextIndex ||
     643           0 :                             src[i2] != (uint8_t)u8"\u0308"[0] ||
     644           0 :                             src[i2 + 1] != (uint8_t)u8"\u0308"[1];
     645           0 :                     i2 += 2;
     646             :                 }
     647           0 :                 if (addTonos) {
     648           0 :                     change |= (i2 + 2) > nextIndex ||
     649           0 :                             src[i2] != (uint8_t)u8"\u0301"[0] ||
     650           0 :                             src[i2 + 1] != (uint8_t)u8"\u0301"[1];
     651           0 :                     i2 += 2;
     652             :                 }
     653           0 :                 int32_t oldLength = nextIndex - i;
     654           0 :                 int32_t newLength = (i2 - i) + numYpogegrammeni * 2;  // 2 bytes per U+0399
     655           0 :                 change |= oldLength != newLength;
     656           0 :                 if (change) {
     657           0 :                     if (edits != NULL) {  // Always true here: the enclosing block already checked edits != NULL.
     658           0 :                         edits->addReplace(oldLength, newLength);
     659             :                     }
     660             :                 } else {
     661           0 :                     if (edits != NULL) {  // Always true here: the enclosing block already checked edits != NULL.
     662           0 :                         edits->addUnchanged(oldLength);
     663             :                     }
     664             :                     // Write unchanged text? Only when the caller did not ask to omit it.
     665           0 :                     change = (options & UCASEMAP_OMIT_UNCHANGED_TEXT) == 0;
     666             :                 }
     667             :             }
     668             : 
     669           0 :             if (change) {
     670           0 :                 destIndex=appendTwoBytes(dest, destIndex, destCapacity, upper);
     671           0 :                 if (destIndex >= 0 && (data & HAS_EITHER_DIALYTIKA) != 0) {
     672           0 :                     destIndex=appendTwoBytes(dest, destIndex, destCapacity, u8"\u0308");  // restore or add a dialytika
     673             :                 }
     674           0 :                 if (destIndex >= 0 && addTonos) {
     675           0 :                     destIndex=appendTwoBytes(dest, destIndex, destCapacity, u8"\u0301");
     676             :                 }
     677           0 :                 while (destIndex >= 0 && numYpogegrammeni > 0) {
     678           0 :                     destIndex=appendTwoBytes(dest, destIndex, destCapacity, u8"\u0399");
     679           0 :                     --numYpogegrammeni;
     680             :                 }
     681           0 :                 if(destIndex<0) {  // A negative index from any append above is reported once, here.
     682           0 :                     errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
     683           0 :                     return 0;
     684             :                 }
     685             :             }
     686           0 :         } else if(c>=0) {  // Well-formed code point without Greek letter data: use the generic full uppercase mapping with the Greek locale.
     687             :             const UChar *s;
     688           0 :             c=ucase_toFullUpper(c, NULL, NULL, &s, UCASE_LOC_GREEK);
     689           0 :             destIndex = appendResult(dest, destIndex, destCapacity, c, s,
     690           0 :                                      nextIndex - i, options, edits);
     691           0 :             if (destIndex < 0) {
     692           0 :                 errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
     693           0 :                 return 0;
     694             :             }
     695             :         } else {
     696             :             // Malformed UTF-8: pass the bytes src[i..nextIndex) through appendUnchanged().
     697           0 :             destIndex=appendUnchanged(dest, destIndex, destCapacity,
     698           0 :                                       src+i, nextIndex-i, options, edits);
     699           0 :             if(destIndex<0) {
     700           0 :                 errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
     701           0 :                 return 0;
     702             :             }
     703             :         }
     704           0 :         i = nextIndex;
     705           0 :         state = nextState;
     706             :     }
     707             : 
     708           0 :     return destIndex;
     709             : }
     710             : 
     711             : }  // namespace GreekUpper
     712             : U_NAMESPACE_END
     713             : 
     714             : static int32_t U_CALLCONV
     715           0 : ucasemap_internalUTF8ToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED  // Lowercasing worker: runs _caseMap() with ucase_toFullLower over the whole source.
     716             :                              uint8_t *dest, int32_t destCapacity,
     717             :                              const uint8_t *src, int32_t srcLength,
     718             :                              icu::Edits *edits,
     719             :                              UErrorCode &errorCode) {
     720           0 :     UCaseContext csc=UCASECONTEXT_INITIALIZER;
     721           0 :     csc.p=(void *)src;  // The context carries the source pointer and its limit for the mapping callback.
     722           0 :     csc.limit=srcLength;
     723             :     int32_t destIndex = _caseMap(
     724             :         caseLocale, options, ucase_toFullLower,
     725             :         dest, destCapacity,
     726             :         src, &csc, 0, srcLength,  // Map the range from 0 to srcLength.
     727           0 :         edits, errorCode);
     728           0 :     return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);  // The return value is whatever this final check yields for destIndex.
     729             : }
     730             : 
     731             : static int32_t U_CALLCONV
     732           0 : ucasemap_internalUTF8ToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED  // Uppercasing worker: Greek goes through GreekUpper::toUpper(), every other locale through _caseMap().
     733             :                              uint8_t *dest, int32_t destCapacity,
     734             :                              const uint8_t *src, int32_t srcLength,
     735             :                              icu::Edits *edits,
     736             :                              UErrorCode &errorCode) {
     737             :     int32_t destIndex;
     738           0 :     if (caseLocale == UCASE_LOC_GREEK) {
     739             :         destIndex = GreekUpper::toUpper(options, dest, destCapacity,
     740           0 :                                         src, srcLength, edits, errorCode);
     741             :     } else {
     742           0 :         UCaseContext csc=UCASECONTEXT_INITIALIZER;
     743           0 :         csc.p=(void *)src;  // The context carries the source pointer and its limit for the mapping callback.
     744           0 :         csc.limit=srcLength;
     745             :         destIndex = _caseMap(
     746             :             caseLocale, options, ucase_toFullUpper,
     747             :             dest, destCapacity,
     748             :             src, &csc, 0, srcLength,  // Map the range from 0 to srcLength.
     749           0 :             edits, errorCode);
     750             :     }
     751           0 :     return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);  // Both branches share this final check.
     752             : }
     753             : 
     754             : static int32_t U_CALLCONV
     755           0 : ucasemap_internalUTF8Fold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED  // Case-folding worker; the locale argument is unnamed and unused.
     756             :                           uint8_t *dest, int32_t destCapacity,
     757             :                           const uint8_t *src, int32_t srcLength,
     758             :                           icu::Edits *edits,
     759             :                           UErrorCode &errorCode) {
     760             :     /* case mapping loop */
     761           0 :     int32_t srcIndex = 0;
     762           0 :     int32_t destIndex = 0;
     763           0 :     while (srcIndex < srcLength) {
     764           0 :         int32_t cpStart = srcIndex;  // Remember where this code point starts; U8_NEXT moves srcIndex past it.
     765             :         UChar32 c;
     766           0 :         U8_NEXT(src, srcIndex, srcLength, c);
     767           0 :         if(c<0) {
     768             :             // Malformed UTF-8: pass the bytes src[cpStart..srcIndex) through appendUnchanged().
     769           0 :             destIndex=appendUnchanged(dest, destIndex, destCapacity,
     770           0 :                                       src+cpStart, srcIndex-cpStart, options, edits);
     771           0 :             if(destIndex<0) {
     772           0 :                 errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
     773           0 :                 return 0;
     774             :             }
     775           0 :             continue;
     776             :         }
     777             :         const UChar *s;
     778           0 :         c = ucase_toFullFolding(c, &s, options);  // The same options word is handed to the folding lookup and to appendResult().
     779           0 :         destIndex = appendResult(dest, destIndex, destCapacity, c, s,
     780           0 :                                  srcIndex - cpStart, options, edits);
     781           0 :         if (destIndex < 0) {  // A negative index is reported as U_INDEX_OUTOFBOUNDS_ERROR with a return value of 0.
     782           0 :             errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
     783           0 :             return 0;
     784             :         }
     785             :     }
     786             : 
     787           0 :     return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
     788             : }
     789             : 
     790             : U_CFUNC int32_t
     791           0 : ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM  // Shared driver: validates arguments, resolves srcLength, rejects overlapping buffers, resets edits, runs stringCaseMapper, then terminates dest.
     792             :                  uint8_t *dest, int32_t destCapacity,
     793             :                  const uint8_t *src, int32_t srcLength,
     794             :                  UTF8CaseMapper *stringCaseMapper,  // The worker that performs the actual mapping (one of the ucasemap_internalUTF8* functions in this file).
     795             :                  icu::Edits *edits,  // May be NULL; when non-NULL it is reset before mapping.
     796             :                  UErrorCode &errorCode) {
     797             :     int32_t destLength;
     798             : 
     799             :     /* check argument values */
     800           0 :     if(U_FAILURE(errorCode)) {  // An already-failed error code short-circuits with 0.
     801           0 :         return 0;
     802             :     }
     803           0 :     if( destCapacity<0 ||
     804           0 :         (dest==NULL && destCapacity>0) ||  // A NULL dest is accepted only together with destCapacity==0.
     805           0 :         src==NULL ||
     806             :         srcLength<-1
     807             :     ) {
     808           0 :         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
     809           0 :         return 0;
     810             :     }
     811             : 
     812             :     /* get the string length */
     813           0 :     if(srcLength==-1) {  // -1 means the length is taken from uprv_strlen(), i.e. src is treated as NUL-terminated.
     814           0 :         srcLength=(int32_t)uprv_strlen((const char *)src);
     815             :     }
     816             : 
     817             :     /* check for overlapping source and destination */
     818           0 :     if( dest!=NULL &&
     819           0 :         ((src>=dest && src<(dest+destCapacity)) ||  // src starts inside the destination buffer
     820           0 :          (dest>=src && dest<(src+srcLength)))  // dest starts inside the source text
     821             :     ) {
     822           0 :         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
     823           0 :         return 0;
     824             :     }
     825             : 
     826           0 :     if(edits!=NULL) {
     827           0 :         edits->reset();
     828             :     }
     829             :     destLength=stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
     830           0 :                                 dest, destCapacity, src, srcLength, edits, errorCode);
     831           0 :     return u_terminateChars((char *)dest, destCapacity, destLength, &errorCode);  // The return value comes from u_terminateChars(), which also receives the error code.
     832             : }
     833             : 
     834             : /* public API functions */
     835             : 
     836             : U_CAPI int32_t U_EXPORT2
     837           0 : ucasemap_utf8ToLower(const UCaseMap *csm,  // C API wrapper: lowercases UTF-8 using the locale and options stored in csm.
     838             :                      char *dest, int32_t destCapacity,
     839             :                      const char *src, int32_t srcLength,
     840             :                      UErrorCode *pErrorCode) {  // Dereferenced below without a NULL check, as is csm.
     841             :     return ucasemap_mapUTF8(
     842           0 :         csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
     843             :         (uint8_t *)dest, destCapacity,
     844             :         (const uint8_t *)src, srcLength,
     845           0 :         ucasemap_internalUTF8ToLower, NULL, *pErrorCode);  // NULL: this entry point records no Edits.
     846             : }
     847             : 
     848             : U_CAPI int32_t U_EXPORT2
     849           0 : ucasemap_utf8ToUpper(const UCaseMap *csm,  // C API wrapper: uppercases UTF-8 using the locale and options stored in csm.
     850             :                      char *dest, int32_t destCapacity,
     851             :                      const char *src, int32_t srcLength,
     852             :                      UErrorCode *pErrorCode) {  // Dereferenced below without a NULL check, as is csm.
     853             :     return ucasemap_mapUTF8(
     854           0 :         csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
     855             :         (uint8_t *)dest, destCapacity,
     856             :         (const uint8_t *)src, srcLength,
     857           0 :         ucasemap_internalUTF8ToUpper, NULL, *pErrorCode);  // NULL: this entry point records no Edits.
     858             : }
     859             : 
     860             : U_CAPI int32_t U_EXPORT2
     861           0 : ucasemap_utf8FoldCase(const UCaseMap *csm,  // C API wrapper: case-folds UTF-8; only csm->options is read, the locale is fixed to UCASE_LOC_ROOT.
     862             :                       char *dest, int32_t destCapacity,
     863             :                       const char *src, int32_t srcLength,
     864             :                       UErrorCode *pErrorCode) {  // Dereferenced below without a NULL check, as is csm.
     865             :     return ucasemap_mapUTF8(
     866           0 :         UCASE_LOC_ROOT, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
     867             :         (uint8_t *)dest, destCapacity,
     868             :         (const uint8_t *)src, srcLength,
     869           0 :         ucasemap_internalUTF8Fold, NULL, *pErrorCode);  // NULL: this entry point records no Edits.
     870             : }
     871             : 
     872             : U_NAMESPACE_BEGIN
     873             : 
     874           0 : int32_t CaseMap::utf8ToLower(  // C++ API wrapper: lowercases UTF-8 and forwards the caller's Edits pointer.
     875             :         const char *locale, uint32_t options,
     876             :         const char *src, int32_t srcLength,
     877             :         char *dest, int32_t destCapacity, Edits *edits,
     878             :         UErrorCode &errorCode) {
     879           0 :     return ucasemap_mapUTF8(
     880             :         ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL  // The locale string is converted to a case-locale value here.
     881             :         (uint8_t *)dest, destCapacity,
     882             :         (const uint8_t *)src, srcLength,
     883           0 :         ucasemap_internalUTF8ToLower, edits, errorCode);
     884             : }
     885             : 
     886           0 : int32_t CaseMap::utf8ToUpper(  // C++ API wrapper: uppercases UTF-8 and forwards the caller's Edits pointer.
     887             :         const char *locale, uint32_t options,
     888             :         const char *src, int32_t srcLength,
     889             :         char *dest, int32_t destCapacity, Edits *edits,
     890             :         UErrorCode &errorCode) {
     891           0 :     return ucasemap_mapUTF8(
     892             :         ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL  // The locale string is converted to a case-locale value here.
     893             :         (uint8_t *)dest, destCapacity,
     894             :         (const uint8_t *)src, srcLength,
     895           0 :         ucasemap_internalUTF8ToUpper, edits, errorCode);
     896             : }
     897             : 
     898           0 : int32_t CaseMap::utf8Fold(  // C++ API wrapper: case-folds UTF-8 and forwards the caller's Edits pointer.
     899             :         uint32_t options,
     900             :         const char *src, int32_t srcLength,
     901             :         char *dest, int32_t destCapacity, Edits *edits,
     902             :         UErrorCode &errorCode) {
     903             :     return ucasemap_mapUTF8(
     904             :         UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL  // Folding takes no locale argument; UCASE_LOC_ROOT is always passed.
     905             :         (uint8_t *)dest, destCapacity,
     906             :         (const uint8_t *)src, srcLength,
     907           0 :         ucasemap_internalUTF8Fold, edits, errorCode);
     908             : }
     909             : 
     910             : U_NAMESPACE_END

Generated by: LCOV version 1.13