LCOV - output.info - intl/icu/source/common/ustrcase.cpp

LCOV - code coverage report

Current view:	top level - intl/icu/source/common - ustrcase.cpp (source / functions)		Hit	Total	Coverage
Test:	output.info	Lines:	0	439	0.0 %
Date:	2017-07-14 16:53:18	Functions:	0	23	0.0 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : *******************************************************************************
       5             : *
       6             : *   Copyright (C) 2001-2015, International Business Machines
       7             : *   Corporation and others.  All Rights Reserved.
       8             : *
       9             : *******************************************************************************
      10             : *   file name:  ustrcase.cpp
      11             : *   encoding:   UTF-8
      12             : *   tab size:   8 (not used)
      13             : *   indentation:4
      14             : *
      15             : *   created on: 2002feb20
      16             : *   created by: Markus W. Scherer
      17             : *
      18             : *   Implementation file for string casing C API functions.
      19             : *   Uses functions from uchar.c for basic functionality that requires access
      20             : *   to the Unicode Character Database (uprops.dat).
      21             : */
      22             : 
      23             : #include "unicode/utypes.h"
      24             : #include "unicode/brkiter.h"
      25             : #include "unicode/casemap.h"
      26             : #include "unicode/edits.h"
      27             : #include "unicode/ustring.h"
      28             : #include "unicode/ucasemap.h"
      29             : #include "unicode/ubrk.h"
      30             : #include "unicode/utf.h"
      31             : #include "unicode/utf16.h"
      32             : #include "cmemory.h"
      33             : #include "ucase.h"
      34             : #include "ucasemap_imp.h"
      35             : #include "ustr_imp.h"
      36             : #include "uassert.h"
      37             : 
      38             : U_NAMESPACE_BEGIN
      39             : 
      40             : namespace {
      41             : 
      42           0 : int32_t checkOverflowAndEditsError(int32_t destIndex, int32_t destCapacity,  // Finalizes errorCode after a mapping pass; always returns destIndex unchanged.
      43             :                                    Edits *edits, UErrorCode &errorCode) {
      44           0 :     if (U_SUCCESS(errorCode)) {  // an earlier failure code is left untouched
      45           0 :         if (destIndex > destCapacity) {  // the output needs more room than the destination offers
      46           0 :             errorCode = U_BUFFER_OVERFLOW_ERROR;
      47           0 :         } else if (edits != NULL) {  // edits is optional; only consulted when the output fits
      48           0 :             edits->copyErrorTo(errorCode);  // presumably propagates an error recorded by the Edits object; see Edits::copyErrorTo
      49             :         }
      50             :     }
      51           0 :     return destIndex;
      52             : }
      53             : 
      54             : }  // namespace
      55             : 
      56             : U_NAMESPACE_END
      57             : 
      58             : U_NAMESPACE_USE
      59             : 
      60             : /* string casing ------------------------------------------------------------ */
      61             : 
      62             : /* Appends a full case mapping result, see UCASE_MAX_STRING_LENGTH. Returns the new destIndex, which may exceed destCapacity when the output does not fit (preflighting), or -1 if destIndex would overflow int32_t. */
      63             : static inline int32_t
      64           0 : appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
      65             :              int32_t result, const UChar *s,
      66             :              int32_t cpLength, uint32_t options, icu::Edits *edits) {
      67             :     UChar32 c;
      68             :     int32_t length;
      69             : 
      70             :     /* decode the result */
      71           0 :     if(result<0) {
      72             :         /* result is ~c, the bitwise NOT of the original code point: the text is recorded as unchanged */
      73           0 :         if(edits!=NULL) {
      74           0 :             edits->addUnchanged(cpLength);
      75           0 :             if(options & UCASEMAP_OMIT_UNCHANGED_TEXT) {  // only honored when edits are being recorded
      76           0 :                 return destIndex;
      77             :             }
      78             :         }
      79           0 :         c=~result;
      80           0 :         if(destIndex<destCapacity && c<=0xffff) {  // BMP slightly-fastpath
      81           0 :             dest[destIndex++]=(UChar)c;
      82           0 :             return destIndex;
      83             :         }
      84           0 :         length=cpLength;  // supplementary code point or no room left: use the generic append path below
      85             :     } else {
      86           0 :         if(result<=UCASE_MAX_STRING_LENGTH) {  // result is the length of a replacement string found at s
      87           0 :             c=U_SENTINEL;  // marks that s, not c, holds the output (tested via c>=0 below)
      88           0 :             length=result;
      89           0 :         } else if(destIndex<destCapacity && result<=0xffff) {  // BMP slightly-fastpath
      90           0 :             dest[destIndex++]=(UChar)result;
      91           0 :             if(edits!=NULL) {
      92           0 :                 edits->addReplace(cpLength, 1);
      93             :             }
      94           0 :             return destIndex;
      95             :         } else {
      96           0 :             c=result;  // result is a single replacement code point
      97           0 :             length=U16_LENGTH(c);
      98             :         }
      99           0 :         if(edits!=NULL) {
     100           0 :             edits->addReplace(cpLength, length);
     101             :         }
     102             :     }
     103           0 :     if(length>(INT32_MAX-destIndex)) {  // checked before any destIndex+length below
     104           0 :         return -1;  // integer overflow
     105             :     }
     106             : 
     107           0 :     if(destIndex<destCapacity) {
     108             :         /* append the result */
     109           0 :         if(c>=0) {
     110             :             /* code point */
     111           0 :             UBool isError=FALSE;
     112           0 :             U16_APPEND(dest, destIndex, destCapacity, c, isError);
     113           0 :             if(isError) {
     114             :                 /* overflow, nothing written; still advance so the caller learns the needed length */
     115           0 :                 destIndex+=length;
     116             :             }
     117             :         } else {
     118             :             /* string */
     119           0 :             if((destIndex+length)<=destCapacity) {
     120           0 :                 while(length>0) {
     121           0 :                     dest[destIndex++]=*s++;
     122           0 :                     --length;
     123             :                 }
     124             :             } else {
     125             :                 /* overflow: the string is written all-or-nothing, only the index advances */
     126           0 :                 destIndex+=length;
     127             :             }
     128             :         }
     129             :     } else {
     130             :         /* preflight: no room at all, only count the units that would be written */
     131           0 :         destIndex+=length;
     132             :     }
     133           0 :     return destIndex;
     134             : }
     135             : 
     136             : static inline int32_t  // Appends one code unit; returns destIndex+1, or -1 if that would overflow int32_t.
     137           0 : appendUChar(UChar *dest, int32_t destIndex, int32_t destCapacity, UChar c) {
     138           0 :     if(destIndex<destCapacity) {  // write only when there is room; otherwise just count
     139           0 :         dest[destIndex]=c;
     140           0 :     } else if(destIndex==INT32_MAX) {  // destIndex+1 below would overflow
     141           0 :         return -1;  // integer overflow
     142             :     }
     143           0 :     return destIndex+1;
     144             : }
     145             : 
     146             : static inline int32_t  // Copies length units from s as unchanged text; returns the new destIndex, or -1 on int32_t overflow.
     147             : appendUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity,
     148             :                 const UChar *s, int32_t length, uint32_t options, icu::Edits *edits) {
     149             :     if(length>0) {  // zero or negative length is a no-op
     150             :         if(edits!=NULL) {
     151             :             edits->addUnchanged(length);
     152             :             if(options & UCASEMAP_OMIT_UNCHANGED_TEXT) {  // only honored when edits are being recorded
     153             :                 return destIndex;
     154             :             }
     155             :         }
     156             :         if(length>(INT32_MAX-destIndex)) {
     157             :             return -1;  // integer overflow
     158             :         }
     159             :         if((destIndex+length)<=destCapacity) {  // copy all-or-nothing; a partial copy is never made
     160             :             u_memcpy(dest+destIndex, s, length);
     161             :         }
     162             :         destIndex+=length;  // advanced even when nothing was copied, so the needed length is still counted
     163             :     }
     164             :     return destIndex;
     165             : }
     166             : 
     167             : static UChar32 U_CALLCONV  // Context iterator over UTF-16 text described by a UCaseContext; passed to the case mapping functions as a callback.
     168           0 : utf16_caseContextIterator(void *context, int8_t dir) {
     169           0 :     UCaseContext *csc=(UCaseContext *)context;
     170             :     UChar32 c;
     171             : 
     172           0 :     if(dir<0) {
     173             :         /* reset for backward iteration, starting just before the current code point */
     174           0 :         csc->index=csc->cpStart;
     175           0 :         csc->dir=dir;
     176           0 :     } else if(dir>0) {
     177             :         /* reset for forward iteration, starting just after the current code point */
     178           0 :         csc->index=csc->cpLimit;
     179           0 :         csc->dir=dir;
     180             :     } else {
     181             :         /* continue current iteration direction (the one stored by the last reset) */
     182           0 :         dir=csc->dir;
     183             :     }
     184             : 
     185           0 :     if(dir<0) {
     186           0 :         if(csc->start<csc->index) {  // still something before index
     187           0 :             U16_PREV((const UChar *)csc->p, csc->start, csc->index, c);
     188           0 :             return c;
     189             :         }
     190             :     } else {
     191           0 :         if(csc->index<csc->limit) {  // still something after index
     192           0 :             U16_NEXT((const UChar *)csc->p, csc->index, csc->limit, c);
     193           0 :             return c;
     194             :         }
     195             :     }
     196           0 :     return U_SENTINEL;  // reached the edge of the text in the current direction
     197             : }
     198             : 
     199             : /*
     200             :  * Case-maps [srcStart..srcLimit[ with the given mapping function but takes
     201             :  * context [0..srcLength[ into account. Output is written to dest from index 0; returns the number of UChars the output needs, which may exceed destCapacity. On int32_t overflow sets errorCode to U_INDEX_OUTOFBOUNDS_ERROR and returns 0.
     202             :  */
     203             : static int32_t
     204           0 : _caseMap(int32_t caseLocale, uint32_t options, UCaseMapFull *map,
     205             :          UChar *dest, int32_t destCapacity,
     206             :          const UChar *src, UCaseContext *csc,
     207             :          int32_t srcStart, int32_t srcLimit,
     208             :          icu::Edits *edits,
     209             :          UErrorCode &errorCode) {
     210             :     /* case mapping loop: one code point per iteration */
     211           0 :     int32_t srcIndex=srcStart;
     212           0 :     int32_t destIndex=0;
     213           0 :     while(srcIndex<srcLimit) {
     214             :         int32_t cpStart;
     215           0 :         csc->cpStart=cpStart=srcIndex;  // tell the context iterator where the current code point begins
     216             :         UChar32 c;
     217           0 :         U16_NEXT(src, srcIndex, srcLimit, c);
     218           0 :         csc->cpLimit=srcIndex;  // and where it ends
     219             :         const UChar *s;
     220           0 :         c=map(c, utf16_caseContextIterator, csc, &s, caseLocale);  // c now holds the encoded mapping result, decoded by appendResult
     221           0 :         destIndex = appendResult(dest, destIndex, destCapacity, c, s,
     222           0 :                                  srcIndex - cpStart, options, edits);
     223           0 :         if (destIndex < 0) {  // appendResult reports int32_t overflow as -1
     224           0 :             errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
     225           0 :             return 0;
     226             :         }
     227             :     }
     228             : 
     229           0 :     return destIndex;
     230             : }
     231             : 
     232             : #if !UCONFIG_NO_BREAK_ITERATION
     233             : 
     234             : U_CFUNC int32_t U_CALLCONV  // Titlecases src segment by segment, using iter for the segment boundaries; returns the output length, which may exceed destCapacity.
     235             : ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *iter,
     236             :                          UChar *dest, int32_t destCapacity,
     237             :                          const UChar *src, int32_t srcLength,
     238             :                          icu::Edits *edits,
     239             :                          UErrorCode &errorCode) {
     240             :     if(U_FAILURE(errorCode)) {
     241             :         return 0;
     242             :     }
     243             : 
     244             :     /* set up local variables */
     245             :     UCaseContext csc=UCASECONTEXT_INITIALIZER;
     246             :     csc.p=(void *)src;
     247             :     csc.limit=srcLength;
     248             :     int32_t destIndex=0;
     249             :     int32_t prev=0;
     250             :     UBool isFirstIndex=TRUE;
     251             : 
     252             :     /* titlecasing loop: each iteration handles the segment [prev..index[ */
     253             :     while(prev<srcLength) {
     254             :         /* find next index where to titlecase */
     255             :         int32_t index;
     256             :         if(isFirstIndex) {
     257             :             isFirstIndex=FALSE;
     258             :             index=iter->first();
     259             :         } else {
     260             :             index=iter->next();
     261             :         }
     262             :         if(index==UBRK_DONE || index>srcLength) {  // clamp: no further boundary, or one beyond the text, means end of text
     263             :             index=srcLength;
     264             :         }
     265             : 
     266             :         /*
     267             :          * Unicode 4 & 5 section 3.13 Default Case Operations:
     268             :          *
     269             :          * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
     270             :          * #29, "Text Boundaries." Between each pair of word boundaries, find the first
     271             :          * cased character F. If F exists, map F to default_title(F); then map each
     272             :          * subsequent character C to default_lower(C).
     273             :          *
     274             :          * In this implementation, segment [prev..index[ into 3 parts:
     275             :          * a) uncased characters (copy as-is) [prev..titleStart[
     276             :          * b) first case letter (titlecase)         [titleStart..titleLimit[
     277             :          * c) subsequent characters (lowercase)                 [titleLimit..index[
     278             :          */
     279             :         if(prev<index) {
     280             :             /* find and copy uncased characters [prev..titleStart[ */
     281             :             int32_t titleStart=prev;
     282             :             int32_t titleLimit=prev;
     283             :             UChar32 c;
     284             :             U16_NEXT(src, titleLimit, index, c);
     285             :             if((options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(c)) {
     286             :                 /* Adjust the titlecasing index (titleStart) to the next cased character. */
     287             :                 for(;;) {
     288             :                     titleStart=titleLimit;
     289             :                     if(titleLimit==index) {
     290             :                         /*
     291             :                          * only uncased characters in [prev..index[
     292             :                          * stop with titleStart==titleLimit==index
     293             :                          */
     294             :                         break;
     295             :                     }
     296             :                     U16_NEXT(src, titleLimit, index, c);
     297             :                     if(UCASE_NONE!=ucase_getType(c)) {
     298             :                         break; /* cased letter at [titleStart..titleLimit[ */
     299             :                     }
     300             :                 }
     301             :                 destIndex=appendUnchanged(dest, destIndex, destCapacity,
     302             :                                           src+prev, titleStart-prev, options, edits);
     303             :                 if(destIndex<0) {
     304             :                     errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
     305             :                     return 0;
     306             :                 }
     307             :             }
     308             : 
     309             :             if(titleStart<titleLimit) {
     310             :                 /* titlecase c which is from [titleStart..titleLimit[ */
     311             :                 csc.cpStart=titleStart;
     312             :                 csc.cpLimit=titleLimit;
     313             :                 const UChar *s;
     314             :                 c=ucase_toFullTitle(c, utf16_caseContextIterator, &csc, &s, caseLocale);
     315             :                 destIndex=appendResult(dest, destIndex, destCapacity, c, s,
     316             :                                        titleLimit-titleStart, options, edits);
     317             :                 if(destIndex<0) {
     318             :                     errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
     319             :                     return 0;
     320             :                 }
     321             : 
     322             :                 /* Special case Dutch IJ titlecasing: after an initial I or i, a following j or J is output as J */
     323             :                 if (titleStart+1 < index &&
     324             :                         caseLocale == UCASE_LOC_DUTCH &&
     325             :                         (src[titleStart] == 0x0049 || src[titleStart] == 0x0069)) {
     326             :                     if (src[titleStart+1] == 0x006A) {  // lowercase j: emit uppercase J (U+004A) instead
     327             :                         destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A);
     328             :                         if(destIndex<0) {
     329             :                             errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
     330             :                             return 0;
     331             :                         }
     332             :                         if(edits!=NULL) {
     333             :                             edits->addReplace(1, 1);
     334             :                         }
     335             :                         titleLimit++;  // the j has been consumed; lowercasing starts after it
     336             :                     } else if (src[titleStart+1] == 0x004A) {
     337             :                         // Keep the capital J from getting lowercased.
     338             :                         destIndex=appendUnchanged(dest, destIndex, destCapacity,
     339             :                                                   src+titleStart+1, 1, options, edits);
     340             :                         if(destIndex<0) {
     341             :                             errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
     342             :                             return 0;
     343             :                         }
     344             :                         titleLimit++;
     345             :                     }
     346             :                 }
     347             : 
     348             :                 /* lowercase [titleLimit..index[ */
     349             :                 if(titleLimit<index) {
     350             :                     if((options&U_TITLECASE_NO_LOWERCASE)==0) {
     351             :                         /* Normal operation: Lowercase the rest of the word. */
     352             :                         destIndex+=
     353             :                             _caseMap(
     354             :                                 caseLocale, options, ucase_toFullLower,
     355             :                                 dest+destIndex, destCapacity-destIndex,  // NOTE(review): while preflighting destIndex can exceed destCapacity, so this pointer can lie past the buffer and the capacity can be negative - confirm this is intended
     356             :                                 src, &csc,
     357             :                                 titleLimit, index,
     358             :                                 edits, errorCode);
     359             :                         if(errorCode==U_BUFFER_OVERFLOW_ERROR) {  // overflow is decided once, at the end, by checkOverflowAndEditsError
     360             :                             errorCode=U_ZERO_ERROR;
     361             :                         }
     362             :                         if(U_FAILURE(errorCode)) {
     363             :                             return destIndex;  // NOTE(review): every other failure path in this function returns 0 - confirm callers ignore the value on failure
     364             :                         }
     365             :                     } else {
     366             :                         /* Optionally just copy the rest of the word unchanged. */
     367             :                         destIndex=appendUnchanged(dest, destIndex, destCapacity,
     368             :                                                   src+titleLimit, index-titleLimit, options, edits);
     369             :                         if(destIndex<0) {
     370             :                             errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
     371             :                             return 0;
     372             :                         }
     373             :                     }
     374             :                 }
     375             :             }
     376             :         }
     377             : 
     378             :         prev=index;
     379             :     }
     380             : 
     381             :     return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
     382             : }
     383             : 
     384             : #endif  // !UCONFIG_NO_BREAK_ITERATION
     385             : 
     386             : U_NAMESPACE_BEGIN
     387             : namespace GreekUpper {
     388             : 
     389             : // Data generated by prototype code, see
     390             : // http://site.icu-project.org/design/case/greek-upper
     391             : // TODO: Move this data into ucase.icu.
     392             : static const uint16_t data0370[] = {
     393             :     // U+0370..03FF: 144 entries, presumably indexed by code point minus 0x0370 (confirm against the lookup code). Nonzero entries are a code point value ORed with HAS_* flag bits that are defined outside this view.
     394             :     0x0370,  // U+0370
     395             :     0x0370,
     396             :     0x0372,
     397             :     0x0372,
     398             :     0,
     399             :     0,
     400             :     0x0376,
     401             :     0x0376,
     402             :     0,
     403             :     0,
     404             :     0x037A,
     405             :     0x03FD,
     406             :     0x03FE,
     407             :     0x03FF,
     408             :     0,
     409             :     0x037F,
     410             :     0,
     411             :     0,
     412             :     0,
     413             :     0,
     414             :     0,
     415             :     0,
     416             :     0x0391 | HAS_VOWEL | HAS_ACCENT,  // U+0386
     417             :     0,
     418             :     0x0395 | HAS_VOWEL | HAS_ACCENT,
     419             :     0x0397 | HAS_VOWEL | HAS_ACCENT,
     420             :     0x0399 | HAS_VOWEL | HAS_ACCENT,
     421             :     0,
     422             :     0x039F | HAS_VOWEL | HAS_ACCENT,
     423             :     0,
     424             :     0x03A5 | HAS_VOWEL | HAS_ACCENT,
     425             :     0x03A9 | HAS_VOWEL | HAS_ACCENT,
     426             :     0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
     427             :     0x0391 | HAS_VOWEL,  // U+0391
     428             :     0x0392,
     429             :     0x0393,
     430             :     0x0394,
     431             :     0x0395 | HAS_VOWEL,
     432             :     0x0396,
     433             :     0x0397 | HAS_VOWEL,
     434             :     0x0398,
     435             :     0x0399 | HAS_VOWEL,
     436             :     0x039A,
     437             :     0x039B,
     438             :     0x039C,
     439             :     0x039D,
     440             :     0x039E,
     441             :     0x039F | HAS_VOWEL,
     442             :     0x03A0,
     443             :     0x03A1,
     444             :     0,
     445             :     0x03A3,
     446             :     0x03A4,
     447             :     0x03A5 | HAS_VOWEL,
     448             :     0x03A6,
     449             :     0x03A7,
     450             :     0x03A8,
     451             :     0x03A9 | HAS_VOWEL,
     452             :     0x0399 | HAS_VOWEL | HAS_DIALYTIKA,
     453             :     0x03A5 | HAS_VOWEL | HAS_DIALYTIKA,
     454             :     0x0391 | HAS_VOWEL | HAS_ACCENT,
     455             :     0x0395 | HAS_VOWEL | HAS_ACCENT,
     456             :     0x0397 | HAS_VOWEL | HAS_ACCENT,
     457             :     0x0399 | HAS_VOWEL | HAS_ACCENT,
     458             :     0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
     459             :     0x0391 | HAS_VOWEL,  // U+03B1
     460             :     0x0392,
     461             :     0x0393,
     462             :     0x0394,
     463             :     0x0395 | HAS_VOWEL,
     464             :     0x0396,
     465             :     0x0397 | HAS_VOWEL,
     466             :     0x0398,
     467             :     0x0399 | HAS_VOWEL,
     468             :     0x039A,
     469             :     0x039B,
     470             :     0x039C,
     471             :     0x039D,
     472             :     0x039E,
     473             :     0x039F | HAS_VOWEL,
     474             :     0x03A0,
     475             :     0x03A1,
     476             :     0x03A3,  // U+03C2
     477             :     0x03A3,
     478             :     0x03A4,
     479             :     0x03A5 | HAS_VOWEL,
     480             :     0x03A6,
     481             :     0x03A7,
     482             :     0x03A8,
     483             :     0x03A9 | HAS_VOWEL,
     484             :     0x0399 | HAS_VOWEL | HAS_DIALYTIKA,
     485             :     0x03A5 | HAS_VOWEL | HAS_DIALYTIKA,
     486             :     0x039F | HAS_VOWEL | HAS_ACCENT,
     487             :     0x03A5 | HAS_VOWEL | HAS_ACCENT,
     488             :     0x03A9 | HAS_VOWEL | HAS_ACCENT,
     489             :     0x03CF,
     490             :     0x0392,
     491             :     0x0398,
     492             :     0x03D2,
     493             :     0x03D2 | HAS_ACCENT,
     494             :     0x03D2 | HAS_DIALYTIKA,
     495             :     0x03A6,
     496             :     0x03A0,
     497             :     0x03CF,
     498             :     0x03D8,
     499             :     0x03D8,
     500             :     0x03DA,
     501             :     0x03DA,
     502             :     0x03DC,
     503             :     0x03DC,
     504             :     0x03DE,
     505             :     0x03DE,
     506             :     0x03E0,
     507             :     0x03E0,
     508             :     0,
     509             :     0,
     510             :     0,
     511             :     0,
     512             :     0,
     513             :     0,
     514             :     0,
     515             :     0,
     516             :     0,
     517             :     0,
     518             :     0,
     519             :     0,
     520             :     0,
     521             :     0,
     522             :     0x039A,  // U+03F0
     523             :     0x03A1,
     524             :     0x03F9,
     525             :     0x037F,
     526             :     0x03F4,
     527             :     0x0395 | HAS_VOWEL,
     528             :     0,
     529             :     0x03F7,
     530             :     0x03F7,
     531             :     0x03F9,
     532             :     0x03FA,
     533             :     0x03FA,
     534             :     0x03FC,
     535             :     0x03FD,
     536             :     0x03FE,
     537             :     0x03FF,  // U+03FF
     538             : };
     539             : 
     540             : static const uint16_t data1F00[] = {
     541             :     // U+1F00..1FFF
     542             :     0x0391 | HAS_VOWEL,
     543             :     0x0391 | HAS_VOWEL,
     544             :     0x0391 | HAS_VOWEL | HAS_ACCENT,
     545             :     0x0391 | HAS_VOWEL | HAS_ACCENT,
     546             :     0x0391 | HAS_VOWEL | HAS_ACCENT,
     547             :     0x0391 | HAS_VOWEL | HAS_ACCENT,
     548             :     0x0391 | HAS_VOWEL | HAS_ACCENT,
     549             :     0x0391 | HAS_VOWEL | HAS_ACCENT,
     550             :     0x0391 | HAS_VOWEL,
     551             :     0x0391 | HAS_VOWEL,
     552             :     0x0391 | HAS_VOWEL | HAS_ACCENT,
     553             :     0x0391 | HAS_VOWEL | HAS_ACCENT,
     554             :     0x0391 | HAS_VOWEL | HAS_ACCENT,
     555             :     0x0391 | HAS_VOWEL | HAS_ACCENT,
     556             :     0x0391 | HAS_VOWEL | HAS_ACCENT,
     557             :     0x0391 | HAS_VOWEL | HAS_ACCENT,
     558             :     0x0395 | HAS_VOWEL,
     559             :     0x0395 | HAS_VOWEL,
     560             :     0x0395 | HAS_VOWEL | HAS_ACCENT,
     561             :     0x0395 | HAS_VOWEL | HAS_ACCENT,
     562             :     0x0395 | HAS_VOWEL | HAS_ACCENT,
     563             :     0x0395 | HAS_VOWEL | HAS_ACCENT,
     564             :     0,
     565             :     0,
     566             :     0x0395 | HAS_VOWEL,
     567             :     0x0395 | HAS_VOWEL,
     568             :     0x0395 | HAS_VOWEL | HAS_ACCENT,
     569             :     0x0395 | HAS_VOWEL | HAS_ACCENT,
     570             :     0x0395 | HAS_VOWEL | HAS_ACCENT,
     571             :     0x0395 | HAS_VOWEL | HAS_ACCENT,
     572             :     0,
     573             :     0,
     574             :     0x0397 | HAS_VOWEL,
     575             :     0x0397 | HAS_VOWEL,
     576             :     0x0397 | HAS_VOWEL | HAS_ACCENT,
     577             :     0x0397 | HAS_VOWEL | HAS_ACCENT,
     578             :     0x0397 | HAS_VOWEL | HAS_ACCENT,
     579             :     0x0397 | HAS_VOWEL | HAS_ACCENT,
     580             :     0x0397 | HAS_VOWEL | HAS_ACCENT,
     581             :     0x0397 | HAS_VOWEL | HAS_ACCENT,
     582             :     0x0397 | HAS_VOWEL,
     583             :     0x0397 | HAS_VOWEL,
     584             :     0x0397 | HAS_VOWEL | HAS_ACCENT,
     585             :     0x0397 | HAS_VOWEL | HAS_ACCENT,
     586             :     0x0397 | HAS_VOWEL | HAS_ACCENT,
     587             :     0x0397 | HAS_VOWEL | HAS_ACCENT,
     588             :     0x0397 | HAS_VOWEL | HAS_ACCENT,
     589             :     0x0397 | HAS_VOWEL | HAS_ACCENT,
     590             :     0x0399 | HAS_VOWEL,
     591             :     0x0399 | HAS_VOWEL,
     592             :     0x0399 | HAS_VOWEL | HAS_ACCENT,
     593             :     0x0399 | HAS_VOWEL | HAS_ACCENT,
     594             :     0x0399 | HAS_VOWEL | HAS_ACCENT,
     595             :     0x0399 | HAS_VOWEL | HAS_ACCENT,
     596             :     0x0399 | HAS_VOWEL | HAS_ACCENT,
     597             :     0x0399 | HAS_VOWEL | HAS_ACCENT,
     598             :     0x0399 | HAS_VOWEL,
     599             :     0x0399 | HAS_VOWEL,
     600             :     0x0399 | HAS_VOWEL | HAS_ACCENT,
     601             :     0x0399 | HAS_VOWEL | HAS_ACCENT,
     602             :     0x0399 | HAS_VOWEL | HAS_ACCENT,
     603             :     0x0399 | HAS_VOWEL | HAS_ACCENT,
     604             :     0x0399 | HAS_VOWEL | HAS_ACCENT,
     605             :     0x0399 | HAS_VOWEL | HAS_ACCENT,
     606             :     0x039F | HAS_VOWEL,
     607             :     0x039F | HAS_VOWEL,
     608             :     0x039F | HAS_VOWEL | HAS_ACCENT,
     609             :     0x039F | HAS_VOWEL | HAS_ACCENT,
     610             :     0x039F | HAS_VOWEL | HAS_ACCENT,
     611             :     0x039F | HAS_VOWEL | HAS_ACCENT,
     612             :     0,
     613             :     0,
     614             :     0x039F | HAS_VOWEL,
     615             :     0x039F | HAS_VOWEL,
     616             :     0x039F | HAS_VOWEL | HAS_ACCENT,
     617             :     0x039F | HAS_VOWEL | HAS_ACCENT,
     618             :     0x039F | HAS_VOWEL | HAS_ACCENT,
     619             :     0x039F | HAS_VOWEL | HAS_ACCENT,
     620             :     0,
     621             :     0,
     622             :     0x03A5 | HAS_VOWEL,
     623             :     0x03A5 | HAS_VOWEL,
     624             :     0x03A5 | HAS_VOWEL | HAS_ACCENT,
     625             :     0x03A5 | HAS_VOWEL | HAS_ACCENT,
     626             :     0x03A5 | HAS_VOWEL | HAS_ACCENT,
     627             :     0x03A5 | HAS_VOWEL | HAS_ACCENT,
     628             :     0x03A5 | HAS_VOWEL | HAS_ACCENT,
     629             :     0x03A5 | HAS_VOWEL | HAS_ACCENT,
     630             :     0,
     631             :     0x03A5 | HAS_VOWEL,
     632             :     0,
     633             :     0x03A5 | HAS_VOWEL | HAS_ACCENT,
     634             :     0,
     635             :     0x03A5 | HAS_VOWEL | HAS_ACCENT,
     636             :     0,
     637             :     0x03A5 | HAS_VOWEL | HAS_ACCENT,
     638             :     0x03A9 | HAS_VOWEL,
     639             :     0x03A9 | HAS_VOWEL,
     640             :     0x03A9 | HAS_VOWEL | HAS_ACCENT,
     641             :     0x03A9 | HAS_VOWEL | HAS_ACCENT,
     642             :     0x03A9 | HAS_VOWEL | HAS_ACCENT,
     643             :     0x03A9 | HAS_VOWEL | HAS_ACCENT,
     644             :     0x03A9 | HAS_VOWEL | HAS_ACCENT,
     645             :     0x03A9 | HAS_VOWEL | HAS_ACCENT,
     646             :     0x03A9 | HAS_VOWEL,
     647             :     0x03A9 | HAS_VOWEL,
     648             :     0x03A9 | HAS_VOWEL | HAS_ACCENT,
     649             :     0x03A9 | HAS_VOWEL | HAS_ACCENT,
     650             :     0x03A9 | HAS_VOWEL | HAS_ACCENT,
     651             :     0x03A9 | HAS_VOWEL | HAS_ACCENT,
     652             :     0x03A9 | HAS_VOWEL | HAS_ACCENT,
     653             :     0x03A9 | HAS_VOWEL | HAS_ACCENT,
     654             :     0x0391 | HAS_VOWEL | HAS_ACCENT,
     655             :     0x0391 | HAS_VOWEL | HAS_ACCENT,
     656             :     0x0395 | HAS_VOWEL | HAS_ACCENT,
     657             :     0x0395 | HAS_VOWEL | HAS_ACCENT,
     658             :     0x0397 | HAS_VOWEL | HAS_ACCENT,
     659             :     0x0397 | HAS_VOWEL | HAS_ACCENT,
     660             :     0x0399 | HAS_VOWEL | HAS_ACCENT,
     661             :     0x0399 | HAS_VOWEL | HAS_ACCENT,
     662             :     0x039F | HAS_VOWEL | HAS_ACCENT,
     663             :     0x039F | HAS_VOWEL | HAS_ACCENT,
     664             :     0x03A5 | HAS_VOWEL | HAS_ACCENT,
     665             :     0x03A5 | HAS_VOWEL | HAS_ACCENT,
     666             :     0x03A9 | HAS_VOWEL | HAS_ACCENT,
     667             :     0x03A9 | HAS_VOWEL | HAS_ACCENT,
     668             :     0,
     669             :     0,
     670             :     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
     671             :     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
     672             :     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     673             :     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     674             :     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     675             :     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     676             :     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     677             :     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     678             :     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
     679             :     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
     680             :     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     681             :     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     682             :     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     683             :     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     684             :     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     685             :     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     686             :     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
     687             :     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
     688             :     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     689             :     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     690             :     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     691             :     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     692             :     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     693             :     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     694             :     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
     695             :     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
     696             :     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     697             :     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     698             :     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     699             :     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     700             :     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     701             :     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     702             :     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
     703             :     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
     704             :     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     705             :     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     706             :     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     707             :     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     708             :     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     709             :     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     710             :     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
     711             :     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
     712             :     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     713             :     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     714             :     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     715             :     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     716             :     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     717             :     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     718             :     0x0391 | HAS_VOWEL,
     719             :     0x0391 | HAS_VOWEL,
     720             :     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     721             :     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
     722             :     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     723             :     0,
     724             :     0x0391 | HAS_VOWEL | HAS_ACCENT,
     725             :     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     726             :     0x0391 | HAS_VOWEL,
     727             :     0x0391 | HAS_VOWEL,
     728             :     0x0391 | HAS_VOWEL | HAS_ACCENT,
     729             :     0x0391 | HAS_VOWEL | HAS_ACCENT,
     730             :     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
     731             :     0,
     732             :     0x0399 | HAS_VOWEL,
     733             :     0,
     734             :     0,
     735             :     0,
     736             :     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     737             :     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
     738             :     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     739             :     0,
     740             :     0x0397 | HAS_VOWEL | HAS_ACCENT,
     741             :     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     742             :     0x0395 | HAS_VOWEL | HAS_ACCENT,
     743             :     0x0395 | HAS_VOWEL | HAS_ACCENT,
     744             :     0x0397 | HAS_VOWEL | HAS_ACCENT,
     745             :     0x0397 | HAS_VOWEL | HAS_ACCENT,
     746             :     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
     747             :     0,
     748             :     0,
     749             :     0,
     750             :     0x0399 | HAS_VOWEL,
     751             :     0x0399 | HAS_VOWEL,
     752             :     0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
     753             :     0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
     754             :     0,
     755             :     0,
     756             :     0x0399 | HAS_VOWEL | HAS_ACCENT,
     757             :     0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
     758             :     0x0399 | HAS_VOWEL,
     759             :     0x0399 | HAS_VOWEL,
     760             :     0x0399 | HAS_VOWEL | HAS_ACCENT,
     761             :     0x0399 | HAS_VOWEL | HAS_ACCENT,
     762             :     0,
     763             :     0,
     764             :     0,
     765             :     0,
     766             :     0x03A5 | HAS_VOWEL,
     767             :     0x03A5 | HAS_VOWEL,
     768             :     0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
     769             :     0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
     770             :     0x03A1,
     771             :     0x03A1,
     772             :     0x03A5 | HAS_VOWEL | HAS_ACCENT,
     773             :     0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
     774             :     0x03A5 | HAS_VOWEL,
     775             :     0x03A5 | HAS_VOWEL,
     776             :     0x03A5 | HAS_VOWEL | HAS_ACCENT,
     777             :     0x03A5 | HAS_VOWEL | HAS_ACCENT,
     778             :     0x03A1,
     779             :     0,
     780             :     0,
     781             :     0,
     782             :     0,
     783             :     0,
     784             :     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     785             :     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
     786             :     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     787             :     0,
     788             :     0x03A9 | HAS_VOWEL | HAS_ACCENT,
     789             :     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
     790             :     0x039F | HAS_VOWEL | HAS_ACCENT,
     791             :     0x039F | HAS_VOWEL | HAS_ACCENT,
     792             :     0x03A9 | HAS_VOWEL | HAS_ACCENT,
     793             :     0x03A9 | HAS_VOWEL | HAS_ACCENT,
     794             :     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
     795             :     0,
     796             :     0,
     797             :     0,
     798             : };
     799             : 
     800             : // U+2126 Ohm sign: a lone entry outside both lookup tables; its data is capital omega (U+03A9) flagged as a vowel.
     801             : static const uint16_t data2126 = 0x03A9 | HAS_VOWEL;
     802             : 
     803           0 : uint32_t getLetterData(UChar32 c) {  // Returns the Greek data word for c (uppercase base letter OR'ed with HAS_* flags), or 0 if c has no entry.
     804           0 :     if (c < 0x370 || 0x2126 < c || (0x3ff < c && c < 0x1f00)) {  // below U+0370, above U+2126, or in the gap U+0400..U+1EFF
     805           0 :         return 0;
     806           0 :     } else if (c <= 0x3ff) {
     807           0 :         return data0370[c - 0x370];  // U+0370..U+03FF, table indexed from U+0370
     808           0 :     } else if (c <= 0x1fff) {
     809           0 :         return data1F00[c - 0x1f00];  // U+1F00..U+1FFF, table indexed from U+1F00
     810           0 :     } else if (c == 0x2126) {
     811           0 :         return data2126;  // the single entry for U+2126
     812             :     } else {
     813           0 :         return 0;  // U+2000..U+2125: no Greek data
     814             :     }
     815             : }
     816             : 
     817           0 : uint32_t getDiacriticData(UChar32 c) {  // Classifies a combining mark: returns the HAS_* flag(s) for c, or 0 if c is not one of the marks listed below.
     818           0 :     switch (c) {
     819             :     case 0x0300:  // varia
     820             :     case 0x0301:  // tonos = oxia
     821             :     case 0x0342:  // perispomeni
     822             :     case 0x0302:  // circumflex can look like perispomeni
     823             :     case 0x0303:  // tilde can look like perispomeni
     824             :     case 0x0311:  // inverted breve can look like perispomeni
     825           0 :         return HAS_ACCENT;  // all of the above count as an accent
     826             :     case 0x0308:  // dialytika = diaeresis
     827           0 :         return HAS_COMBINING_DIALYTIKA;
     828             :     case 0x0344:  // dialytika tonos
     829           0 :         return HAS_COMBINING_DIALYTIKA | HAS_ACCENT;  // one code point carrying both marks
     830             :     case 0x0345:  // ypogegrammeni = iota subscript
     831           0 :         return HAS_YPOGEGRAMMENI;
     832             :     case 0x0304:  // macron
     833             :     case 0x0306:  // breve
     834             :     case 0x0313:  // comma above
     835             :     case 0x0314:  // reversed comma above
     836             :     case 0x0343:  // koronis
     837           0 :         return HAS_OTHER_GREEK_DIACRITIC;  // recognized as a Greek diacritic, but neither accent, dialytika nor ypogegrammeni
     838             :     default:
     839           0 :         return 0;  // not a diacritic handled here
     840             :     }
     841             : }
     842             : 
     843           0 : UBool isFollowedByCasedLetter(const UChar *s, int32_t i, int32_t length) {  // Scans s[i, length): skips case-ignorable code points and returns TRUE iff the first other code point is cased.
     844           0 :     while (i < length) {
     845             :         UChar32 c;
     846           0 :         U16_NEXT(s, i, length, c);  // reads one code point into c and advances i
     847           0 :         int32_t type = ucase_getTypeOrIgnorable(c);
     848           0 :         if ((type & UCASE_IGNORABLE) != 0) {
     849             :             // Case-ignorable, continue with the loop.
     850           0 :         } else if (type != UCASE_NONE) {
     851           0 :             return TRUE;  // Followed by cased letter.
     852             :         } else {
     853           0 :             return FALSE;  // Uncased and not case-ignorable.
     854             :         }
     855             :     }
     856           0 :     return FALSE;  // Not followed by cased letter (reached the end of the text).
     857             : }
     858             : 
     859             : /**
     860             :  * Greek string uppercasing with a state machine.
     861             :  * Probably simpler than a stateless function that has to figure out complex context-before
     862             :  * for each character.
     863             :  * TODO: Try to re-consolidate one way or another with the non-Greek function.
                     :  *
                     :  * Walks src one code point at a time. A code point with an entry in the Greek
                     :  * letter tables (getLetterData() != 0) is handled here together with the Greek
                     :  * diacritics (getDiacriticData() != 0) that directly follow it: the output is the
                     :  * uppercase base letter, optionally U+0308, optionally U+0301, and one U+0399 for
                     :  * each ypogegrammeni. Every other code point goes through
                     :  * ucase_toFullUpper(..., UCASE_LOC_GREEK) and appendResult().
                     :  *
                     :  * The state carried from one code point to the next consists of AFTER_CASED and
                     :  * AFTER_VOWEL_WITH_ACCENT.
                     :  *
                     :  * @param options      Option bits; UCASEMAP_OMIT_UNCHANGED_TEXT is tested here (only
                     :  *                     when edits is non-NULL) and options is also passed to appendResult().
                     :  * @param dest         Output buffer, passed through to appendUChar()/appendResult().
                     :  * @param destCapacity Capacity passed through to appendUChar()/appendResult().
                     :  * @param src          Input text.
                     :  * @param srcLength    Number of UChars in src; the loop runs while i < srcLength.
                     :  * @param edits        If not NULL, receives addReplace()/addUnchanged() records.
                     :  * @param errorCode    Set to U_INDEX_OUTOFBOUNDS_ERROR when an append helper returns
                     :  *                     a negative index.
                     :  * @return The final destIndex, or 0 after setting errorCode.
     864             :  */
     865           0 : int32_t toUpper(uint32_t options,
     866             :                 UChar *dest, int32_t destCapacity,
     867             :                 const UChar *src, int32_t srcLength,
     868             :                 Edits *edits,
     869             :                 UErrorCode &errorCode) {
     870           0 :     int32_t destIndex=0;
     871           0 :     uint32_t state = 0;
     872           0 :     for (int32_t i = 0; i < srcLength;) {
     873           0 :         int32_t nextIndex = i;
     874             :         UChar32 c;
     875           0 :         U16_NEXT(src, nextIndex, srcLength, c);  // nextIndex now points just past c; i still marks its start
     876           0 :         uint32_t nextState = 0;  // rebuilt from scratch for every code point
     877           0 :         int32_t type = ucase_getTypeOrIgnorable(c);
     878           0 :         if ((type & UCASE_IGNORABLE) != 0) {
     879             :             // c is case-ignorable: carry over the AFTER_CASED bit unchanged
     880           0 :             nextState |= (state & AFTER_CASED);
     881           0 :         } else if (type != UCASE_NONE) {
     882             :             // c is cased
     883           0 :             nextState |= AFTER_CASED;
     884             :         }
     885           0 :         uint32_t data = getLetterData(c);
     886           0 :         if (data > 0) {
     887           0 :             uint32_t upper = data & UPPER_MASK;  // uppercase base letter without the flag bits
     888             :             // Add a dialytika to this iota or ypsilon vowel
     889             :             // if we removed a tonos from the previous vowel,
     890             :             // and that previous vowel did not also have (or gain) a dialytika.
     891             :             // Adding one only to the final vowel in a longer sequence
     892             :             // (which does not occur in normal writing) would require lookahead.
     893             :             // Set the same flag as for preserving an existing dialytika.
     894           0 :             if ((data & HAS_VOWEL) != 0 && (state & AFTER_VOWEL_WITH_ACCENT) != 0 &&
     895           0 :                     (upper == 0x399 || upper == 0x3A5)) {
     896           0 :                 data |= HAS_DIALYTIKA;
     897             :             }
     898           0 :             int32_t numYpogegrammeni = 0;  // Map each one to a trailing, spacing, capital iota.
     899           0 :             if ((data & HAS_YPOGEGRAMMENI) != 0) {
     900           0 :                 numYpogegrammeni = 1;  // the precomposed letter itself already carries one
     901             :             }
     902             :             // Skip combining diacritics after this Greek letter, folding their flags into data.
     903           0 :             while (nextIndex < srcLength) {
     904           0 :                 uint32_t diacriticData = getDiacriticData(src[nextIndex]);
     905           0 :                 if (diacriticData != 0) {
     906           0 :                     data |= diacriticData;
     907           0 :                     if ((diacriticData & HAS_YPOGEGRAMMENI) != 0) {
     908           0 :                         ++numYpogegrammeni;
     909             :                     }
     910           0 :                     ++nextIndex;
     911             :                 } else {
     912           0 :                     break;  // not a Greek diacritic
     913             :                 }
     914             :             }
     915           0 :             if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) {  // vowel with an accent but without the HAS_DIALYTIKA bit
     916           0 :                 nextState |= AFTER_VOWEL_WITH_ACCENT;
     917             :             }
     918             :             // Map according to Greek rules.
     919           0 :             UBool addTonos = FALSE;
     920           0 :             if (upper == 0x397 &&
     921           0 :                     (data & HAS_ACCENT) != 0 &&
     922           0 :                     numYpogegrammeni == 0 &&
     923           0 :                     (state & AFTER_CASED) == 0 &&
     924           0 :                     !isFollowedByCasedLetter(src, nextIndex, srcLength)) {
     925             :                 // Keep disjunctive "or" with (only) a tonos.
     926             :                 // We use the same "word boundary" conditions as for the Final_Sigma test.
     927           0 :                 if (i == nextIndex) {  // NOTE(review): U16_NEXT above already advanced nextIndex past i, so this looks like it can never be true and the U+0389 branch appears unreachable; confirm intent (perhaps nextIndex == i + 1?).
     928           0 :                     upper = 0x389;  // Preserve the precomposed form.
     929             :                 } else {
     930           0 :                     addTonos = TRUE;
     931             :                 }
     932           0 :             } else if ((data & HAS_DIALYTIKA) != 0) {
     933             :                 // Preserve a vowel with dialytika in precomposed form if it exists.
     934           0 :                 if (upper == 0x399) {
     935           0 :                     upper = 0x3AA;
     936           0 :                     data &= ~HAS_EITHER_DIALYTIKA;  // the dialytika is now part of upper; do not also write U+0308
     937           0 :                 } else if (upper == 0x3A5) {
     938           0 :                     upper = 0x3AB;
     939           0 :                     data &= ~HAS_EITHER_DIALYTIKA;  // same as above
     940             :                 }
     941             :             }
     942             : 
     943           0 :             UBool change = TRUE;  // without edits the mapped text is always written
     944           0 :             if (edits != NULL) {
     945             :                 // Find out first whether we are changing the text.
     946           0 :                 change = src[i] != upper || numYpogegrammeni > 0;
     947           0 :                 int32_t i2 = i + 1;  // source position where the next output mark would have to be for the text to be unchanged
     948           0 :                 if ((data & HAS_EITHER_DIALYTIKA) != 0) {
     949           0 :                     change |= i2 >= nextIndex || src[i2] != 0x308;
     950           0 :                     ++i2;
     951             :                 }
     952           0 :                 if (addTonos) {
     953           0 :                     change |= i2 >= nextIndex || src[i2] != 0x301;
     954           0 :                     ++i2;
     955             :                 }
     956           0 :                 int32_t oldLength = nextIndex - i;  // base letter plus the diacritics consumed above
     957           0 :                 int32_t newLength = (i2 - i) + numYpogegrammeni;  // base letter, +1 per U+0308/U+0301 written, +1 per U+0399
     958           0 :                 change |= oldLength != newLength;
     959           0 :                 if (change) {
     960           0 :                     if (edits != NULL) {  // always true here: already inside the outer edits != NULL block
     961           0 :                         edits->addReplace(oldLength, newLength);
     962             :                     }
     963             :                 } else {
     964           0 :                     if (edits != NULL) {  // always true here: already inside the outer edits != NULL block
     965           0 :                         edits->addUnchanged(oldLength);
     966             :                     }
     967             :                     // Write unchanged text? From here on "change" means "write to dest".
     968           0 :                     change = (options & UCASEMAP_OMIT_UNCHANGED_TEXT) == 0;
     969             :                 }
     970             :             }
     971             : 
     972           0 :             if (change) {
     973           0 :                 destIndex=appendUChar(dest, destIndex, destCapacity, (UChar)upper);
     974           0 :                 if (destIndex >= 0 && (data & HAS_EITHER_DIALYTIKA) != 0) {
     975           0 :                     destIndex=appendUChar(dest, destIndex, destCapacity, 0x308);  // restore or add a dialytika
     976             :                 }
     977           0 :                 if (destIndex >= 0 && addTonos) {
     978           0 :                     destIndex=appendUChar(dest, destIndex, destCapacity, 0x301);
     979             :                 }
     980           0 :                 while (destIndex >= 0 && numYpogegrammeni > 0) {
     981           0 :                     destIndex=appendUChar(dest, destIndex, destCapacity, 0x399);  // one capital iota per ypogegrammeni
     982           0 :                     --numYpogegrammeni;
     983             :                 }
     984           0 :                 if(destIndex<0) {  // a negative index from appendUChar is reported as an error
     985           0 :                     errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
     986           0 :                     return 0;
     987             :                 }
     988             :             }
     989             :         } else {
     990             :             const UChar *s;
     991           0 :             c=ucase_toFullUpper(c, NULL, NULL, &s, UCASE_LOC_GREEK);  // no Greek table entry: use the regular full uppercase mapping
     992           0 :             destIndex = appendResult(dest, destIndex, destCapacity, c, s,
     993           0 :                                      nextIndex - i, options, edits);
     994           0 :             if (destIndex < 0) {
     995           0 :                 errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
     996           0 :                 return 0;
     997             :             }
     998             :         }
     999           0 :         i = nextIndex;  // continue after the letter and any diacritics consumed with it
    1000           0 :         state = nextState;
    1001             :     }
    1002             : 
    1003           0 :     return destIndex;
    1004             : }
    1005             : 
    1006             : }  // namespace GreekUpper
    1007             : U_NAMESPACE_END
    1008             : 
    1009             : /* functions available in the common library (for unistr_case.cpp) */
    1010             : 
    1011             : U_CFUNC int32_t U_CALLCONV  // Lowercases src[0, srcLength) into dest by running _caseMap with ucase_toFullLower.
    1012           0 : ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
    1013             :                          UChar *dest, int32_t destCapacity,
    1014             :                          const UChar *src, int32_t srcLength,
    1015             :                          icu::Edits *edits,
    1016             :                          UErrorCode &errorCode) {
    1017           0 :     UCaseContext csc=UCASECONTEXT_INITIALIZER;
    1018           0 :     csc.p=(void *)src;  // the context refers to the source text ...
    1019           0 :     csc.limit=srcLength;  // ... and its length
    1020             :     int32_t destIndex = _caseMap(
    1021             :         caseLocale, options, ucase_toFullLower,
    1022             :         dest, destCapacity,
    1023             :         src, &csc, 0, srcLength,
    1024           0 :         edits, errorCode);
    1025           0 :     return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);  // the return value comes from this final check
    1026             : }
    1027             : 
    1028             : U_CFUNC int32_t U_CALLCONV  // Uppercases src[0, srcLength) into dest; Greek has its own implementation.
    1029           0 : ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
    1030             :                          UChar *dest, int32_t destCapacity,
    1031             :                          const UChar *src, int32_t srcLength,
    1032             :                          icu::Edits *edits,
    1033             :                          UErrorCode &errorCode) {
    1034             :     int32_t destIndex;
    1035           0 :     if (caseLocale == UCASE_LOC_GREEK) {  // Greek: use the GreekUpper state machine
    1036             :         destIndex = GreekUpper::toUpper(options, dest, destCapacity,
    1037           0 :                                         src, srcLength, edits, errorCode);
    1038             :     } else {
    1039           0 :         UCaseContext csc=UCASECONTEXT_INITIALIZER;  // all other locales: generic _caseMap with ucase_toFullUpper
    1040           0 :         csc.p=(void *)src;
    1041           0 :         csc.limit=srcLength;
    1042             :         destIndex = _caseMap(
    1043             :             caseLocale, options, ucase_toFullUpper,
    1044             :             dest, destCapacity,
    1045             :             src, &csc, 0, srcLength,
    1046           0 :             edits, errorCode);
    1047             :     }
    1048           0 :     return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);  // both paths share this final check
    1049             : }
    1050             : 
    1051             : U_CFUNC int32_t U_CALLCONV  // Case-folds src[0, srcLength) into dest, one code point at a time; the caseLocale argument is unused.
    1052           0 : ustrcase_internalFold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
    1053             :                       UChar *dest, int32_t destCapacity,
    1054             :                       const UChar *src, int32_t srcLength,
    1055             :                       icu::Edits *edits,
    1056             :                       UErrorCode &errorCode) {
    1057             :     /* case mapping loop */
    1058           0 :     int32_t srcIndex = 0;
    1059           0 :     int32_t destIndex = 0;
    1060           0 :     while (srcIndex < srcLength) {
    1061           0 :         int32_t cpStart = srcIndex;  // start of the current code point, used for its source length below
    1062             :         UChar32 c;
    1063           0 :         U16_NEXT(src, srcIndex, srcLength, c);
    1064             :         const UChar *s;
    1065           0 :         c = ucase_toFullFolding(c, &s, options);
    1066           0 :         destIndex = appendResult(dest, destIndex, destCapacity, c, s,
    1067           0 :                                  srcIndex - cpStart, options, edits);
    1068           0 :         if (destIndex < 0) {  // a negative index from appendResult is reported as an error
    1069           0 :             errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
    1070           0 :             return 0;
    1071             :         }
    1072             :     }
    1073             : 
    1074           0 :     return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
    1075             : }
    1076             : 
    1077             : U_CFUNC int32_t  // Validates the arguments, rejects overlapping buffers, runs stringCaseMapper and terminates dest.
    1078           0 : ustrcase_map(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
    1079             :              UChar *dest, int32_t destCapacity,
    1080             :              const UChar *src, int32_t srcLength,
    1081             :              UStringCaseMapper *stringCaseMapper,
    1082             :              icu::Edits *edits,
    1083             :              UErrorCode &errorCode) {
    1084             :     int32_t destLength;
    1085             : 
    1086             :     /* check argument values: do nothing if errorCode already indicates a failure */
    1087           0 :     if(U_FAILURE(errorCode)) {
    1088           0 :         return 0;
    1089             :     }
    1090           0 :     if( destCapacity<0 ||
    1091           0 :         (dest==NULL && destCapacity>0) ||
    1092           0 :         src==NULL ||
    1093             :         srcLength<-1
    1094             :     ) {
    1095           0 :         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    1096           0 :         return 0;
    1097             :     }
    1098             : 
    1099             :     /* get the string length: srcLength==-1 means that the length is determined with u_strlen() */
    1100           0 :     if(srcLength==-1) {
    1101           0 :         srcLength=u_strlen(src);
    1102             :     }
    1103             : 
    1104             :     /* check for overlapping source and destination; this function treats overlap as an argument error */
    1105           0 :     if( dest!=NULL &&
    1106           0 :         ((src>=dest && src<(dest+destCapacity)) ||
    1107           0 :          (dest>=src && dest<(src+srcLength)))
    1108             :     ) {
    1109           0 :         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    1110           0 :         return 0;
    1111             :     }
    1112             : 
    1113           0 :     if(edits!=NULL) {
    1114           0 :         edits->reset();  // reset the caller's Edits object before the mapper records into it
    1115             :     }
    1116             :     destLength=stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
    1117           0 :                                 dest, destCapacity, src, srcLength, edits, errorCode);
    1118           0 :     return u_terminateUChars(dest, destCapacity, destLength, &errorCode);  // the return value comes from this termination helper
    1119             : }
    1120             : 
    1121             : U_CFUNC int32_t  // Like the non-overlap variant, but if src and dest overlap the mapping goes into a temporary buffer that is then copied to dest.
    1122           0 : ustrcase_mapWithOverlap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
    1123             :                         UChar *dest, int32_t destCapacity,
    1124             :                         const UChar *src, int32_t srcLength,
    1125             :                         UStringCaseMapper *stringCaseMapper,
    1126             :                         UErrorCode &errorCode) {
    1127             :     UChar buffer[300];  // stack buffer used when destCapacity is at most its length
    1128             :     UChar *temp;
    1129             : 
    1130             :     int32_t destLength;
    1131             : 
    1132             :     /* check argument values: do nothing if errorCode already indicates a failure */
    1133           0 :     if(U_FAILURE(errorCode)) {
    1134           0 :         return 0;
    1135             :     }
    1136           0 :     if( destCapacity<0 ||
    1137           0 :         (dest==NULL && destCapacity>0) ||
    1138           0 :         src==NULL ||
    1139             :         srcLength<-1
    1140             :     ) {
    1141           0 :         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    1142           0 :         return 0;
    1143             :     }
    1144             : 
    1145             :     /* get the string length: srcLength==-1 means that the length is determined with u_strlen() */
    1146           0 :     if(srcLength==-1) {
    1147           0 :         srcLength=u_strlen(src);
    1148             :     }
    1149             : 
    1150             :     /* check for overlapping source and destination */
    1151           0 :     if( dest!=NULL &&
    1152           0 :         ((src>=dest && src<(dest+destCapacity)) ||
    1153           0 :          (dest>=src && dest<(src+srcLength)))
    1154             :     ) {
    1155             :         /* overlap: provide a temporary destination buffer and later copy the result */
    1156           0 :         if(destCapacity<=UPRV_LENGTHOF(buffer)) {
    1157             :             /* the stack buffer is large enough */
    1158           0 :             temp=buffer;
    1159             :         } else {
    1160             :             /* allocate a buffer; widen to size_t before multiplying so that the byte count cannot overflow int32_t */
    1161           0 :             temp=(UChar *)uprv_malloc((size_t)destCapacity*U_SIZEOF_UCHAR);
    1162           0 :             if(temp==NULL) {
    1163           0 :                 errorCode=U_MEMORY_ALLOCATION_ERROR;
    1164           0 :                 return 0;
    1165             :             }
    1166             :         }
    1167             :     } else {
    1168           0 :         temp=dest;  // no overlap: map directly into dest
    1169             :     }
    1170             : 
    1171             :     destLength=stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
    1172           0 :                                 temp, destCapacity, src, srcLength, NULL, errorCode);  // NULL: no Edits are recorded by this variant
    1173           0 :     if(temp!=dest) {
    1174             :         /* copy the result string to the destination buffer, but only a successful, non-empty result that fits */
    1175           0 :         if (U_SUCCESS(errorCode) && 0 < destLength && destLength <= destCapacity) {
    1176           0 :             u_memmove(dest, temp, destLength);
    1177             :         }
    1178           0 :         if(temp!=buffer) {  // free only a heap-allocated temporary, never the stack buffer
    1179           0 :             uprv_free(temp);
    1180             :         }
    1181             :     }
    1182             : 
    1183           0 :     return u_terminateUChars(dest, destCapacity, destLength, &errorCode);
    1184             : }
    1185             : 
    1186             : /* public API functions */
    1187             : 
    1188             : U_CAPI int32_t U_EXPORT2 /* case-folds src into dest through the overlap-tolerant mapping helper */
    1189           0 : u_strFoldCase(UChar *dest, int32_t destCapacity,
    1190             :               const UChar *src, int32_t srcLength,
    1191             :               uint32_t options,
    1192             :               UErrorCode *pErrorCode) {
    1193             :     return ustrcase_mapWithOverlap(
    1194             :         UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL /* always the root case locale, no break iterator */
    1195             :         dest, destCapacity,
    1196             :         src, srcLength,
    1197           0 :         ustrcase_internalFold, *pErrorCode); /* NOTE: pErrorCode is dereferenced without a NULL check */
    1198             : }
    1199             : 
    1200             : U_NAMESPACE_BEGIN
    1201             : 
    1202           0 : int32_t CaseMap::fold( /* case-folds src into dest via ustrcase_map(), optionally recording Edits */
    1203             :         uint32_t options,
    1204             :         const UChar *src, int32_t srcLength,
    1205             :         UChar *dest, int32_t destCapacity, Edits *edits,
    1206             :         UErrorCode &errorCode) {
    1207             :     return ustrcase_map(
    1208             :         UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL /* always the root case locale, no break iterator */
    1209             :         dest, destCapacity,
    1210             :         src, srcLength,
    1211           0 :         ustrcase_internalFold, edits, errorCode); /* edits is forwarded unchanged to ustrcase_map() */
    1212             : }
    1213             : 
    1214             : U_NAMESPACE_END
    1215             : 
    1216             : /* case-insensitive string comparisons -------------------------------------- */
    1217             : 
    1218             : /*
    1219             :  * This function is a copy of unorm_cmpEquivFold() minus the parts for
    1220             :  * canonical equivalence.
    1221             :  * Keep the functions in sync, and see there for how this works.
    1222             :  * The duplication is for modularization:
    1223             :  * It makes caseless (but not canonical caseless) matches independent of
    1224             :  * the normalization code.
    1225             :  */
    1226             : 
    1227             : /* stack element for previous-level source/decomposition pointers; _cmpFold() saves one per string before reading a folding buffer */
    1228             : struct CmpEquivLevel {
    1229             :     const UChar *start, *s, *limit; /* saved start, current read position, and limit of the suspended level */
    1230             : };
    1231             : typedef struct CmpEquivLevel CmpEquivLevel;
    1232             : 
    1233             : /**
    1234             :  * Internal implementation: compares two UTF-16 strings code unit by code unit, applying full case folding on mismatch.
    1235             :  * This function is called from u_strcmpFold() and u_caseInsensitivePrefixMatch().
    1236             :  *
    1237             :  * @param s1            input string 1
    1238             :  * @param length1       length of string 1, or -1 (NULL terminated)
    1239             :  * @param s2            input string 2
    1240             :  * @param length2       length of string 2, or -1 (NULL terminated)
    1241             :  * @param options       compare options (passed to ucase_toFullFolding(); also tested for _STRNCMP_STYLE and U_COMPARE_CODE_POINT_ORDER)
    1242             :  * @param matchLen1     (output) length of partial prefix match in s1; may be NULL, then no match lengths are written
    1243             :  * @param matchLen2     (output) length of partial prefix match in s2; must be non-NULL whenever matchLen1 is
    1244             :  * @param pErrorCode    in/out error code; if it already indicates failure the function returns 0 immediately
    1245             :  * @return 0 if both strings end together, -1/+1 if string 1/string 2 ends first, else the difference of the first differing units
    1246             :  */
    1247           0 : static int32_t _cmpFold(
    1248             :             const UChar *s1, int32_t length1,
    1249             :             const UChar *s2, int32_t length2,
    1250             :             uint32_t options,
    1251             :             int32_t *matchLen1, int32_t *matchLen2,
    1252             :             UErrorCode *pErrorCode) {
    1253           0 :     int32_t cmpRes = 0;
    1254             : 
    1255             :     /* current-level start/limit - s1/s2 as current */
    1256             :     const UChar *start1, *start2, *limit1, *limit2;
    1257             : 
    1258             :     /* points to the original start address */
    1259             :     const UChar *org1, *org2;
    1260             : 
    1261             :     /* points to the end of match + 1 */
    1262             :     const UChar *m1, *m2;
    1263             : 
    1264             :     /* case folding variables */
    1265             :     const UChar *p;
    1266             :     int32_t length;
    1267             : 
    1268             :     /* stacks of previous-level start/current/limit */
    1269             :     CmpEquivLevel stack1[2], stack2[2];
    1270             : 
    1271             :     /* case folding buffers, only use current-level start/limit */
    1272             :     UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1];
    1273             : 
    1274             :     /* track which is the current level per string */
    1275             :     int32_t level1, level2;
    1276             : 
    1277             :     /* current code units, and code points for lookups */
    1278             :     UChar32 c1, c2, cp1, cp2;
    1279             : 
    1280             :     /* no argument error checking because this itself is not an API */
    1281             : 
    1282             :     /*
    1283             :      * assume that at least the option U_COMPARE_IGNORE_CASE is set
    1284             :      * otherwise this function would have to behave exactly as uprv_strCompare()
    1285             :      */
    1286           0 :     if(U_FAILURE(*pErrorCode)) {
    1287           0 :         return 0;
    1288             :     }
    1289             : 
    1290             :     /* initialize the optional match-length outputs to 0 */
    1291           0 :     if(matchLen1) {
    1292           0 :         U_ASSERT(matchLen2 !=NULL);
    1293           0 :         *matchLen1=0;
    1294           0 :         *matchLen2=0;
    1295             :     }
    1296             : 
    1297           0 :     start1=m1=org1=s1;
    1298           0 :     if(length1==-1) {
    1299           0 :         limit1=NULL;    /* NULL limit marks a NUL-terminated string */
    1300             :     } else {
    1301           0 :         limit1=s1+length1;
    1302             :     }
    1303             : 
    1304           0 :     start2=m2=org2=s2;
    1305           0 :     if(length2==-1) {
    1306           0 :         limit2=NULL;    /* NULL limit marks a NUL-terminated string */
    1307             :     } else {
    1308           0 :         limit2=s2+length2;
    1309             :     }
    1310             : 
    1311           0 :     level1=level2=0;
    1312           0 :     c1=c2=-1;
    1313             : 
    1314             :     /* comparison loop */
    1315             :     for(;;) {
    1316             :         /*
    1317             :          * here a code unit value of -1 means "get another code unit"
    1318             :          * below it will mean "this source is finished"
    1319             :          */
    1320             : 
    1321           0 :         if(c1<0) {
    1322             :             /* get next code unit from string 1, post-increment */
    1323             :             for(;;) {
    1324           0 :                 if(s1==limit1 || ((c1=*s1)==0 && (limit1==NULL || (options&_STRNCMP_STYLE)))) {
    1325           0 :                     if(level1==0) {
    1326           0 :                         c1=-1;
    1327           0 :                         break;
    1328             :                     }
    1329             :                 } else {
    1330           0 :                     ++s1;
    1331           0 :                     break;
    1332             :                 }
    1333             : 
    1334             :                 /* reached end of level buffer, pop one level */
    1335           0 :                 do {
    1336           0 :                     --level1;
    1337           0 :                     start1=stack1[level1].start;    /*Not uninitialized*/
    1338           0 :                 } while(start1==NULL);
    1339           0 :                 s1=stack1[level1].s;                /*Not uninitialized*/
    1340           0 :                 limit1=stack1[level1].limit;        /*Not uninitialized*/
    1341             :             }
    1342             :         }
    1343             : 
    1344           0 :         if(c2<0) {
    1345             :             /* get next code unit from string 2, post-increment */
    1346             :             for(;;) {
    1347           0 :                 if(s2==limit2 || ((c2=*s2)==0 && (limit2==NULL || (options&_STRNCMP_STYLE)))) {
    1348           0 :                     if(level2==0) {
    1349           0 :                         c2=-1;
    1350           0 :                         break;
    1351             :                     }
    1352             :                 } else {
    1353           0 :                     ++s2;
    1354           0 :                     break;
    1355             :                 }
    1356             : 
    1357             :                 /* reached end of level buffer, pop one level */
    1358           0 :                 do {
    1359           0 :                     --level2;
    1360           0 :                     start2=stack2[level2].start;    /*Not uninitialized*/
    1361           0 :                 } while(start2==NULL);
    1362           0 :                 s2=stack2[level2].s;                /*Not uninitialized*/
    1363           0 :                 limit2=stack2[level2].limit;        /*Not uninitialized*/
    1364             :             }
    1365             :         }
    1366             : 
    1367             :         /*
    1368             :          * compare c1 and c2
    1369             :          * either variable c1, c2 is -1 only if the corresponding string is finished
    1370             :          */
    1371           0 :         if(c1==c2) {
    1372             :             const UChar *next1, *next2;
    1373             : 
    1374           0 :             if(c1<0) {
    1375           0 :                 cmpRes=0;   /* c1==c2==-1 indicating end of strings */
    1376           0 :                 break;
    1377             :             }
    1378             : 
    1379             :             /*
    1380             :              * Note: Move the match positions in both strings at the same time
    1381             :              *      only when corresponding code point(s) in the original strings
    1382             :              *      are fully consumed. For example, when comparing s1="Fust" and
    1383             :              *      s2="Fu\u00dfball", s2[2] is folded into "ss", and s1[2] matches
    1384             :              *      the first code point in the case-folded data. But the second "s"
    1385             :              *      has no matching code point in s1, so this implementation returns
    1386             :              *      2 as the prefix match length ("Fu").
    1387             :              */
    1388           0 :             next1=next2=NULL;
    1389           0 :             if(level1==0) {
    1390           0 :                 next1=s1;
    1391           0 :             } else if(s1==limit1) {
    1392             :                 /* Note: This implementation only uses a single level of stack.
    1393             :                  *      If this code needs to be changed to use multiple levels
    1394             :                  *      of stacks, the code above should check if the current
    1395             :                  *      code is at the end of all stacks.
    1396             :                  */
    1397           0 :                 U_ASSERT(level1==1);
    1398             : 
    1399             :                 /* s1 is at the end of the folding buffer: resume position is the saved original pointer */
    1400           0 :                 next1=stack1[0].s;
    1401             :             }
    1402             : 
    1403           0 :             if (next1!=NULL) {
    1404           0 :                 if(level2==0) {
    1405           0 :                     next2=s2;
    1406           0 :                 } else if(s2==limit2) {
    1407           0 :                     U_ASSERT(level2==1);
    1408             : 
    1409             :                     /* s2 is at the end of the folding buffer: resume position is the saved original pointer */
    1410           0 :                     next2=stack2[0].s;
    1411             :                 }
    1412           0 :                 if(next2!=NULL) {
    1413           0 :                     m1=next1;
    1414           0 :                     m2=next2;
    1415             :                 }
    1416             :             }
    1417           0 :             c1=c2=-1;       /* make us fetch new code units */
    1418           0 :             continue;
    1419           0 :         } else if(c1<0) {
    1420           0 :             cmpRes=-1;      /* string 1 ends before string 2 */
    1421           0 :             break;
    1422           0 :         } else if(c2<0) {
    1423           0 :             cmpRes=1;       /* string 2 ends before string 1 */
    1424           0 :             break;
    1425             :         }
    1426             :         /* c1!=c2 && c1>=0 && c2>=0 */
    1427             : 
    1428             :         /* get complete code points for c1, c2 for lookups if either is a surrogate */
    1429           0 :         cp1=c1;
    1430           0 :         if(U_IS_SURROGATE(c1)) {
    1431             :             UChar c;
    1432             : 
    1433           0 :             if(U_IS_SURROGATE_LEAD(c1)) {
    1434           0 :                 if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) {
    1435             :                     /* advance ++s1; only below if cp1 decomposes/case-folds */
    1436           0 :                     cp1=U16_GET_SUPPLEMENTARY(c1, c);
    1437             :                 }
    1438             :             } else /* isTrail(c1) */ {
    1439           0 :                 if(start1<=(s1-2) && U16_IS_LEAD(c=*(s1-2))) {
    1440           0 :                     cp1=U16_GET_SUPPLEMENTARY(c, c1);
    1441             :                 }
    1442             :             }
    1443             :         }
    1444             : 
    1445           0 :         cp2=c2;
    1446           0 :         if(U_IS_SURROGATE(c2)) {
    1447             :             UChar c;
    1448             : 
    1449           0 :             if(U_IS_SURROGATE_LEAD(c2)) {
    1450           0 :                 if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) {
    1451             :                     /* advance ++s2; only below if cp2 decomposes/case-folds */
    1452           0 :                     cp2=U16_GET_SUPPLEMENTARY(c2, c);
    1453             :                 }
    1454             :             } else /* isTrail(c2) */ {
    1455           0 :                 if(start2<=(s2-2) && U16_IS_LEAD(c=*(s2-2))) {
    1456           0 :                     cp2=U16_GET_SUPPLEMENTARY(c, c2);
    1457             :                 }
    1458             :             }
    1459             :         }
    1460             : 
    1461             :         /*
    1462             :          * go down one level for each string
    1463             :          * continue with the main loop as soon as there is a real change
    1464             :          */
    1465             : 
    1466           0 :         if( level1==0 &&
    1467             :             (length=ucase_toFullFolding((UChar32)cp1, &p, options))>=0
    1468             :         ) {
    1469             :             /* cp1 case-folds to the code point "length" or to p[length] */
    1470           0 :             if(U_IS_SURROGATE(c1)) {
    1471           0 :                 if(U_IS_SURROGATE_LEAD(c1)) {
    1472             :                     /* advance beyond source surrogate pair if it case-folds */
    1473           0 :                     ++s1;
    1474             :                 } else /* isTrail(c1) */ {
    1475             :                     /*
    1476             :                      * we got a supplementary code point when hitting its trail surrogate,
    1477             :                      * therefore the lead surrogate must have been the same as in the other string;
    1478             :                      * compare this decomposition with the lead surrogate in the other string
    1479             :                      * remember that this simulates bulk text replacement:
    1480             :                      * the decomposition would replace the entire code point
    1481             :                      */
    1482           0 :                     --s2;
    1483           0 :                     --m2;
    1484           0 :                     c2=*(s2-1);
    1485             :                 }
    1486             :             }
    1487             : 
    1488             :             /* push current level pointers */
    1489           0 :             stack1[0].start=start1;
    1490           0 :             stack1[0].s=s1;
    1491           0 :             stack1[0].limit=limit1;
    1492           0 :             ++level1;
    1493             : 
    1494             :             /* copy the folding result to fold1[] */
    1495           0 :             if(length<=UCASE_MAX_STRING_LENGTH) {     /* string result: "length" code units at p */
    1496           0 :                 u_memcpy(fold1, p, length);
    1497             :             } else {                                  /* single code point result stored in "length" */
    1498           0 :                 int32_t i=0;
    1499           0 :                 U16_APPEND_UNSAFE(fold1, i, length);
    1500           0 :                 length=i;
    1501             :             }
    1502             : 
    1503             :             /* set next level pointers to case folding */
    1504           0 :             start1=s1=fold1;
    1505           0 :             limit1=fold1+length;
    1506             : 
    1507             :             /* get ready to read from the case folding buffer, continue with loop */
    1508           0 :             c1=-1;
    1509           0 :             continue;
    1510             :         }
    1511             : 
    1512           0 :         if( level2==0 &&
    1513             :             (length=ucase_toFullFolding((UChar32)cp2, &p, options))>=0
    1514             :         ) {
    1515             :             /* cp2 case-folds to the code point "length" or to p[length] */
    1516           0 :             if(U_IS_SURROGATE(c2)) {
    1517           0 :                 if(U_IS_SURROGATE_LEAD(c2)) {
    1518             :                     /* advance beyond source surrogate pair if it case-folds */
    1519           0 :                     ++s2;
    1520             :                 } else /* isTrail(c2) */ {
    1521             :                     /*
    1522             :                      * we got a supplementary code point when hitting its trail surrogate,
    1523             :                      * therefore the lead surrogate must have been the same as in the other string;
    1524             :                      * compare this decomposition with the lead surrogate in the other string
    1525             :                      * remember that this simulates bulk text replacement:
    1526             :                      * the decomposition would replace the entire code point
    1527             :                      */
    1528           0 :                     --s1;
    1529           0 :                     --m2; /* NOTE(review): the string-1 branch above also adjusts m2; confirm m1 was not intended here */
    1530           0 :                     c1=*(s1-1);
    1531             :                 }
    1532             :             }
    1533             : 
    1534             :             /* push current level pointers */
    1535           0 :             stack2[0].start=start2;
    1536           0 :             stack2[0].s=s2;
    1537           0 :             stack2[0].limit=limit2;
    1538           0 :             ++level2;
    1539             : 
    1540             :             /* copy the folding result to fold2[] */
    1541           0 :             if(length<=UCASE_MAX_STRING_LENGTH) {     /* string result: "length" code units at p */
    1542           0 :                 u_memcpy(fold2, p, length);
    1543             :             } else {                                  /* single code point result stored in "length" */
    1544           0 :                 int32_t i=0;
    1545           0 :                 U16_APPEND_UNSAFE(fold2, i, length);
    1546           0 :                 length=i;
    1547             :             }
    1548             : 
    1549             :             /* set next level pointers to case folding */
    1550           0 :             start2=s2=fold2;
    1551           0 :             limit2=fold2+length;
    1552             : 
    1553             :             /* get ready to read from the case folding buffer, continue with loop */
    1554           0 :             c2=-1;
    1555           0 :             continue;
    1556             :         }
    1557             : 
    1558             :         /*
    1559             :          * no decomposition/case folding, max level for both sides:
    1560             :          * return difference result
    1561             :          *
    1562             :          * code point order comparison must not just return cp1-cp2
    1563             :          * because when single surrogates are present then the surrogate pairs
    1564             :          * that formed cp1 and cp2 may be from different string indexes
    1565             :          *
    1566             :          * example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units
    1567             :          * c1=d800 cp1=10001 c2=dc00 cp2=10000
    1568             :          * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 }
    1569             :          *
    1570             :          * therefore, use same fix-up as in ustring.c/uprv_strCompare()
    1571             :          * except: uprv_strCompare() fetches c=*s while this function fetches c=*s++
    1572             :          * so we have slightly different pointer/start/limit comparisons here
    1573             :          */
    1574             : 
    1575           0 :         if(c1>=0xd800 && c2>=0xd800 && (options&U_COMPARE_CODE_POINT_ORDER)) {
    1576             :             /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
    1577           0 :             if(
    1578           0 :                 (c1<=0xdbff && s1!=limit1 && U16_IS_TRAIL(*s1)) ||
    1579           0 :                 (U16_IS_TRAIL(c1) && start1!=(s1-1) && U16_IS_LEAD(*(s1-2)))
    1580             :             ) {
    1581             :                 /* part of a surrogate pair, leave >=d800 */
    1582             :             } else {
    1583             :                 /* BMP code point - may be surrogate code point - make <d800 */
    1584           0 :                 c1-=0x2800;
    1585             :             }
    1586             : 
    1587           0 :             if(
    1588           0 :                 (c2<=0xdbff && s2!=limit2 && U16_IS_TRAIL(*s2)) ||
    1589           0 :                 (U16_IS_TRAIL(c2) && start2!=(s2-1) && U16_IS_LEAD(*(s2-2)))
    1590             :             ) {
    1591             :                 /* part of a surrogate pair, leave >=d800 */
    1592             :             } else {
    1593             :                 /* BMP code point - may be surrogate code point - make <d800 */
    1594           0 :                 c2-=0x2800;
    1595             :             }
    1596             :         }
    1597             : 
    1598           0 :         cmpRes=c1-c2;
    1599           0 :         break;
    1600           0 :     }
    1601             : 
    1602           0 :     if(matchLen1) {     /* report how far the last fully matched position is from each original start */
    1603           0 :         *matchLen1=m1-org1;
    1604           0 :         *matchLen2=m2-org2;
    1605             :     }
    1606           0 :     return cmpRes;
    1607             : }
    1608             : 
    1609             : /* internal function: forwards to _cmpFold() without requesting prefix match lengths */
    1610             : U_CFUNC int32_t
    1611           0 : u_strcmpFold(const UChar *s1, int32_t length1,
    1612             :              const UChar *s2, int32_t length2,
    1613             :              uint32_t options,
    1614             :              UErrorCode *pErrorCode) {
    1615           0 :     return _cmpFold(s1, length1, s2, length2, options, NULL, NULL, pErrorCode); /* NULL, NULL: no matchLen outputs */
    1616             : }
    1617             : 
    1618             : /* public API functions */
    1619             : 
    1620             : U_CAPI int32_t U_EXPORT2 /* validated case-insensitive comparison; returns 0 on any argument or incoming error */
    1621           0 : u_strCaseCompare(const UChar *s1, int32_t length1,
    1622             :                  const UChar *s2, int32_t length2,
    1623             :                  uint32_t options,
    1624             :                  UErrorCode *pErrorCode) {
    1625             :     /* argument checking: a NULL or already-failed error code returns 0 without touching *pErrorCode */
    1626           0 :     if(pErrorCode==0 || U_FAILURE(*pErrorCode)) {
    1627           0 :         return 0;
    1628             :     }
    1629           0 :     if(s1==NULL || length1<-1 || s2==NULL || length2<-1) {
    1630           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    1631           0 :         return 0;
    1632             :     }
    1633           0 :     return u_strcmpFold(s1, length1, s2, length2,
    1634             :                         options|U_COMPARE_IGNORE_CASE, /* always force the ignore-case option on */
    1635           0 :                         pErrorCode);
    1636             : }
    1637             : 
    1638             : U_CAPI int32_t U_EXPORT2 /* case-insensitive comparison of two NUL-terminated strings; no argument checks here */
    1639           0 : u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options) {
    1640           0 :     UErrorCode errorCode=U_ZERO_ERROR; /* local only: never reported to the caller */
    1641           0 :     return u_strcmpFold(s1, -1, s2, -1, /* -1: both lengths are determined by the terminating NUL */
    1642             :                         options|U_COMPARE_IGNORE_CASE,
    1643           0 :                         &errorCode);
    1644             : }
    1645             : 
    1646             : U_CAPI int32_t U_EXPORT2 /* case-insensitive comparison using one length value for both strings; no argument checks here */
    1647           0 : u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options) {
    1648           0 :     UErrorCode errorCode=U_ZERO_ERROR; /* local only: never reported to the caller */
    1649           0 :     return u_strcmpFold(s1, length, s2, length, /* the same length is passed for s1 and s2 */
    1650             :                         options|U_COMPARE_IGNORE_CASE,
    1651           0 :                         &errorCode);
    1652             : }
    1653             : 
    1654             : U_CAPI int32_t U_EXPORT2 /* case-insensitive comparison limited to n code units per string; no argument checks here */
    1655           0 : u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options) {
    1656           0 :     UErrorCode errorCode=U_ZERO_ERROR; /* local only: never reported to the caller */
    1657           0 :     return u_strcmpFold(s1, n, s2, n,
    1658             :                         options|(U_COMPARE_IGNORE_CASE|_STRNCMP_STYLE), /* _STRNCMP_STYLE: _cmpFold() also stops at a NUL before the limit */
    1659           0 :                         &errorCode);
    1660             : }
    1661             : 
    1662             : /* internal API - detect length of shared prefix; results are delivered only through matchLen1/matchLen2 */
    1663             : U_CAPI void
    1664           0 : u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1,
    1665             :                              const UChar *s2, int32_t length2,
    1666             :                              uint32_t options,
    1667             :                              int32_t *matchLen1, int32_t *matchLen2,
    1668             :                              UErrorCode *pErrorCode) {
    1669             :     _cmpFold(s1, length1, s2, length2, options, /* options passed through unchanged; comparison result is discarded */
    1670           0 :         matchLen1, matchLen2, pErrorCode);
    1671           0 : }

Generated by: LCOV version 1.13