LCOV - code coverage report
Current view: top level - intl/icu/source/common - unormcmp.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 0 195 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 3 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : *******************************************************************************
       5             : *
       6             : *   Copyright (C) 2001-2014, International Business Machines
       7             : *   Corporation and others.  All Rights Reserved.
       8             : *
       9             : *******************************************************************************
      10             : *   file name:  unormcmp.cpp
      11             : *   encoding:   UTF-8
      12             : *   tab size:   8 (not used)
      13             : *   indentation:4
      14             : *
      15             : *   created on: 2004sep13
      16             : *   created by: Markus W. Scherer
      17             : *
      18             : *   unorm_compare() function moved here from unorm.cpp for better modularization.
      19             : *   Depends on both normalization and case folding.
      20             : *   Allows unorm.cpp to not depend on any character properties code.
      21             : */
      22             : 
      23             : #include "unicode/utypes.h"
      24             : 
      25             : #if !UCONFIG_NO_NORMALIZATION
      26             : 
      27             : #include "unicode/unorm.h"
      28             : #include "unicode/ustring.h"
      29             : #include "cmemory.h"
      30             : #include "normalizer2impl.h"
      31             : #include "ucase.h"
      32             : #include "uprops.h"
      33             : #include "ustr_imp.h"
      34             : 
      35             : U_NAMESPACE_USE
      36             : 
      37             : /* compare canonically equivalent ------------------------------------------- */
      38             : 
      39             : /*
      40             :  * Compare two strings for canonical equivalence.
      41             :  * Further options include case-insensitive comparison and
      42             :  * code point order (as opposed to code unit order).
      43             :  *
      44             :  * In this function, canonical equivalence is optional as well.
      45             :  * If canonical equivalence is tested, then both strings must fulfill
      46             :  * the FCD check.
      47             :  *
      48             :  * Semantically, this is equivalent to
      49             :  *   strcmp[CodePointOrder](NFD(foldCase(s1)), NFD(foldCase(s2)))
      50             :  * where code point order, NFD and foldCase are all optional.
      51             :  *
      52             :  * String comparisons almost always yield results before processing both strings
      53             :  * completely.
      54             :  * They are generally more efficient working incrementally instead of
      55             :  * performing the sub-processing (strlen, normalization, case-folding)
      56             :  * on the entire strings first.
      57             :  *
      58             :  * It is also unnecessary to normalize identical characters.
      59             :  *
      60             :  * This function works in principle as follows:
      61             :  *
      62             :  * loop {
      63             :  *   get one code unit c1 from s1 (-1 if end of source)
      64             :  *   get one code unit c2 from s2 (-1 if end of source)
      65             :  *
      66             :  *   if(either string finished) {
      67             :  *     return result;
      68             :  *   }
      69             :  *   if(c1==c2) {
      70             :  *     continue;
      71             :  *   }
      72             :  *
      73             :  *   // c1!=c2
      74             :  *   try to decompose/case-fold c1/c2, and continue if one does;
      75             :  *
      76             :  *   // still c1!=c2 and neither decomposes/case-folds, return result
      77             :  *   return c1-c2;
      78             :  * }
      79             :  *
      80             :  * When a character decomposes, then the pointer for that source changes to
      81             :  * the decomposition, pushing the previous pointer onto a stack.
      82             :  * When the end of the decomposition is reached, then the code unit reader
      83             :  * pops the previous source from the stack.
      84             :  * (Same for case-folding.)
      85             :  *
      86             :  * This is complicated further by operating on variable-width UTF-16.
      87             :  * The top part of the loop works on code units, while lookups for decomposition
      88             :  * and case-folding need code points.
      89             :  * Code points are assembled after the equality/end-of-source part.
      90             :  * The source pointer is only advanced beyond all code units when the code point
      91             :  * actually decomposes/case-folds.
      92             :  *
      93             :  * If we were on a trail surrogate unit when assembling a code point,
      94             :  * and the code point decomposes/case-folds, then the decomposition/folding
      95             :  * result must be compared with the part of the other string that corresponds to
      96             :  * this string's lead surrogate.
      97             :  * Since we only assemble a code point when hitting a trail unit when the
      98             :  * preceding lead units were identical, we back up the other string by one unit
      99             :  * in such a case.
     100             :  *
     101             :  * The optional code point order comparison at the end works with
     102             :  * the same fix-up as the other code point order comparison functions.
     103             :  * See ustring.c and the comment near the end of this function.
     104             :  *
     105             :  * Assumption: A decomposition or case-folding result string never contains
     106             :  * a single surrogate. This is a safe assumption in the Unicode Standard.
     107             :  * Therefore, we do not need to check for surrogate pairs across
     108             :  * decomposition/case-folding boundaries.
     109             :  *
     110             :  * Further assumptions (see the verifications in tstnorm.cpp):
     111             :  * The API function checks for FCD first, while the core function
     112             :  * first case-folds and then decomposes. This requires that case-folding does not
     113             :  * un-FCD any strings.
     114             :  *
     115             :  * The API function may also NFD the input and turn off decomposition.
     116             :  * This requires that case-folding does not un-NFD strings either.
     117             :  *
     118             :  * TODO If any of the above two assumptions is violated,
     119             :  * then this entire code must be re-thought.
     120             :  * If this happens, then a simple solution is to case-fold both strings up front
     121             :  * and to turn off UNORM_INPUT_IS_FCD.
     122             :  * We already do this when not both strings are in FCD because makeFCD
     123             :  * would be a partial NFD before the case folding, which does not work.
     124             :  * Note that all of this is only a problem when case-folding _and_
     125             :  * canonical equivalence come together.
     126             :  * (Comments in unorm_compare() are more up to date than this TODO.)
     127             :  */
     128             : 
     129             : /* stack element saving one level's pointers (buffer start, current read position s, limit) while a case folding/decomposition is being read */
     130             : struct CmpEquivLevel {
     131             :     const UChar *start, *s, *limit;
     132             : };
     133             : typedef struct CmpEquivLevel CmpEquivLevel;
     134             : 
     135             : /**
     136             :  * Internal option bit for unorm_cmpEquivFold() that enables decomposing mismatched code points.
     137             :  * If not set, no normalizer data is fetched and only case folding is applied (strcasecmp()-like).
     138             :  */
     139             : #define _COMPARE_EQUIV 0x80000
     140             : 
     141             : /* internal function: incremental compare with optional case folding/decomposition; returns 0 if equal (or on failure), otherwise nonzero */
     142             : static int32_t
     143           0 : unorm_cmpEquivFold(const UChar *s1, int32_t length1,
     144             :                    const UChar *s2, int32_t length2,
     145             :                    uint32_t options,
     146             :                    UErrorCode *pErrorCode) {
     147             :     const Normalizer2Impl *nfcImpl;
     148             : 
     149             :     /* current-level start/limit - s1/s2 as current */
     150             :     const UChar *start1, *start2, *limit1, *limit2;
     151             : 
     152             :     /* decomposition and case folding variables */
     153             :     const UChar *p;
     154             :     int32_t length;
     155             : 
     156             :     /* stacks of previous-level start/current/limit */
     157             :     CmpEquivLevel stack1[2], stack2[2];
     158             : 
     159             :     /* buffers for algorithmic decompositions */
     160             :     UChar decomp1[4], decomp2[4];
     161             : 
     162             :     /* case folding buffers, only use current-level start/limit */
     163             :     UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1];
     164             : 
     165             :     /* track which is the current level per string */
     166             :     int32_t level1, level2;
     167             : 
     168             :     /* current code units, and code points for lookups */
     169             :     UChar32 c1, c2, cp1, cp2;
     170             : 
     171             :     /* no argument error checking because this itself is not an API */
     172             : 
     173             :     /*
     174             :      * assume that at least one of the options _COMPARE_EQUIV and U_COMPARE_IGNORE_CASE is set
     175             :      * otherwise this function must behave exactly as uprv_strCompare()
     176             :      * not checking for that here makes testing this function easier
     177             :      */
     178             : 
     179             :     /* normalization data is only needed (and only fetched) when decomposition is requested */
     180           0 :     if((options&_COMPARE_EQUIV)!=0) {
     181           0 :         nfcImpl=Normalizer2Factory::getNFCImpl(*pErrorCode);
     182             :     } else {
     183           0 :         nfcImpl=NULL;
     184             :     }
     185           0 :     if(U_FAILURE(*pErrorCode)) {
     186           0 :         return 0;
     187             :     }
     188             : 
     189             :     /* initialize; a NULL limit marks a NUL-terminated string (length==-1) */
     190           0 :     start1=s1;
     191           0 :     if(length1==-1) {
     192           0 :         limit1=NULL;
     193             :     } else {
     194           0 :         limit1=s1+length1;
     195             :     }
     196             : 
     197           0 :     start2=s2;
     198           0 :     if(length2==-1) {
     199           0 :         limit2=NULL;
     200             :     } else {
     201           0 :         limit2=s2+length2;
     202             :     }
     203             : 
     204           0 :     level1=level2=0;
     205           0 :     c1=c2=-1;
     206             : 
     207             :     /* comparison loop */
     208             :     for(;;) {
     209             :         /*
     210             :          * here a code unit value of -1 means "get another code unit"
     211             :          * below it will mean "this source is finished"
     212             :          */
     213             : 
     214           0 :         if(c1<0) {
     215             :             /* get next code unit from string 1, post-increment */
     216             :             for(;;) {
     217           0 :                 if(s1==limit1 || ((c1=*s1)==0 && (limit1==NULL || (options&_STRNCMP_STYLE)))) {
     218           0 :                     if(level1==0) {
     219           0 :                         c1=-1;
     220           0 :                         break;
     221             :                     }
     222             :                 } else {
     223           0 :                     ++s1;
     224           0 :                     break;
     225             :                 }
     226             : 
     227             :                 /* reached end of level buffer, pop one level */
     228           0 :                 do {
     229           0 :                     --level1;
     230           0 :                     start1=stack1[level1].start;    /*Not uninitialized*/
     231           0 :                 } while(start1==NULL);              /* a NULL start marks an empty intermediate level: skip it */
     232           0 :                 s1=stack1[level1].s;                /*Not uninitialized*/
     233           0 :                 limit1=stack1[level1].limit;        /*Not uninitialized*/
     234             :             }
     235             :         }
     236             : 
     237           0 :         if(c2<0) {
     238             :             /* get next code unit from string 2, post-increment */
     239             :             for(;;) {
     240           0 :                 if(s2==limit2 || ((c2=*s2)==0 && (limit2==NULL || (options&_STRNCMP_STYLE)))) {
     241           0 :                     if(level2==0) {
     242           0 :                         c2=-1;
     243           0 :                         break;
     244             :                     }
     245             :                 } else {
     246           0 :                     ++s2;
     247           0 :                     break;
     248             :                 }
     249             : 
     250             :                 /* reached end of level buffer, pop one level */
     251           0 :                 do {
     252           0 :                     --level2;
     253           0 :                     start2=stack2[level2].start;    /*Not uninitialized*/
     254           0 :                 } while(start2==NULL);              /* a NULL start marks an empty intermediate level: skip it */
     255           0 :                 s2=stack2[level2].s;                /*Not uninitialized*/
     256           0 :                 limit2=stack2[level2].limit;        /*Not uninitialized*/
     257             :             }
     258             :         }
     259             : 
     260             :         /*
     261             :          * compare c1 and c2
     262             :          * either variable c1, c2 is -1 only if the corresponding string is finished
     263             :          */
     264           0 :         if(c1==c2) {
     265           0 :             if(c1<0) {
     266           0 :                 return 0;   /* c1==c2==-1 indicating end of strings */
     267             :             }
     268           0 :             c1=c2=-1;       /* make us fetch new code units */
     269           0 :             continue;
     270           0 :         } else if(c1<0) {
     271           0 :             return -1;      /* string 1 ends before string 2 */
     272           0 :         } else if(c2<0) {
     273           0 :             return 1;       /* string 2 ends before string 1 */
     274             :         }
     275             :         /* c1!=c2 && c1>=0 && c2>=0 */
     276             : 
     277             :         /* get complete code points for c1, c2 for lookups if either is a surrogate */
     278           0 :         cp1=c1;
     279           0 :         if(U_IS_SURROGATE(c1)) {
     280             :             UChar c;
     281             : 
     282           0 :             if(U_IS_SURROGATE_LEAD(c1)) {
     283           0 :                 if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) {
     284             :                     /* advance ++s1; only below if cp1 decomposes/case-folds */
     285           0 :                     cp1=U16_GET_SUPPLEMENTARY(c1, c);
     286             :                 }
     287             :             } else /* isTrail(c1) */ {
     288           0 :                 if(start1<=(s1-2) && U16_IS_LEAD(c=*(s1-2))) {
     289           0 :                     cp1=U16_GET_SUPPLEMENTARY(c, c1);
     290             :                 }
     291             :             }
     292             :         }
     293             : 
     294           0 :         cp2=c2;
     295           0 :         if(U_IS_SURROGATE(c2)) {
     296             :             UChar c;
     297             : 
     298           0 :             if(U_IS_SURROGATE_LEAD(c2)) {
     299           0 :                 if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) {
     300             :                     /* advance ++s2; only below if cp2 decomposes/case-folds */
     301           0 :                     cp2=U16_GET_SUPPLEMENTARY(c2, c);
     302             :                 }
     303             :             } else /* isTrail(c2) */ {
     304           0 :                 if(start2<=(s2-2) && U16_IS_LEAD(c=*(s2-2))) {
     305           0 :                     cp2=U16_GET_SUPPLEMENTARY(c, c2);
     306             :                 }
     307             :             }
     308             :         }
     309             : 
     310             :         /*
     311             :          * go down one level for each string
     312             :          * continue with the main loop as soon as there is a real change
     313             :          */
     314             : 
     315           0 :         if( level1==0 && (options&U_COMPARE_IGNORE_CASE) &&
     316           0 :             (length=ucase_toFullFolding((UChar32)cp1, &p, options))>=0
     317             :         ) {
     318             :             /* cp1 case-folds to the code point "length" or to p[length] */
     319           0 :             if(U_IS_SURROGATE(c1)) {
     320           0 :                 if(U_IS_SURROGATE_LEAD(c1)) {
     321             :                     /* advance beyond source surrogate pair if it case-folds */
     322           0 :                     ++s1;
     323             :                 } else /* isTrail(c1) */ {
     324             :                     /*
     325             :                      * we got a supplementary code point when hitting its trail surrogate,
     326             :                      * therefore the lead surrogate must have been the same as in the other string;
     327             :                      * compare this case folding with the lead surrogate in the other string
     328             :                      * remember that this simulates bulk text replacement:
     329             :                      * the case folding would replace the entire code point
     330             :                      */
     331           0 :                     --s2;
     332           0 :                     c2=*(s2-1);
     333             :                 }
     334             :             }
     335             : 
     336             :             /* push current level pointers */
     337           0 :             stack1[0].start=start1;
     338           0 :             stack1[0].s=s1;
     339           0 :             stack1[0].limit=limit1;
     340           0 :             ++level1;
     341             : 
     342             :             /* copy the folding result to fold1[] */
     343           0 :             if(length<=UCASE_MAX_STRING_LENGTH) {   /* result is the string p[length] */
     344           0 :                 u_memcpy(fold1, p, length);
     345             :             } else {                                /* "length" is itself the folded code point: encode it as UTF-16 */
     346           0 :                 int32_t i=0;
     347           0 :                 U16_APPEND_UNSAFE(fold1, i, length);
     348           0 :                 length=i;
     349             :             }
     350             : 
     351             :             /* set next level pointers to case folding */
     352           0 :             start1=s1=fold1;
     353           0 :             limit1=fold1+length;
     354             : 
     355             :             /* get ready to read from case folding, continue with loop */
     356           0 :             c1=-1;
     357           0 :             continue;
     358             :         }
     359             : 
     360           0 :         if( level2==0 && (options&U_COMPARE_IGNORE_CASE) &&
     361           0 :             (length=ucase_toFullFolding((UChar32)cp2, &p, options))>=0
     362             :         ) {
     363             :             /* cp2 case-folds to the code point "length" or to p[length] */
     364           0 :             if(U_IS_SURROGATE(c2)) {
     365           0 :                 if(U_IS_SURROGATE_LEAD(c2)) {
     366             :                     /* advance beyond source surrogate pair if it case-folds */
     367           0 :                     ++s2;
     368             :                 } else /* isTrail(c2) */ {
     369             :                     /*
     370             :                      * we got a supplementary code point when hitting its trail surrogate,
     371             :                      * therefore the lead surrogate must have been the same as in the other string;
     372             :                      * compare this case folding with the lead surrogate in the other string
     373             :                      * remember that this simulates bulk text replacement:
     374             :                      * the case folding would replace the entire code point
     375             :                      */
     376           0 :                     --s1;
     377           0 :                     c1=*(s1-1);
     378             :                 }
     379             :             }
     380             : 
     381             :             /* push current level pointers */
     382           0 :             stack2[0].start=start2;
     383           0 :             stack2[0].s=s2;
     384           0 :             stack2[0].limit=limit2;
     385           0 :             ++level2;
     386             : 
     387             :             /* copy the folding result to fold2[] */
     388           0 :             if(length<=UCASE_MAX_STRING_LENGTH) {   /* result is the string p[length] */
     389           0 :                 u_memcpy(fold2, p, length);
     390             :             } else {                                /* "length" is itself the folded code point: encode it as UTF-16 */
     391           0 :                 int32_t i=0;
     392           0 :                 U16_APPEND_UNSAFE(fold2, i, length);
     393           0 :                 length=i;
     394             :             }
     395             : 
     396             :             /* set next level pointers to case folding */
     397           0 :             start2=s2=fold2;
     398           0 :             limit2=fold2+length;
     399             : 
     400             :             /* get ready to read from case folding, continue with loop */
     401           0 :             c2=-1;
     402           0 :             continue;
     403             :         }
     404             : 
     405           0 :         if( level1<2 && (options&_COMPARE_EQUIV) &&
     406           0 :             0!=(p=nfcImpl->getDecomposition((UChar32)cp1, decomp1, length))
     407             :         ) {
     408             :             /* cp1 decomposes into p[length] */
     409           0 :             if(U_IS_SURROGATE(c1)) {
     410           0 :                 if(U_IS_SURROGATE_LEAD(c1)) {
     411             :                     /* advance beyond source surrogate pair if it decomposes */
     412           0 :                     ++s1;
     413             :                 } else /* isTrail(c1) */ {
     414             :                     /*
     415             :                      * we got a supplementary code point when hitting its trail surrogate,
     416             :                      * therefore the lead surrogate must have been the same as in the other string;
     417             :                      * compare this decomposition with the lead surrogate in the other string
     418             :                      * remember that this simulates bulk text replacement:
     419             :                      * the decomposition would replace the entire code point
     420             :                      */
     421           0 :                     --s2;
     422           0 :                     c2=*(s2-1);
     423             :                 }
     424             :             }
     425             : 
     426             :             /* push current level pointers */
     427           0 :             stack1[level1].start=start1;
     428           0 :             stack1[level1].s=s1;
     429           0 :             stack1[level1].limit=limit1;
     430           0 :             ++level1;
     431             : 
     432             :             /* set empty intermediate level if skipped */
     433           0 :             if(level1<2) {
     434           0 :                 stack1[level1++].start=NULL;
     435             :             }
     436             : 
     437             :             /* set next level pointers to decomposition */
     438           0 :             start1=s1=p;
     439           0 :             limit1=p+length;
     440             : 
     441             :             /* get ready to read from decomposition, continue with loop */
     442           0 :             c1=-1;
     443           0 :             continue;
     444             :         }
     445             : 
     446           0 :         if( level2<2 && (options&_COMPARE_EQUIV) &&
     447           0 :             0!=(p=nfcImpl->getDecomposition((UChar32)cp2, decomp2, length))
     448             :         ) {
     449             :             /* cp2 decomposes into p[length] */
     450           0 :             if(U_IS_SURROGATE(c2)) {
     451           0 :                 if(U_IS_SURROGATE_LEAD(c2)) {
     452             :                     /* advance beyond source surrogate pair if it decomposes */
     453           0 :                     ++s2;
     454             :                 } else /* isTrail(c2) */ {
     455             :                     /*
     456             :                      * we got a supplementary code point when hitting its trail surrogate,
     457             :                      * therefore the lead surrogate must have been the same as in the other string;
     458             :                      * compare this decomposition with the lead surrogate in the other string
     459             :                      * remember that this simulates bulk text replacement:
     460             :                      * the decomposition would replace the entire code point
     461             :                      */
     462           0 :                     --s1;
     463           0 :                     c1=*(s1-1);
     464             :                 }
     465             :             }
     466             : 
     467             :             /* push current level pointers */
     468           0 :             stack2[level2].start=start2;
     469           0 :             stack2[level2].s=s2;
     470           0 :             stack2[level2].limit=limit2;
     471           0 :             ++level2;
     472             : 
     473             :             /* set empty intermediate level if skipped */
     474           0 :             if(level2<2) {
     475           0 :                 stack2[level2++].start=NULL;
     476             :             }
     477             : 
     478             :             /* set next level pointers to decomposition */
     479           0 :             start2=s2=p;
     480           0 :             limit2=p+length;
     481             : 
     482             :             /* get ready to read from decomposition, continue with loop */
     483           0 :             c2=-1;
     484           0 :             continue;
     485             :         }
     486             : 
     487             :         /*
     488             :          * no decomposition/case folding, max level for both sides:
     489             :          * return difference result
     490             :          *
     491             :          * code point order comparison must not just return cp1-cp2
     492             :          * because when single surrogates are present then the surrogate pairs
     493             :          * that formed cp1 and cp2 may be from different string indexes
     494             :          *
     495             :          * example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units
     496             :          * c1=d800 cp1=10001 c2=dc00 cp2=10000
     497             :          * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 }
     498             :          *
     499             :          * therefore, use same fix-up as in ustring.c/uprv_strCompare()
     500             :          * except: uprv_strCompare() fetches c=*s while this function fetches c=*s++
     501             :          * so we have slightly different pointer/start/limit comparisons here
     502             :          */
     503             : 
     504           0 :         if(c1>=0xd800 && c2>=0xd800 && (options&U_COMPARE_CODE_POINT_ORDER)) {
     505             :             /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
     506           0 :             if(
     507           0 :                 (c1<=0xdbff && s1!=limit1 && U16_IS_TRAIL(*s1)) ||
     508           0 :                 (U16_IS_TRAIL(c1) && start1!=(s1-1) && U16_IS_LEAD(*(s1-2)))
     509             :             ) {
     510             :                 /* part of a surrogate pair, leave >=d800 */
     511             :             } else {
     512             :                 /* BMP code point - may be surrogate code point - make <d800 */
     513           0 :                 c1-=0x2800;
     514             :             }
     515             : 
     516           0 :             if(
     517           0 :                 (c2<=0xdbff && s2!=limit2 && U16_IS_TRAIL(*s2)) ||
     518           0 :                 (U16_IS_TRAIL(c2) && start2!=(s2-1) && U16_IS_LEAD(*(s2-2)))
     519             :             ) {
     520             :                 /* part of a surrogate pair, leave >=d800 */
     521             :             } else {
     522             :                 /* BMP code point - may be surrogate code point - make <d800 */
     523           0 :                 c2-=0x2800;
     524             :             }
     525             :         }
     526             : 
     527           0 :         return c1-c2;
     528           0 :     }
     529             : }
     530             : 
     531             : static
     532           0 : UBool _normalize(const Normalizer2 *n2, const UChar *s, int32_t length,
     533             :                 UnicodeString &normalized, UErrorCode *pErrorCode) {
     534           0 :     UnicodeString str(length<0, s, length);  // NOTE(review): presumably the read-only aliasing constructor (no copy) - confirm against UnicodeString docs
     535             : 
     536             :     // find how much of the start of s already fulfills the quick-check conditions of n2
     537           0 :     int32_t spanQCYes=n2->spanQuickCheckYes(str, *pErrorCode);
     538           0 :     if (U_FAILURE(*pErrorCode)) {
     539           0 :         return FALSE;
     540             :     }
     541             :     /*
     542             :      * ICU 2.4 had a further optimization:
     543             :      * If both strings were not in FCD, then they were both NFD'ed,
     544             :      * and the _COMPARE_EQUIV option was turned off.
     545             :      * It is not entirely clear that this is valid with the current
     546             :      * definition of the canonical caseless match.
     547             :      * Therefore, ICU 2.6 removes that optimization.
     548             :      */
     549           0 :     if(spanQCYes<str.length()) {  // the part of s from spanQCYes onward did not pass the quick check
     550           0 :         UnicodeString unnormalized=str.tempSubString(spanQCYes);
     551           0 :         normalized.setTo(FALSE, str.getBuffer(), spanQCYes);
     552           0 :         n2->normalizeSecondAndAppend(normalized, unnormalized, *pErrorCode);
     553           0 :         if (U_SUCCESS(*pErrorCode)) {
     554           0 :             return TRUE;  // "normalized" now holds the passing prefix plus the normalized remainder
     555             :         }
     556             :     }
     557           0 :     return FALSE;  // whole string passed the quick check ("normalized" untouched), or normalizing failed
     558             : }
     559             : 
     560             : U_CAPI int32_t U_EXPORT2
                       /*
                        * Compares s1 and s2 by pre-normalizing each string where needed (FCD, or
                        * full NFD for the Turkic case-folding option) and then delegating to
                        * unorm_cmpEquivFold().
                        *
                        * @param s1, s2           Input strings; must not be null.
                        * @param length1, length2 String lengths; -1 is accepted, values below -1
                        *                         are rejected.
                        * @param options          Comparison flags. Tested here:
                        *                         UNORM_INPUT_IS_FCD and U_FOLD_CASE_EXCLUDE_SPECIAL_I;
                        *                         the bits above UNORM_COMPARE_NORM_OPTIONS_SHIFT
                        *                         are read as normalization options
                        *                         (UNORM_UNICODE_3_2). The value, with
                        *                         _COMPARE_EQUIV added, is forwarded to
                        *                         unorm_cmpEquivFold().
                        * @param pErrorCode       In/out error code. If it already indicates a
                        *                         failure the function returns 0 at once; it is
                        *                         set to U_ILLEGAL_ARGUMENT_ERROR for a null string
                        *                         or a length below -1.
                        * @return The result of unorm_cmpEquivFold(), or 0 whenever *pErrorCode
                        *         indicates a failure.
                        */
     561           0 : unorm_compare(const UChar *s1, int32_t length1,
     562             :               const UChar *s2, int32_t length2,
     563             :               uint32_t options,
     564             :               UErrorCode *pErrorCode) {
     565             :     /* argument checking */
     566           0 :     if(U_FAILURE(*pErrorCode)) {
     567           0 :         return 0;
     568             :     }
     569           0 :     if(s1==0 || length1<-1 || s2==0 || length2<-1) {
     570           0 :         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
     571           0 :         return 0;
     572             :     }
     573             : 
                           // fcd1/fcd2 own any normalized copies. They are declared at function
                           // scope because s1/s2 may be redirected to their buffers below and are
                           // still read by unorm_cmpEquivFold() at the end.
     574           0 :     UnicodeString fcd1, fcd2;
     575           0 :     int32_t normOptions=(int32_t)(options>>UNORM_COMPARE_NORM_OPTIONS_SHIFT);  // upper bits: normalization options
     576           0 :     options|=_COMPARE_EQUIV;  // always set this internal flag for unorm_cmpEquivFold()
     577             : 
     578             :     /*
     579             :      * UAX #21 Case Mappings, as fixed for Unicode version 4
     580             :      * (see Jitterbug 2021), defines a canonical caseless match as
     581             :      *
     582             :      * A string X is a canonical caseless match
     583             :      * for a string Y if and only if
     584             :      * NFD(toCasefold(NFD(X))) = NFD(toCasefold(NFD(Y)))
     585             :      *
     586             :      * For better performance, we check for FCD (or let the caller tell us that
     587             :      * both strings are in FCD) for the inner normalization.
     588             :      * BasicNormalizerTest::FindFoldFCDExceptions() makes sure that
     589             :      * case-folding preserves the FCD-ness of a string.
     590             :      * The outer normalization is then only performed by unorm_cmpEquivFold()
     591             :      * when there is a difference.
     592             :      *
     593             :      * Exception: When using the Turkic case-folding option, we do perform
     594             :      * full NFD first. This is because in the Turkic case precomposed characters
     595             :      * with 0049 capital I or 0069 small i fold differently whether they
     596             :      * are first decomposed or not, so an FCD check - a check only for
     597             :      * canonical order - is not sufficient.
     598             :      */
                           // Pre-normalize unless the caller declared the input to be FCD; the
                           // Turkic option forces the pre-normalization regardless of that flag.
     599           0 :     if(!(options&UNORM_INPUT_IS_FCD) || (options&U_FOLD_CASE_EXCLUDE_SPECIAL_I)) {
     600             :         const Normalizer2 *n2;
     601           0 :         if(options&U_FOLD_CASE_EXCLUDE_SPECIAL_I) {
     602           0 :             n2=Normalizer2::getNFDInstance(*pErrorCode);  // Turkic option: full NFD
     603             :         } else {
     604           0 :             n2=Normalizer2Factory::getFCDInstance(*pErrorCode);  // otherwise FCD is sufficient
     605             :         }
     606           0 :         if (U_FAILURE(*pErrorCode)) {
     607           0 :             return 0;
     608             :         }
     609             : 
     610           0 :         if(normOptions&UNORM_UNICODE_3_2) {
                                   // Wrap n2 in a FilteredNormalizer2 built from the Unicode 3.2 set.
                                   // fn2 is local to this branch, which is why the _normalize()
                                   // calls are repeated in both branches.
                                   // NOTE(review): uni32 is dereferenced without checking
                                   // *pErrorCode or for null first - confirm that
                                   // uniset_getUnicode32Instance() cannot return null on failure.
     611           0 :             const UnicodeSet *uni32=uniset_getUnicode32Instance(*pErrorCode);
     612           0 :             FilteredNormalizer2 fn2(*n2, *uni32);
                                   // Switch to the normalized copy only when _normalize() produced one.
     613           0 :             if(_normalize(&fn2, s1, length1, fcd1, pErrorCode)) {
     614           0 :                 s1=fcd1.getBuffer();
     615           0 :                 length1=fcd1.length();
     616             :             }
     617           0 :             if(_normalize(&fn2, s2, length2, fcd2, pErrorCode)) {
     618           0 :                 s2=fcd2.getBuffer();
     619           0 :                 length2=fcd2.length();
     620             :             }
     621             :         } else {
                                   // Same as above, using n2 directly.
     622           0 :             if(_normalize(n2, s1, length1, fcd1, pErrorCode)) {
     623           0 :                 s1=fcd1.getBuffer();
     624           0 :                 length1=fcd1.length();
     625             :             }
     626           0 :             if(_normalize(n2, s2, length2, fcd2, pErrorCode)) {
     627           0 :                 s2=fcd2.getBuffer();
     628           0 :                 length2=fcd2.length();
     629             :             }
     630             :         }
     631             :     }
     632             : 
                           // _normalize() may have set an error above; only compare on success.
     633           0 :     if(U_SUCCESS(*pErrorCode)) {
     634           0 :         return unorm_cmpEquivFold(s1, length1, s2, length2, options, pErrorCode);
     635             :     } else {
     636           0 :         return 0;
     637             :     }
     638             : }
     639             : 
     640             : #endif /* #if !UCONFIG_NO_NORMALIZATION */

Generated by: LCOV version 1.13