LCOV - code coverage report
Current view: top level - intl/icu/source/common - caniter.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 0 257 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 14 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             :  *****************************************************************************
       5             :  * Copyright (C) 1996-2015, International Business Machines Corporation and
       6             :  * others. All Rights Reserved.
       7             :  *****************************************************************************
       8             :  */
       9             : 
      10             : #include "unicode/utypes.h"
      11             : 
      12             : #if !UCONFIG_NO_NORMALIZATION
      13             : 
      14             : #include "unicode/caniter.h"
      15             : #include "unicode/normalizer2.h"
      16             : #include "unicode/uchar.h"
      17             : #include "unicode/uniset.h"
      18             : #include "unicode/usetiter.h"
      19             : #include "unicode/ustring.h"
      20             : #include "unicode/utf16.h"
      21             : #include "cmemory.h"
      22             : #include "hash.h"
      23             : #include "normalizer2impl.h"
      24             : 
      25             : /**
      26             :  * This class allows one to iterate through all the strings that are canonically equivalent to a given
      27             :  * string. For example, here are some sample results:
      28             : Results for: {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
      29             : 1: \u0041\u030A\u0064\u0307\u0327
      30             :  = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
      31             : 2: \u0041\u030A\u0064\u0327\u0307
      32             :  = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
      33             : 3: \u0041\u030A\u1E0B\u0327
      34             :  = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
      35             : 4: \u0041\u030A\u1E11\u0307
      36             :  = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
      37             : 5: \u00C5\u0064\u0307\u0327
      38             :  = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
      39             : 6: \u00C5\u0064\u0327\u0307
      40             :  = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
      41             : 7: \u00C5\u1E0B\u0327
      42             :  = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
      43             : 8: \u00C5\u1E11\u0307
      44             :  = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
      45             : 9: \u212B\u0064\u0307\u0327
      46             :  = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
      47             : 10: \u212B\u0064\u0327\u0307
      48             :  = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
      49             : 11: \u212B\u1E0B\u0327
      50             :  = {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
      51             : 12: \u212B\u1E11\u0307
      52             :  = {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
      53             :  *<br>Note: the code is intended for use with small strings, and is not suitable for larger ones,
      54             :  * since it has not been optimized for that situation.
      55             :  *@author M. Davis
      56             :  *@draft
      57             :  */
      58             : 
      59             : // public
      60             : 
      61             : U_NAMESPACE_BEGIN
      62             : 
      63             : // TODO: add boilerplate methods.
      64             : 
      65           0 : UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CanonicalIterator)
      66             : 
      67             : /**
      68             :  *@param source string to get results for
      69             :  */
      70           0 : CanonicalIterator::CanonicalIterator(const UnicodeString &sourceStr, UErrorCode &status) :
      71             :     pieces(NULL),
      72             :     pieces_length(0),
      73             :     pieces_lengths(NULL),
      74             :     current(NULL),
      75             :     current_length(0),
      76           0 :     nfd(*Normalizer2::getNFDInstance(status)),
      77           0 :     nfcImpl(*Normalizer2Factory::getNFCImpl(status))
      78             : {
      79           0 :     if(U_SUCCESS(status) && nfcImpl.ensureCanonIterData(status)) {
      80           0 :       setSource(sourceStr, status);
      81             :     }
      82           0 : }
      83             : 
      84           0 : CanonicalIterator::~CanonicalIterator() {
      85           0 :   cleanPieces();
      86           0 : }
      87             : 
      88           0 : void CanonicalIterator::cleanPieces() {
      89           0 :     int32_t i = 0;
      90           0 :     if(pieces != NULL) {
      91           0 :         for(i = 0; i < pieces_length; i++) {
      92           0 :             if(pieces[i] != NULL) {
      93           0 :                 delete[] pieces[i];
      94             :             }
      95             :         }
      96           0 :         uprv_free(pieces);
      97           0 :         pieces = NULL;
      98           0 :         pieces_length = 0;
      99             :     }
     100           0 :     if(pieces_lengths != NULL) {
     101           0 :         uprv_free(pieces_lengths);
     102           0 :         pieces_lengths = NULL;
     103             :     }
     104           0 :     if(current != NULL) {
     105           0 :         uprv_free(current);
     106           0 :         current = NULL;
     107           0 :         current_length = 0;
     108             :     }
     109           0 : }
     110             : 
     111             : /**
     112             :  *@return gets the source: NOTE: it is the NFD form of source
     113             :  */
     114           0 : UnicodeString CanonicalIterator::getSource() {
     115           0 :   return source;
     116             : }
     117             : 
     118             : /**
     119             :  * Resets the iterator so that one can start again from the beginning.
     120             :  */
     121           0 : void CanonicalIterator::reset() {
     122           0 :     done = FALSE;
     123           0 :     for (int i = 0; i < current_length; ++i) {
     124           0 :         current[i] = 0;
     125             :     }
     126           0 : }
     127             : 
     128             : /**
     129             :  *@return the next string that is canonically equivalent. The value null is returned when
     130             :  * the iteration is done.
     131             :  */
     132           0 : UnicodeString CanonicalIterator::next() {
     133           0 :     int32_t i = 0;
     134             : 
     135           0 :     if (done) {
     136           0 :       buffer.setToBogus();
     137           0 :       return buffer;
     138             :     }
     139             : 
     140             :     // delete old contents
     141           0 :     buffer.remove();
     142             : 
     143             :     // construct return value
     144             : 
     145           0 :     for (i = 0; i < pieces_length; ++i) {
     146           0 :         buffer.append(pieces[i][current[i]]);
     147             :     }
     148             :     //String result = buffer.toString(); // not needed
     149             : 
     150             :     // find next value for next time
     151             : 
     152           0 :     for (i = current_length - 1; ; --i) {
     153           0 :         if (i < 0) {
     154           0 :             done = TRUE;
     155           0 :             break;
     156             :         }
     157           0 :         current[i]++;
     158           0 :         if (current[i] < pieces_lengths[i]) break; // got sequence
     159           0 :         current[i] = 0;
     160             :     }
     161           0 :     return buffer;
     162             : }
     163             : 
     164             : /**
     165             :  *@param set the source string to iterate against. This allows the same iterator to be used
     166             :  * while changing the source string, saving object creation.
     167             :  */
     168           0 : void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &status) {
     169           0 :     int32_t list_length = 0;
     170           0 :     UChar32 cp = 0;
     171           0 :     int32_t start = 0;
     172           0 :     int32_t i = 0;
     173           0 :     UnicodeString *list = NULL;
     174             : 
     175           0 :     nfd.normalize(newSource, source, status);
     176           0 :     if(U_FAILURE(status)) {
     177           0 :       return;
     178             :     }
     179           0 :     done = FALSE;
     180             : 
     181           0 :     cleanPieces();
     182             : 
     183             :     // catch degenerate case
     184           0 :     if (newSource.length() == 0) {
     185           0 :         pieces = (UnicodeString **)uprv_malloc(sizeof(UnicodeString *));
     186           0 :         pieces_lengths = (int32_t*)uprv_malloc(1 * sizeof(int32_t));
     187           0 :         pieces_length = 1;
     188           0 :         current = (int32_t*)uprv_malloc(1 * sizeof(int32_t));
     189           0 :         current_length = 1;
     190           0 :         if (pieces == NULL || pieces_lengths == NULL || current == NULL) {
     191           0 :             status = U_MEMORY_ALLOCATION_ERROR;
     192           0 :             goto CleanPartialInitialization;
     193             :         }
     194           0 :         current[0] = 0;
     195           0 :         pieces[0] = new UnicodeString[1];
     196           0 :         pieces_lengths[0] = 1;
     197           0 :         if (pieces[0] == 0) {
     198           0 :             status = U_MEMORY_ALLOCATION_ERROR;
     199           0 :             goto CleanPartialInitialization;
     200             :         }
     201           0 :         return;
     202             :     }
     203             : 
     204             : 
     205           0 :     list = new UnicodeString[source.length()];
     206           0 :     if (list == 0) {
     207           0 :         status = U_MEMORY_ALLOCATION_ERROR;
     208           0 :         goto CleanPartialInitialization;
     209             :     }
     210             : 
     211             :     // i should initialy be the number of code units at the 
     212             :     // start of the string
     213           0 :     i = U16_LENGTH(source.char32At(0));
     214             :     //int32_t i = 1;
     215             :     // find the segments
     216             :     // This code iterates through the source string and 
     217             :     // extracts segments that end up on a codepoint that
     218             :     // doesn't start any decompositions. (Analysis is done
     219             :     // on the NFD form - see above).
     220           0 :     for (; i < source.length(); i += U16_LENGTH(cp)) {
     221           0 :         cp = source.char32At(i);
     222           0 :         if (nfcImpl.isCanonSegmentStarter(cp)) {
     223           0 :             source.extract(start, i-start, list[list_length++]); // add up to i
     224           0 :             start = i;
     225             :         }
     226             :     }
     227           0 :     source.extract(start, i-start, list[list_length++]); // add last one
     228             : 
     229             : 
     230             :     // allocate the arrays, and find the strings that are CE to each segment
     231           0 :     pieces = (UnicodeString **)uprv_malloc(list_length * sizeof(UnicodeString *));
     232           0 :     pieces_length = list_length;
     233           0 :     pieces_lengths = (int32_t*)uprv_malloc(list_length * sizeof(int32_t));
     234           0 :     current = (int32_t*)uprv_malloc(list_length * sizeof(int32_t));
     235           0 :     current_length = list_length;
     236           0 :     if (pieces == NULL || pieces_lengths == NULL || current == NULL) {
     237           0 :         status = U_MEMORY_ALLOCATION_ERROR;
     238           0 :         goto CleanPartialInitialization;
     239             :     }
     240             : 
     241           0 :     for (i = 0; i < current_length; i++) {
     242           0 :         current[i] = 0;
     243             :     }
     244             :     // for each segment, get all the combinations that can produce 
     245             :     // it after NFD normalization
     246           0 :     for (i = 0; i < pieces_length; ++i) {
     247             :         //if (PROGRESS) printf("SEGMENT\n");
     248           0 :         pieces[i] = getEquivalents(list[i], pieces_lengths[i], status);
     249             :     }
     250             : 
     251           0 :     delete[] list;
     252           0 :     return;
     253             : // Common section to cleanup all local variables and reset object variables.
     254             : CleanPartialInitialization:
     255           0 :     if (list != NULL) {
     256           0 :         delete[] list;
     257             :     }
     258           0 :     cleanPieces();
     259             : }
     260             : 
     261             : /**
     262             :  * Dumb recursive implementation of permutation.
     263             :  * TODO: optimize
     264             :  * @param source the string to find permutations for
     265             :  * @return the results in a set.
     266             :  */
     267           0 : void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status) {
     268           0 :     if(U_FAILURE(status)) {
     269           0 :         return;
     270             :     }
     271             :     //if (PROGRESS) printf("Permute: %s\n", UToS(Tr(source)));
     272           0 :     int32_t i = 0;
     273             : 
     274             :     // optimization:
     275             :     // if zero or one character, just return a set with it
     276             :     // we check for length < 2 to keep from counting code points all the time
     277           0 :     if (source.length() <= 2 && source.countChar32() <= 1) {
     278           0 :         UnicodeString *toPut = new UnicodeString(source);
     279             :         /* test for NULL */
     280           0 :         if (toPut == 0) {
     281           0 :             status = U_MEMORY_ALLOCATION_ERROR;
     282           0 :             return;
     283             :         }
     284           0 :         result->put(source, toPut, status);
     285           0 :         return;
     286             :     }
     287             : 
     288             :     // otherwise iterate through the string, and recursively permute all the other characters
     289             :     UChar32 cp;
     290           0 :     Hashtable subpermute(status);
     291           0 :     if(U_FAILURE(status)) {
     292           0 :         return;
     293             :     }
     294           0 :     subpermute.setValueDeleter(uprv_deleteUObject);
     295             : 
     296           0 :     for (i = 0; i < source.length(); i += U16_LENGTH(cp)) {
     297           0 :         cp = source.char32At(i);
     298           0 :         const UHashElement *ne = NULL;
     299           0 :         int32_t el = UHASH_FIRST;
     300           0 :         UnicodeString subPermuteString = source;
     301             : 
     302             :         // optimization:
     303             :         // if the character is canonical combining class zero,
     304             :         // don't permute it
     305           0 :         if (skipZeros && i != 0 && u_getCombiningClass(cp) == 0) {
     306             :             //System.out.println("Skipping " + Utility.hex(UTF16.valueOf(source, i)));
     307           0 :             continue;
     308             :         }
     309             : 
     310           0 :         subpermute.removeAll();
     311             : 
     312             :         // see what the permutations of the characters before and after this one are
     313             :         //Hashtable *subpermute = permute(source.substring(0,i) + source.substring(i + UTF16.getCharCount(cp)));
     314           0 :         permute(subPermuteString.remove(i, U16_LENGTH(cp)), skipZeros, &subpermute, status);
     315             :         /* Test for buffer overflows */
     316           0 :         if(U_FAILURE(status)) {
     317           0 :             return;
     318             :         }
     319             :         // The upper remove is destructive. The question is do we have to make a copy, or we don't care about the contents 
     320             :         // of source at this point.
     321             : 
     322             :         // prefix this character to all of them
     323           0 :         ne = subpermute.nextElement(el);
     324           0 :         while (ne != NULL) {
     325           0 :             UnicodeString *permRes = (UnicodeString *)(ne->value.pointer);
     326           0 :             UnicodeString *chStr = new UnicodeString(cp);
     327             :             //test for  NULL
     328           0 :             if (chStr == NULL) {
     329           0 :                 status = U_MEMORY_ALLOCATION_ERROR;
     330           0 :                 return;
     331             :             }
     332           0 :             chStr->append(*permRes); //*((UnicodeString *)(ne->value.pointer));
     333             :             //if (PROGRESS) printf("  Piece: %s\n", UToS(*chStr));
     334           0 :             result->put(*chStr, chStr, status);
     335           0 :             ne = subpermute.nextElement(el);
     336             :         }
     337             :     }
     338             :     //return result;
     339             : }
     340             : 
     341             : // privates
     342             : 
     343             : // we have a segment, in NFD. Find all the strings that are canonically equivalent to it.
     344           0 : UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status) {
     345           0 :     Hashtable result(status);
     346           0 :     Hashtable permutations(status);
     347           0 :     Hashtable basic(status);
     348           0 :     if (U_FAILURE(status)) {
     349           0 :         return 0;
     350             :     }
     351           0 :     result.setValueDeleter(uprv_deleteUObject);
     352           0 :     permutations.setValueDeleter(uprv_deleteUObject);
     353           0 :     basic.setValueDeleter(uprv_deleteUObject);
     354             : 
     355             :     UChar USeg[256];
     356           0 :     int32_t segLen = segment.extract(USeg, 256, status);
     357           0 :     getEquivalents2(&basic, USeg, segLen, status);
     358             : 
     359             :     // now get all the permutations
     360             :     // add only the ones that are canonically equivalent
     361             :     // TODO: optimize by not permuting any class zero.
     362             : 
     363           0 :     const UHashElement *ne = NULL;
     364           0 :     int32_t el = UHASH_FIRST;
     365             :     //Iterator it = basic.iterator();
     366           0 :     ne = basic.nextElement(el);
     367             :     //while (it.hasNext())
     368           0 :     while (ne != NULL) {
     369             :         //String item = (String) it.next();
     370           0 :         UnicodeString item = *((UnicodeString *)(ne->value.pointer));
     371             : 
     372           0 :         permutations.removeAll();
     373           0 :         permute(item, CANITER_SKIP_ZEROES, &permutations, status);
     374           0 :         const UHashElement *ne2 = NULL;
     375           0 :         int32_t el2 = UHASH_FIRST;
     376             :         //Iterator it2 = permutations.iterator();
     377           0 :         ne2 = permutations.nextElement(el2);
     378             :         //while (it2.hasNext())
     379           0 :         while (ne2 != NULL) {
     380             :             //String possible = (String) it2.next();
     381             :             //UnicodeString *possible = new UnicodeString(*((UnicodeString *)(ne2->value.pointer)));
     382           0 :             UnicodeString possible(*((UnicodeString *)(ne2->value.pointer)));
     383           0 :             UnicodeString attempt;
     384           0 :             nfd.normalize(possible, attempt, status);
     385             : 
     386             :             // TODO: check if operator == is semanticaly the same as attempt.equals(segment)
     387           0 :             if (attempt==segment) {
     388             :                 //if (PROGRESS) printf("Adding Permutation: %s\n", UToS(Tr(*possible)));
     389             :                 // TODO: use the hashtable just to catch duplicates - store strings directly (somehow).
     390           0 :                 result.put(possible, new UnicodeString(possible), status); //add(possible);
     391             :             } else {
     392             :                 //if (PROGRESS) printf("-Skipping Permutation: %s\n", UToS(Tr(*possible)));
     393             :             }
     394             : 
     395           0 :             ne2 = permutations.nextElement(el2);
     396             :         }
     397           0 :         ne = basic.nextElement(el);
     398             :     }
     399             : 
     400             :     /* Test for buffer overflows */
     401           0 :     if(U_FAILURE(status)) {
     402           0 :         return 0;
     403             :     }
     404             :     // convert into a String[] to clean up storage
     405             :     //String[] finalResult = new String[result.size()];
     406           0 :     UnicodeString *finalResult = NULL;
     407             :     int32_t resultCount;
     408           0 :     if((resultCount = result.count())) {
     409           0 :         finalResult = new UnicodeString[resultCount];
     410           0 :         if (finalResult == 0) {
     411           0 :             status = U_MEMORY_ALLOCATION_ERROR;
     412           0 :             return NULL;
     413             :         }
     414             :     }
     415             :     else {
     416           0 :         status = U_ILLEGAL_ARGUMENT_ERROR;
     417           0 :         return NULL;
     418             :     }
     419             :     //result.toArray(finalResult);
     420           0 :     result_len = 0;
     421           0 :     el = UHASH_FIRST;
     422           0 :     ne = result.nextElement(el);
     423           0 :     while(ne != NULL) {
     424           0 :         finalResult[result_len++] = *((UnicodeString *)(ne->value.pointer));
     425           0 :         ne = result.nextElement(el);
     426             :     }
     427             : 
     428             : 
     429           0 :     return finalResult;
     430             : }
     431             : 
     432           0 : Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const UChar *segment, int32_t segLen, UErrorCode &status) {
     433             : 
     434           0 :     if (U_FAILURE(status)) {
     435           0 :         return NULL;
     436             :     }
     437             : 
     438             :     //if (PROGRESS) printf("Adding: %s\n", UToS(Tr(segment)));
     439             : 
     440           0 :     UnicodeString toPut(segment, segLen);
     441             : 
     442           0 :     fillinResult->put(toPut, new UnicodeString(toPut), status);
     443             : 
     444           0 :     UnicodeSet starts;
     445             : 
     446             :     // cycle through all the characters
     447             :     UChar32 cp;
     448           0 :     for (int32_t i = 0; i < segLen; i += U16_LENGTH(cp)) {
     449             :         // see if any character is at the start of some decomposition
     450           0 :         U16_GET(segment, 0, i, segLen, cp);
     451           0 :         if (!nfcImpl.getCanonStartSet(cp, starts)) {
     452           0 :             continue;
     453             :         }
     454             :         // if so, see which decompositions match
     455           0 :         UnicodeSetIterator iter(starts);
     456           0 :         while (iter.next()) {
     457           0 :             UChar32 cp2 = iter.getCodepoint();
     458           0 :             Hashtable remainder(status);
     459           0 :             remainder.setValueDeleter(uprv_deleteUObject);
     460           0 :             if (extract(&remainder, cp2, segment, segLen, i, status) == NULL) {
     461           0 :                 continue;
     462             :             }
     463             : 
     464             :             // there were some matches, so add all the possibilities to the set.
     465           0 :             UnicodeString prefix(segment, i);
     466           0 :             prefix += cp2;
     467             : 
     468           0 :             int32_t el = UHASH_FIRST;
     469           0 :             const UHashElement *ne = remainder.nextElement(el);
     470           0 :             while (ne != NULL) {
     471           0 :                 UnicodeString item = *((UnicodeString *)(ne->value.pointer));
     472           0 :                 UnicodeString *toAdd = new UnicodeString(prefix);
     473             :                 /* test for NULL */
     474           0 :                 if (toAdd == 0) {
     475           0 :                     status = U_MEMORY_ALLOCATION_ERROR;
     476           0 :                     return NULL;
     477             :                 }
     478           0 :                 *toAdd += item;
     479           0 :                 fillinResult->put(*toAdd, toAdd, status);
     480             : 
     481             :                 //if (PROGRESS) printf("Adding: %s\n", UToS(Tr(*toAdd)));
     482             : 
     483           0 :                 ne = remainder.nextElement(el);
     484             :             }
     485             :         }
     486             :     }
     487             : 
     488             :     /* Test for buffer overflows */
     489           0 :     if(U_FAILURE(status)) {
     490           0 :         return NULL;
     491             :     }
     492           0 :     return fillinResult;
     493             : }
     494             : 
     495             : /**
     496             :  * See if the decomposition of cp2 is at segment starting at segmentPos 
     497             :  * (with canonical rearrangment!)
     498             :  * If so, take the remainder, and return the equivalents 
     499             :  */
     500           0 : Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, const UChar *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status) {
     501             : //Hashtable *CanonicalIterator::extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status) {
     502             :     //if (PROGRESS) printf(" extract: %s, ", UToS(Tr(UnicodeString(comp))));
     503             :     //if (PROGRESS) printf("%s, %i\n", UToS(Tr(segment)), segmentPos);
     504             : 
     505           0 :     if (U_FAILURE(status)) {
     506           0 :         return NULL;
     507             :     }
     508             : 
     509           0 :     UnicodeString temp(comp);
     510           0 :     int32_t inputLen=temp.length();
     511           0 :     UnicodeString decompString;
     512           0 :     nfd.normalize(temp, decompString, status);
     513           0 :     if (U_FAILURE(status)) {
     514           0 :         return NULL;
     515             :     }
     516           0 :     if (decompString.isBogus()) {
     517           0 :         status = U_MEMORY_ALLOCATION_ERROR;
     518           0 :         return NULL;
     519             :     }
     520           0 :     const UChar *decomp=decompString.getBuffer();
     521           0 :     int32_t decompLen=decompString.length();
     522             : 
     523             :     // See if it matches the start of segment (at segmentPos)
     524           0 :     UBool ok = FALSE;
     525             :     UChar32 cp;
     526           0 :     int32_t decompPos = 0;
     527             :     UChar32 decompCp;
     528           0 :     U16_NEXT(decomp, decompPos, decompLen, decompCp);
     529             : 
     530           0 :     int32_t i = segmentPos;
     531           0 :     while(i < segLen) {
     532           0 :         U16_NEXT(segment, i, segLen, cp);
     533             : 
     534           0 :         if (cp == decompCp) { // if equal, eat another cp from decomp
     535             : 
     536             :             //if (PROGRESS) printf("  matches: %s\n", UToS(Tr(UnicodeString(cp))));
     537             : 
     538           0 :             if (decompPos == decompLen) { // done, have all decomp characters!
     539           0 :                 temp.append(segment+i, segLen-i);
     540           0 :                 ok = TRUE;
     541           0 :                 break;
     542             :             }
     543           0 :             U16_NEXT(decomp, decompPos, decompLen, decompCp);
     544             :         } else {
     545             :             //if (PROGRESS) printf("  buffer: %s\n", UToS(Tr(UnicodeString(cp))));
     546             : 
     547             :             // brute force approach
     548           0 :             temp.append(cp);
     549             : 
     550             :             /* TODO: optimize
     551             :             // since we know that the classes are monotonically increasing, after zero
     552             :             // e.g. 0 5 7 9 0 3
     553             :             // we can do an optimization
     554             :             // there are only a few cases that work: zero, less, same, greater
     555             :             // if both classes are the same, we fail
     556             :             // if the decomp class < the segment class, we fail
     557             : 
     558             :             segClass = getClass(cp);
     559             :             if (decompClass <= segClass) return null;
     560             :             */
     561             :         }
     562             :     }
     563           0 :     if (!ok)
     564           0 :         return NULL; // we failed, characters left over
     565             : 
     566             :     //if (PROGRESS) printf("Matches\n");
     567             : 
     568           0 :     if (inputLen == temp.length()) {
     569           0 :         fillinResult->put(UnicodeString(), new UnicodeString(), status);
     570           0 :         return fillinResult; // succeed, but no remainder
     571             :     }
     572             : 
     573             :     // brute force approach
     574             :     // check to make sure result is canonically equivalent
     575           0 :     UnicodeString trial;
     576           0 :     nfd.normalize(temp, trial, status);
     577           0 :     if(U_FAILURE(status) || trial.compare(segment+segmentPos, segLen - segmentPos) != 0) {
     578           0 :         return NULL;
     579             :     }
     580             : 
     581           0 :     return getEquivalents2(fillinResult, temp.getBuffer()+inputLen, temp.length()-inputLen, status);
     582             : }
     583             : 
     584             : U_NAMESPACE_END
     585             : 
     586             : #endif /* #if !UCONFIG_NO_NORMALIZATION */

Generated by: LCOV version 1.13