LCOV - code coverage report
Current view: top level - intl/icu/source/i18n - utf16collationiterator.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 0 273 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 30 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : *******************************************************************************
       5             : * Copyright (C) 2010-2014, International Business Machines
       6             : * Corporation and others.  All Rights Reserved.
       7             : *******************************************************************************
       8             : * utf16collationiterator.cpp
       9             : *
      10             : * created on: 2010oct27
      11             : * created by: Markus W. Scherer
      12             : */
      13             : 
      14             : #include "unicode/utypes.h"
      15             : 
      16             : #if !UCONFIG_NO_COLLATION
      17             : 
      18             : #include "charstr.h"
      19             : #include "cmemory.h"
      20             : #include "collation.h"
      21             : #include "collationdata.h"
      22             : #include "collationfcd.h"
      23             : #include "collationiterator.h"
      24             : #include "normalizer2impl.h"
      25             : #include "uassert.h"
      26             : #include "utf16collationiterator.h"
      27             : 
      28             : U_NAMESPACE_BEGIN
      29             : 
      // Constructs a copy of `other` that iterates over `newText` instead of other's text.
      // The base-class state is copied via CollationIterator(other); start is set to newText,
      // and pos/limit are placed at the same code-unit offsets from newText that they had
      // from other.start. A NULL other.limit is carried over as NULL.
      30           0 : UTF16CollationIterator::UTF16CollationIterator(const UTF16CollationIterator &other,
      31           0 :                                                const UChar *newText)
      32             :         : CollationIterator(other),
      33             :           start(newText),
      34           0 :           pos(newText + (other.pos - other.start)),
      35           0 :           limit(other.limit == NULL ? NULL : newText + (other.limit - other.start)) {
      36           0 : }
      37             : 
      // Destructor with an empty body; this class releases nothing explicitly here.
      38           0 : UTF16CollationIterator::~UTF16CollationIterator() {}
      39             : 
      // Compares iterator state with `other`.
      // Returns FALSE when CollationIterator::operator== fails; otherwise returns whether both
      // iterators are at the same offset (pos - start). The text contents are not compared.
      40             : UBool
      41           0 : UTF16CollationIterator::operator==(const CollationIterator &other) const {
      42           0 :     if(!CollationIterator::operator==(other)) { return FALSE; }
                           // Unchecked downcast. NOTE(review): presumably the base-class comparison
                           // already guarantees that `other` has the same dynamic type -- confirm.
      43           0 :     const UTF16CollationIterator &o = static_cast<const UTF16CollationIterator &>(other);
      44             :     // Compare the iterator state but not the text: Assume that the caller does that.
      45           0 :     return (pos - start) == (o.pos - o.start);
      46             : }
      47             : 
      // Calls reset() and then moves the iterator to `newOffset` code units after start.
      // newOffset is not range-checked here.
      48             : void
      49           0 : UTF16CollationIterator::resetToOffset(int32_t newOffset) {
      50           0 :     reset();
      51           0 :     pos = start + newOffset;
      52           0 : }
      53             : 
      // Returns the current position as a code-unit offset from start (pos - start),
      // narrowed to int32_t.
      54             : int32_t
      55           0 : UTF16CollationIterator::getOffset() const {
      56           0 :     return (int32_t)(pos - start);
      57             : }
      58             : 
      // Reads the next UTF-16 code unit and returns its trie value.
      // At pos == limit, sets c to U_SENTINEL and returns Collation::FALLBACK_CE32.
      // Otherwise stores the code unit in c, advances pos by one, and returns the value
      // looked up with UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c). The error code is unused.
      // NOTE(review): with limit == NULL a NUL code unit is not special-cased here;
      // presumably the caller detects it and calls foundNULTerminator() -- confirm.
      59             : uint32_t
      60           0 : UTF16CollationIterator::handleNextCE32(UChar32 &c, UErrorCode & /*errorCode*/) {
      61           0 :     if(pos == limit) {
      62           0 :         c = U_SENTINEL;
      63           0 :         return Collation::FALLBACK_CE32;
      64             :     }
      65           0 :     c = *pos++;
      66           0 :     return UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c);
      67             : }
      68             : 
      // Looks at the code unit at pos and consumes it only if it is a trail surrogate.
      // Returns 0 at pos == limit. Otherwise returns the code unit at pos, whether or not
      // it was a trail surrogate; pos advances only in the trail-surrogate case.
      69             : UChar
      70           0 : UTF16CollationIterator::handleGetTrailSurrogate() {
      71           0 :     if(pos == limit) { return 0; }
      72             :     UChar trail;
      73           0 :     if(U16_IS_TRAIL(trail = *pos)) { ++pos; }
      74           0 :     return trail;
      75             : }
      76             : 
      // Handles a NUL code unit that was just read.
      // If limit is NULL (no explicit limit known yet), steps pos back by one code unit,
      // records that position as limit, and returns TRUE. If limit is already set,
      // leaves the state unchanged and returns FALSE.
      77             : UBool
      78           0 : UTF16CollationIterator::foundNULTerminator() {
      79           0 :     if(limit == NULL) {
      80           0 :         limit = --pos;
      81           0 :         return TRUE;
      82             :     } else {
      83           0 :         return FALSE;
      84             :     }
      85             : }
      86             : 
      // Returns the next code point and advances pos past it.
      // Returns U_SENTINEL at pos == limit. When limit is NULL and the code unit at pos is 0,
      // sets limit = pos (without consuming the NUL) and returns U_SENTINEL.
      // A lead surrogate followed by a trail surrogate (within limit) is combined into one
      // supplementary code point; an unpaired surrogate is returned as is.
      // The error code is unused.
      87             : UChar32
      88           0 : UTF16CollationIterator::nextCodePoint(UErrorCode & /*errorCode*/) {
      89           0 :     if(pos == limit) {
      90           0 :         return U_SENTINEL;
      91             :     }
      92           0 :     UChar32 c = *pos;
      93           0 :     if(c == 0 && limit == NULL) {
      94           0 :         limit = pos;
      95           0 :         return U_SENTINEL;
      96             :     }
      97           0 :     ++pos;
      98             :     UChar trail;
      99           0 :     if(U16_IS_LEAD(c) && pos != limit && U16_IS_TRAIL(trail = *pos)) {
     100           0 :         ++pos;
     101           0 :         return U16_GET_SUPPLEMENTARY(c, trail);
     102             :     } else {
     103           0 :         return c;
     104             :     }
     105             : }
     106             : 
     // Returns the code point before pos and moves pos back to its start.
     // Returns U_SENTINEL at pos == start. A trail surrogate preceded by a lead surrogate
     // (not before start) is combined into one supplementary code point; an unpaired
     // surrogate is returned as is. The error code is unused.
     107             : UChar32
     108           0 : UTF16CollationIterator::previousCodePoint(UErrorCode & /*errorCode*/) {
     109           0 :     if(pos == start) {
     110           0 :         return U_SENTINEL;
     111             :     }
     112           0 :     UChar32 c = *--pos;
     113             :     UChar lead;
     114           0 :     if(U16_IS_TRAIL(c) && pos != start && U16_IS_LEAD(lead = *(pos - 1))) {
     115           0 :         --pos;
     116           0 :         return U16_GET_SUPPLEMENTARY(lead, c);
     117             :     } else {
     118           0 :         return c;
     119             :     }
     120             : }
     121             : 
     // Advances pos by up to `num` code points.
     // Stops early at pos == limit, or at a NUL code unit when limit is NULL (in which case
     // limit is set to that position and the NUL is not consumed).
     // A lead+trail surrogate pair counts as one code point. The error code is unused.
     122             : void
     123           0 : UTF16CollationIterator::forwardNumCodePoints(int32_t num, UErrorCode & /*errorCode*/) {
     124           0 :     while(num > 0 && pos != limit) {
     125           0 :         UChar32 c = *pos;
     126           0 :         if(c == 0 && limit == NULL) {
     127           0 :             limit = pos;
     128           0 :             break;
     129             :         }
     130           0 :         ++pos;
     131           0 :         --num;
     132           0 :         if(U16_IS_LEAD(c) && pos != limit && U16_IS_TRAIL(*pos)) {
     133           0 :             ++pos;
     134             :         }
     135             :     }
     136           0 : }
     137             : 
     // Moves pos back by up to `num` code points, stopping early at pos == start.
     // A trail surrogate preceded by a lead surrogate counts as one code point.
     // The error code is unused.
     138             : void
     139           0 : UTF16CollationIterator::backwardNumCodePoints(int32_t num, UErrorCode & /*errorCode*/) {
     140           0 :     while(num > 0 && pos != start) {
     141           0 :         UChar32 c = *--pos;
     142           0 :         --num;
     143           0 :         if(U16_IS_TRAIL(c) && pos != start && U16_IS_LEAD(*(pos-1))) {
     144           0 :             --pos;
     145             :         }
     146             :     }
     147           0 : }
     148             : 
     149             : // FCDUTF16CollationIterator ----------------------------------------------- ***
     150             : 
     // Constructs a copy of `other` that iterates over `newText` instead of other's text.
     // rawStart/segmentStart/segmentLimit/rawLimit are re-based onto newText at the same
     // offsets they had from other.rawStart (NULL limits stay NULL); nfcImpl, the
     // `normalized` buffer, and checkDir are copied.
     // The base-class start/pos/limit copied from `other` are then overwritten in the body.
     151           0 : FCDUTF16CollationIterator::FCDUTF16CollationIterator(const FCDUTF16CollationIterator &other,
     152           0 :                                                      const UChar *newText)
     153             :         : UTF16CollationIterator(other),
     154             :           rawStart(newText),
     155           0 :           segmentStart(newText + (other.segmentStart - other.rawStart)),
     156           0 :           segmentLimit(other.segmentLimit == NULL ? NULL : newText + (other.segmentLimit - other.rawStart)),
     157           0 :           rawLimit(other.rawLimit == NULL ? NULL : newText + (other.rawLimit - other.rawStart)),
     158           0 :           nfcImpl(other.nfcImpl),
     159             :           normalized(other.normalized),
     160           0 :           checkDir(other.checkDir) {
     161           0 :     if(checkDir != 0 || other.start == other.segmentStart) {
                               // `other` was iterating directly over its raw text:
                               // re-base start/pos/limit onto newText.
     162           0 :         start = newText + (other.start - other.rawStart);
     163           0 :         pos = newText + (other.pos - other.rawStart);
     164           0 :         limit = other.limit == NULL ? NULL : newText + (other.limit - other.rawStart);
     165             :     } else {
                               // `other` was iterating over its normalized buffer:
                               // point into this object's own copy of that buffer at the same offset.
     166           0 :         start = normalized.getBuffer();
     167           0 :         pos = start + (other.pos - other.start);
     168           0 :         limit = start + normalized.length();
     169             :     }
     170           0 : }
     171             : 
     // Destructor with an empty body; this class releases nothing explicitly here.
     172           0 : FCDUTF16CollationIterator::~FCDUTF16CollationIterator() {}
     173             : 
     // Compares iterator state with `other`; the text contents are not compared.
     // Returns FALSE if the CollationIterator base comparison fails, if the checkDir values
     // differ, or if (with checkDir == 0) only one of the two iterators is in its normalized
     // buffer. Otherwise compares positions: as offsets from rawStart when iterating over
     // the raw text, or as segment-start offset plus offset inside the normalized buffer.
     174             : UBool
     175           0 : FCDUTF16CollationIterator::operator==(const CollationIterator &other) const {
     176             :     // Skip the UTF16CollationIterator and call its parent.
     177           0 :     if(!CollationIterator::operator==(other)) { return FALSE; }
                           // Unchecked downcast. NOTE(review): presumably the base-class comparison
                           // already guarantees that `other` has the same dynamic type -- confirm.
     178           0 :     const FCDUTF16CollationIterator &o = static_cast<const FCDUTF16CollationIterator &>(other);
     179             :     // Compare the iterator state but not the text: Assume that the caller does that.
     180           0 :     if(checkDir != o.checkDir) { return FALSE; }
     181           0 :     if(checkDir == 0 && (start == segmentStart) != (o.start == o.segmentStart)) { return FALSE; }
     182           0 :     if(checkDir != 0 || start == segmentStart) {
     183           0 :         return (pos - rawStart) == (o.pos - o.rawStart);
     184             :     } else {
     185           0 :         return (segmentStart - rawStart) == (o.segmentStart - o.rawStart) &&
     186           0 :                 (pos - start) == (o.pos - o.start);
     187             :     }
     188             : }
     189             : 
     // Calls reset() and restarts iteration at `newOffset` code units after rawStart:
     // start, segmentStart and pos all point there, limit becomes rawLimit, and
     // checkDir is set to 1 (forward checking). newOffset is not range-checked here.
     190             : void
     191           0 : FCDUTF16CollationIterator::resetToOffset(int32_t newOffset) {
     192           0 :     reset();
     193           0 :     start = segmentStart = pos = rawStart + newOffset;
     194           0 :     limit = rawLimit;
     195           0 :     checkDir = 1;
     196           0 : }
     197             : 
     // Returns the current position as a code-unit offset from rawStart.
     // When iterating over the raw text (checkDir != 0 or start == segmentStart) this is
     // pos - rawStart. Otherwise (pos is in the normalized buffer) the result is the
     // segment's start offset if pos is at the buffer start, and the segment's limit offset
     // for every other position in the buffer.
     198             : int32_t
     199           0 : FCDUTF16CollationIterator::getOffset() const {
     200           0 :     if(checkDir != 0 || start == segmentStart) {
     201           0 :         return (int32_t)(pos - rawStart);
     202           0 :     } else if(pos == start) {
     203           0 :         return (int32_t)(segmentStart - rawStart);
     204             :     } else {
     205           0 :         return (int32_t)(segmentLimit - rawStart);
     206             :     }
     207             : }
     208             : 
     // Reads the next UTF-16 code unit, going through nextSegment() where needed, and
     // returns its trie value via UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c).
     // Sets c to U_SENTINEL and returns Collation::FALLBACK_CE32 at the end of the text
     // (pos == limit while checking forward) or when nextSegment() fails.
     // Loops until a code unit is read: if neither forward branch applies,
     // switchToForward() changes the state and the loop retries.
     209             : uint32_t
     210           0 : FCDUTF16CollationIterator::handleNextCE32(UChar32 &c, UErrorCode &errorCode) {
     211             :     for(;;) {
     212           0 :         if(checkDir > 0) {
                                   // Checking forward over the raw text.
     213           0 :             if(pos == limit) {
     214           0 :                 c = U_SENTINEL;
     215           0 :                 return Collation::FALLBACK_CE32;
     216             :             }
     217           0 :             c = *pos++;
     218           0 :             if(CollationFCD::hasTccc(c)) {
     219           0 :                 if(CollationFCD::maybeTibetanCompositeVowel(c) ||
     220           0 :                         (pos != limit && CollationFCD::hasLccc(*pos))) {
                                           // Un-read c and let nextSegment() set up the segment that
                                           // starts at c (possibly in the normalized buffer), then re-read.
     221           0 :                     --pos;
     222           0 :                     if(!nextSegment(errorCode)) {
     223           0 :                         c = U_SENTINEL;
     224           0 :                         return Collation::FALLBACK_CE32;
     225             :                     }
     226           0 :                     c = *pos++;
     227             :                 }
     228             :             }
     229           0 :             break;
     230           0 :         } else if(checkDir == 0 && pos != limit) {
                                   // Inside an already-checked segment: read without further checks.
     231           0 :             c = *pos++;
     232           0 :             break;
     233             :         } else {
     234           0 :             switchToForward();
     235             :         }
     236             :     }
     237           0 :     return UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c);
     238             : }
     239             : 
     // Handles a NUL code unit that was just read.
     // If limit is NULL, steps pos back by one code unit, records that position as both
     // limit and rawLimit, and returns TRUE. If limit is already set, leaves the state
     // unchanged and returns FALSE.
     240             : UBool
     241           0 : FCDUTF16CollationIterator::foundNULTerminator() {
     242           0 :     if(limit == NULL) {
     243           0 :         limit = rawLimit = --pos;
     244           0 :         return TRUE;
     245             :     } else {
     246           0 :         return FALSE;
     247             :     }
     248             : }
     249             : 
     // Returns the next code point and advances pos past it, going through nextSegment()
     // where needed.
     // Returns U_SENTINEL at the end of the text, when nextSegment() fails, or when a NUL
     // code unit is read while limit is NULL (limit and rawLimit are then set to the NUL's
     // position). A lead surrogate followed by a trail surrogate (within limit) is combined
     // into one supplementary code point.
     250             : UChar32
     251           0 : FCDUTF16CollationIterator::nextCodePoint(UErrorCode &errorCode) {
     252             :     UChar32 c;
     253             :     for(;;) {
     254           0 :         if(checkDir > 0) {
                                   // Checking forward over the raw text.
     255           0 :             if(pos == limit) {
     256           0 :                 return U_SENTINEL;
     257             :             }
     258           0 :             c = *pos++;
     259           0 :             if(CollationFCD::hasTccc(c)) {
     260           0 :                 if(CollationFCD::maybeTibetanCompositeVowel(c) ||
     261           0 :                         (pos != limit && CollationFCD::hasLccc(*pos))) {
                                           // Un-read c, let nextSegment() set up the segment, then re-read.
     262           0 :                     --pos;
     263           0 :                     if(!nextSegment(errorCode)) {
     264           0 :                         return U_SENTINEL;
     265             :                     }
     266           0 :                     c = *pos++;
     267             :                 }
     268           0 :             } else if(c == 0 && limit == NULL) {
                                       // NUL terminator of text without a known limit: un-read it and
                                       // remember its position as the end of the text.
     269           0 :                 limit = rawLimit = --pos;
     270           0 :                 return U_SENTINEL;
     271             :             }
     272           0 :             break;
     273           0 :         } else if(checkDir == 0 && pos != limit) {
                                   // Inside an already-checked segment: read without further checks.
     274           0 :             c = *pos++;
     275           0 :             break;
     276             :         } else {
     277           0 :             switchToForward();
     278             :         }
     279             :     }
     280             :     UChar trail;
     281           0 :     if(U16_IS_LEAD(c) && pos != limit && U16_IS_TRAIL(trail = *pos)) {
     282           0 :         ++pos;
     283           0 :         return U16_GET_SUPPLEMENTARY(c, trail);
     284             :     } else {
     285           0 :         return c;
     286             :     }
     287             : }
     288             : 
     // Returns the code point before pos and moves pos back to its start, going through
     // previousSegment() where needed.
     // Returns U_SENTINEL at the start of the text (pos == start while checking backward)
     // or when previousSegment() fails. A trail surrogate preceded by a lead surrogate
     // (not before start) is combined into one supplementary code point.
     289             : UChar32
     290           0 : FCDUTF16CollationIterator::previousCodePoint(UErrorCode &errorCode) {
     291             :     UChar32 c;
     292             :     for(;;) {
     293           0 :         if(checkDir < 0) {
                                   // Checking backward over the raw text.
     294           0 :             if(pos == start) {
     295           0 :                 return U_SENTINEL;
     296             :             }
     297           0 :             c = *--pos;
     298           0 :             if(CollationFCD::hasLccc(c)) {
     299           0 :                 if(CollationFCD::maybeTibetanCompositeVowel(c) ||
     300           0 :                         (pos != start && CollationFCD::hasTccc(*(pos - 1)))) {
                                           // Un-read c, let previousSegment() set up the segment that
                                           // ends after c, then re-read.
     301           0 :                     ++pos;
     302           0 :                     if(!previousSegment(errorCode)) {
     303           0 :                         return U_SENTINEL;
     304             :                     }
     305           0 :                     c = *--pos;
     306             :                 }
     307             :             }
     308           0 :             break;
     309           0 :         } else if(checkDir == 0 && pos != start) {
                                   // Inside an already-checked segment: read without further checks.
     310           0 :             c = *--pos;
     311           0 :             break;
     312             :         } else {
     313           0 :             switchToBackward();
     314             :         }
     315             :     }
     316             :     UChar lead;
     317           0 :     if(U16_IS_TRAIL(c) && pos != start && U16_IS_LEAD(lead = *(pos - 1))) {
     318           0 :         --pos;
     319           0 :         return U16_GET_SUPPLEMENTARY(lead, c);
     320             :     } else {
     321           0 :         return c;
     322             :     }
     323             : }
     324             : 
     // Advances by up to `num` code points by calling nextCodePoint() repeatedly.
     // Stops early as soon as nextCodePoint() returns a negative value
     // (it returns U_SENTINEL at the end of the text or on failure).
     325             : void
     326           0 : FCDUTF16CollationIterator::forwardNumCodePoints(int32_t num, UErrorCode &errorCode) {
     327             :     // Specify the class to avoid a virtual-function indirection.
     328             :     // In Java, we would declare this class final.
     329           0 :     while(num > 0 && FCDUTF16CollationIterator::nextCodePoint(errorCode) >= 0) {
     330           0 :         --num;
     331             :     }
     332           0 : }
     333             : 
     // Moves back by up to `num` code points by calling previousCodePoint() repeatedly.
     // Stops early as soon as previousCodePoint() returns a negative value
     // (it returns U_SENTINEL at the start of the text or on failure).
     334             : void
     335           0 : FCDUTF16CollationIterator::backwardNumCodePoints(int32_t num, UErrorCode &errorCode) {
     336             :     // Specify the class to avoid a virtual-function indirection.
     337             :     // In Java, we would declare this class final.
     338           0 :     while(num > 0 && FCDUTF16CollationIterator::previousCodePoint(errorCode) >= 0) {
     339           0 :         --num;
     340             :     }
     341           0 : }
     342             : 
     // Changes the iteration state so that reading can continue in the forward direction.
     // Precondition (asserted): either checkDir < 0, or checkDir == 0 with pos at limit.
     // On return checkDir is 1 (check forward up to rawLimit), or 0 when turning around in
     // the middle of a segment whose remainder [pos, segmentLimit[ can still be read.
     343             : void
     344           0 : FCDUTF16CollationIterator::switchToForward() {
     345           0 :     U_ASSERT(checkDir < 0 || (checkDir == 0 && pos == limit));
     346           0 :     if(checkDir < 0) {
     347             :         // Turn around from backward checking.
     348           0 :         start = segmentStart = pos;
     349           0 :         if(pos == segmentLimit) {
     350           0 :             limit = rawLimit;
     351           0 :             checkDir = 1;  // Check forward.
     352             :         } else {  // pos < segmentLimit
     353           0 :             checkDir = 0;  // Stay in FCD segment.
     354             :         }
     355             :     } else {
     356             :         // Reached the end of the FCD segment.
     357           0 :         if(start == segmentStart) {
     358             :             // The input text segment is FCD, extend it forward.
     359             :         } else {
     360             :             // The input text segment needed to be normalized.
     361             :             // Switch to checking forward from it.
     362           0 :             pos = start = segmentStart = segmentLimit;
     363             :             // Note: If this segment is at the end of the input text,
     364             :             // then it might help to return FALSE to indicate that, so that
     365             :             // we do not have to re-check and normalize when we turn around and go backwards.
     366             :             // However, that would complicate the call sites for an optimization of an unusual case.
     367             :         }
     368           0 :         limit = rawLimit;
     369           0 :         checkDir = 1;
     370             :     }
     371           0 : }
     372             : 
     // Determines the next segment starting at pos, scanning forward with
     // nfcImpl.nextFCD16() up to rawLimit.
     // If the scanned text passes the FCD check, limit and segmentLimit are set to the
     // boundary found and iteration continues over the raw text. If the check fails, the
     // range from pos up to the next boundary is passed to normalize() and pos is moved to
     // the start of the normalized buffer.
     // Returns FALSE without doing anything if errorCode already indicates failure, and
     // FALSE if normalize() fails; otherwise sets checkDir to 0 and returns TRUE.
     // Precondition (asserted): checkDir > 0 and pos != limit.
     373             : UBool
     374           0 : FCDUTF16CollationIterator::nextSegment(UErrorCode &errorCode) {
     375           0 :     if(U_FAILURE(errorCode)) { return FALSE; }
     376           0 :     U_ASSERT(checkDir > 0 && pos != limit);
     377             :     // The input text [segmentStart..pos[ passes the FCD check.
     378           0 :     const UChar *p = pos;
     379           0 :     uint8_t prevCC = 0;
     380             :     for(;;) {
     381             :         // Fetch the next character's fcd16 value.
     382           0 :         const UChar *q = p;
     383           0 :         uint16_t fcd16 = nfcImpl.nextFCD16(p, rawLimit);
                               // The upper byte of fcd16 is used as the lead combining class;
                               // the lower byte (see prevCC below) as the trail combining class.
     384           0 :         uint8_t leadCC = (uint8_t)(fcd16 >> 8);
     385           0 :         if(leadCC == 0 && q != pos) {
     386             :             // FCD boundary before the [q, p[ character.
     387           0 :             limit = segmentLimit = q;
     388           0 :             break;
     389             :         }
     390           0 :         if(leadCC != 0 && (prevCC > leadCC || CollationFCD::isFCD16OfTibetanCompositeVowel(fcd16))) {
     391             :             // Fails FCD check. Find the next FCD boundary and normalize.
                                   // The loop keeps consuming characters whose fcd16 is above 0xff
                                   // (non-zero upper byte); q ends up before the first character that
                                   // is not, or at rawLimit.
     392           0 :             do {
     393           0 :                 q = p;
     394           0 :             } while(p != rawLimit && nfcImpl.nextFCD16(p, rawLimit) > 0xff);
     395           0 :             if(!normalize(pos, q, errorCode)) { return FALSE; }
     396           0 :             pos = start;
     397           0 :             break;
     398             :         }
     399           0 :         prevCC = (uint8_t)fcd16;
     400           0 :         if(p == rawLimit || prevCC == 0) {
     401             :             // FCD boundary after the last character.
     402           0 :             limit = segmentLimit = p;
     403           0 :             break;
     404             :         }
     405           0 :     }
     406           0 :     U_ASSERT(pos != limit);
     407           0 :     checkDir = 0;
     408           0 :     return TRUE;
     409             : }
     410             : 
     // Changes the iteration state so that reading can continue in the backward direction.
     // Precondition (asserted): either checkDir > 0, or checkDir == 0 with pos at start.
     // On return checkDir is -1 (check backward down to rawStart), or 0 when turning around
     // in the middle of a segment whose part [segmentStart, pos[ can still be read.
     411             : void
     412           0 : FCDUTF16CollationIterator::switchToBackward() {
     413           0 :     U_ASSERT(checkDir > 0 || (checkDir == 0 && pos == start));
     414           0 :     if(checkDir > 0) {
     415             :         // Turn around from forward checking.
     416           0 :         limit = segmentLimit = pos;
     417           0 :         if(pos == segmentStart) {
     418           0 :             start = rawStart;
     419           0 :             checkDir = -1;  // Check backward.
     420             :         } else {  // pos > segmentStart
     421           0 :             checkDir = 0;  // Stay in FCD segment.
     422             :         }
     423             :     } else {
     424             :         // Reached the start of the FCD segment.
     425           0 :         if(start == segmentStart) {
     426             :             // The input text segment is FCD, extend it backward.
     427             :         } else {
     428             :             // The input text segment needed to be normalized.
     429             :             // Switch to checking backward from it.
     430           0 :             pos = limit = segmentLimit = segmentStart;
     431             :         }
     432           0 :         start = rawStart;
     433           0 :         checkDir = -1;
     434             :     }
     435           0 : }
     436             : 
     // Determines the previous segment ending at pos, scanning backward with
     // nfcImpl.previousFCD16() down to rawStart.
     // If the scanned text passes the FCD check, start and segmentStart are set to the
     // boundary found and iteration continues over the raw text. If the check fails, the
     // range from the previous boundary up to pos is passed to normalize() and pos is moved
     // to the end of the normalized buffer.
     // Returns FALSE without doing anything if errorCode already indicates failure, and
     // FALSE if normalize() fails; otherwise sets checkDir to 0 and returns TRUE.
     // Precondition (asserted): checkDir < 0 and pos != start.
     437             : UBool
     438           0 : FCDUTF16CollationIterator::previousSegment(UErrorCode &errorCode) {
     439           0 :     if(U_FAILURE(errorCode)) { return FALSE; }
     440           0 :     U_ASSERT(checkDir < 0 && pos != start);
     441             :     // The input text [pos..segmentLimit[ passes the FCD check.
     442           0 :     const UChar *p = pos;
     443           0 :     uint8_t nextCC = 0;
     444             :     for(;;) {
     445             :         // Fetch the previous character's fcd16 value.
     446           0 :         const UChar *q = p;
     447           0 :         uint16_t fcd16 = nfcImpl.previousFCD16(rawStart, p);
                               // The lower byte of fcd16 is used as the trail combining class;
                               // the upper byte (see nextCC below) as the lead combining class.
     448           0 :         uint8_t trailCC = (uint8_t)fcd16;
     449           0 :         if(trailCC == 0 && q != pos) {
     450             :             // FCD boundary after the [p, q[ character.
     451           0 :             start = segmentStart = q;
     452           0 :             break;
     453             :         }
     454           0 :         if(trailCC != 0 && ((nextCC != 0 && trailCC > nextCC) ||
     455           0 :                             CollationFCD::isFCD16OfTibetanCompositeVowel(fcd16))) {
     456             :             // Fails FCD check. Find the previous FCD boundary and normalize.
                                   // The loop keeps stepping back while the current character's fcd16
                                   // is above 0xff (non-zero upper byte), rawStart has not been reached,
                                   // and the character before it has a non-zero fcd16.
     457           0 :             do {
     458           0 :                 q = p;
     459           0 :             } while(fcd16 > 0xff && p != rawStart &&
     460           0 :                     (fcd16 = nfcImpl.previousFCD16(rawStart, p)) != 0);
     461           0 :             if(!normalize(q, pos, errorCode)) { return FALSE; }
     462           0 :             pos = limit;
     463           0 :             break;
     464             :         }
     465           0 :         nextCC = (uint8_t)(fcd16 >> 8);
     466           0 :         if(p == rawStart || nextCC == 0) {
     467             :             // FCD boundary before the following character.
     468           0 :             start = segmentStart = p;
     469           0 :             break;
     470             :         }
     471           0 :     }
     472           0 :     U_ASSERT(pos != start);
     473           0 :     checkDir = 0;
     474           0 :     return TRUE;
     475             : }
     476             : 
     // Decomposes the raw text range [from, to[ into the `normalized` buffer and redirects
     // iteration into that buffer.
     // On success: segmentStart/segmentLimit record the raw range, start/limit delimit the
     // buffer contents, and TRUE is returned. pos is NOT updated here; the callers set it.
     // On failure (errorCode set by decompose()): returns FALSE before touching the
     // segment or start/limit pointers.
     // Precondition (asserted): errorCode indicates success on entry.
     477             : UBool
     478           0 : FCDUTF16CollationIterator::normalize(const UChar *from, const UChar *to, UErrorCode &errorCode) {
     479             :     // NFD without argument checking.
     480           0 :     U_ASSERT(U_SUCCESS(errorCode));
                           // NOTE(review): the (to - from) argument is presumably a capacity hint for
                           // the destination string -- confirm against Normalizer2Impl::decompose().
     481           0 :     nfcImpl.decompose(from, to, normalized, (int32_t)(to - from), errorCode);
     482           0 :     if(U_FAILURE(errorCode)) { return FALSE; }
     483             :     // Switch collation processing into the FCD buffer
     484             :     // with the result of normalizing [segmentStart, segmentLimit[.
     485           0 :     segmentStart = from;
     486           0 :     segmentLimit = to;
     487           0 :     start = normalized.getBuffer();
     488           0 :     limit = start + normalized.length();
     489           0 :     return TRUE;
     490             : }
     491             : 
     492             : U_NAMESPACE_END
     493             : 
     494             : #endif  // !UCONFIG_NO_COLLATION

Generated by: LCOV version 1.13