LCOV - code coverage report
Current view: top level - intl/icu/source/i18n - utf8collationiterator.h (source / functions) Hit Total Coverage
Test: output.info Lines: 0 6 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 2 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : *******************************************************************************
       5             : * Copyright (C) 2012-2016, International Business Machines
       6             : * Corporation and others.  All Rights Reserved.
       7             : *******************************************************************************
       8             : * utf8collationiterator.h
       9             : *
      10             : * created on: 2012nov12 (from utf16collationiterator.h & uitercollationiterator.h)
      11             : * created by: Markus W. Scherer
      12             : */
      13             : 
      14             : #ifndef __UTF8COLLATIONITERATOR_H__
      15             : #define __UTF8COLLATIONITERATOR_H__
      16             : 
      17             : #include "unicode/utypes.h"
      18             : 
      19             : #if !UCONFIG_NO_COLLATION
      20             : 
      21             : #include "cmemory.h"
      22             : #include "collation.h"
      23             : #include "collationdata.h"
      24             : #include "collationiterator.h"
      25             : #include "normalizer2impl.h"
      26             : 
      27             : U_NAMESPACE_BEGIN
      28             : 
      29             : /**
      30             :  * UTF-8 collation element and character iterator.
      31             :  * Handles normalized UTF-8 text inline, either with an explicit length or NUL-terminated.
      32             :  * Unnormalized text is handled by a subclass.
      33             :  */
      34             : class U_I18N_API UTF8CollationIterator : public CollationIterator {
      35             : public:
      36           0 :     UTF8CollationIterator(const CollationData *d, UBool numeric,  // d and numeric are forwarded to the CollationIterator base
      37             :                           const uint8_t *s, int32_t p, int32_t len)  // s: text bytes; p: initial value of pos; len: <0 for NUL-terminated text
      38           0 :             : CollationIterator(d, numeric),
      39           0 :               u8(s), pos(p), length(len) {}  // only the pointer s is stored (text is not copied here); no argument validation in this header
      40             : 
      41             :     virtual ~UTF8CollationIterator();
      42             : 
      43             :     virtual void resetToOffset(int32_t newOffset);
      44             : 
      45             :     virtual int32_t getOffset() const;
      46             : 
      47             :     virtual UChar32 nextCodePoint(UErrorCode &errorCode);
      48             : 
      49             :     virtual UChar32 previousCodePoint(UErrorCode &errorCode);
      50             : 
      51             : protected:
      52             :     /**
      53             :      * For byte sequences that are illegal in UTF-8, an error value may be returned
      54             :      * together with a bogus code point. The caller will ignore that code point.
      55             :      *
      56             :      * Special values may be returned for surrogate code points, which are also illegal in UTF-8,
      57             :      * but the caller will treat them like U+FFFD because forbidSurrogateCodePoints() returns TRUE.
      58             :      *
      59             :      * Valid lead surrogates are returned from inside a normalized text segment,
      60             :      * where handleGetTrailSurrogate() will return the matching trail surrogate.
      61             :      */
      62             :     virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
      63             : 
      64             :     virtual UBool foundNULTerminator();  // NOTE(review): presumably relevant only when length<0 (NUL-terminated input) — confirm in the .cpp
      65             : 
      66             :     virtual UBool forbidSurrogateCodePoints() const;  // returns TRUE for this iterator, per the handleNextCE32() documentation above
      67             : 
      68             :     virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
      69             : 
      70             :     virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
      71             : 
      72             :     const uint8_t *u8;  // text pointer as passed to the constructor (not owned by any code in this header)
      73             :     int32_t pos;  // index state, initialized from constructor argument p
      74             :     int32_t length;  // <0 for NUL-terminated strings
      75             : };
      76             : 
      77             : /**
      78             :  * Incrementally checks the input text for FCD and normalizes where necessary.
      79             :  */
      80             : class U_I18N_API FCDUTF8CollationIterator : public UTF8CollationIterator {
      81             : public:
      82           0 :     FCDUTF8CollationIterator(const CollationData *data, UBool numeric,  // all five arguments are forwarded to UTF8CollationIterator
      83             :                              const uint8_t *s, int32_t p, int32_t len)
      84           0 :             : UTF8CollationIterator(data, numeric, s, p, len),
      85             :               state(CHECK_FWD), start(p),  // starts in forward-checking mode at p; limit is deliberately not initialized (undefined in CHECK_FWD)
      86           0 :               nfcImpl(data->nfcImpl) {}  // dereferences data, so data must be non-null; 'normalized' is default-constructed
      87             : 
      88             :     virtual ~FCDUTF8CollationIterator();
      89             : 
      90             :     virtual void resetToOffset(int32_t newOffset);
      91             : 
      92             :     virtual int32_t getOffset() const;
      93             : 
      94             :     virtual UChar32 nextCodePoint(UErrorCode &errorCode);
      95             : 
      96             :     virtual UChar32 previousCodePoint(UErrorCode &errorCode);
      97             : 
      98             : protected:
      99             :     virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
     100             : 
     101             :     virtual UChar handleGetTrailSurrogate();  // NOTE(review): presumably supplies the trail surrogate after a lead surrogate from the normalized segment — confirm in the .cpp
     102             : 
     103             :     virtual UBool foundNULTerminator();
     104             : 
     105             :     virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
     106             : 
     107             :     virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
     108             : 
     109             : private:
     110             :     UBool nextHasLccc() const;  // NOTE(review): presumably tests the upcoming character for a non-zero lead combining class — confirm in the .cpp
     111             :     UBool previousHasTccc() const;  // NOTE(review): presumably tests the preceding character for a non-zero trail combining class — confirm in the .cpp
     112             : 
     113             :     /**
     114             :      * Switches to forward checking if possible.
     115             :      */
     116             :     void switchToForward();
     117             : 
     118             :     /**
     119             :      * Extends the FCD text segment forward or normalizes around pos.
     120             :      * @return TRUE on success
     121             :      */
     122             :     UBool nextSegment(UErrorCode &errorCode);
     123             : 
     124             :     /**
     125             :      * Switches to backward checking.
     126             :      */
     127             :     void switchToBackward();
     128             : 
     129             :     /**
     130             :      * Extends the FCD text segment backward or normalizes around pos.
     131             :      * @return TRUE on success
     132             :      */
     133             :     UBool previousSegment(UErrorCode &errorCode);
     134             : 
     135             :     UBool normalize(const UnicodeString &s, UErrorCode &errorCode);  // NOTE(review): presumably normalizes s into the 'normalized' member; return/error contract not visible here — confirm in the .cpp
     136             : 
     137             :     enum State {
     138             :         /**
     139             :          * The input text [start..pos[ passes the FCD check.
     140             :          * Moving forward checks incrementally.
     141             :          * limit is undefined.
     142             :          */
     143             :         CHECK_FWD,
     144             :         /**
     145             :          * The input text [pos..limit[ passes the FCD check.
     146             :          * Moving backward checks incrementally.
     147             :          * start is undefined.
     148             :          */
     149             :         CHECK_BWD,
     150             :         /**
     151             :          * The input text [start..limit[ passes the FCD check.
     152             :          * pos tracks the current text index.
     153             :          */
     154             :         IN_FCD_SEGMENT,
     155             :         /**
     156             :          * The input text [start..limit[ failed the FCD check and was normalized.
     157             :          * pos tracks the current index in the normalized string.
     158             :          */
     159             :         IN_NORMALIZED
     160             :     };
     161             : 
     162             :     State state;  // current mode; determines which of start/limit are defined and what pos indexes (see enum State)
     163             : 
     164             :     int32_t start;  // segment start index in the input text; undefined while state==CHECK_BWD
     165             :     int32_t limit;  // segment limit index in the input text; undefined while state==CHECK_FWD (including right after construction)
     166             : 
     167             :     const Normalizer2Impl &nfcImpl;  // reference bound to data->nfcImpl in the constructor; not owned
     168             :     UnicodeString normalized;  // NOTE(review): presumably the normalized segment that pos indexes in IN_NORMALIZED — confirm in the .cpp
     169             : };
     170             : 
     171             : U_NAMESPACE_END
     172             : 
     173             : #endif  // !UCONFIG_NO_COLLATION
     174             : #endif  // __UTF8COLLATIONITERATOR_H__

Generated by: LCOV version 1.13