LCOV - code coverage report
Current view: top level - intl/icu/source/i18n - collationiterator.h (source / functions) Hit Total Coverage
Test: output.info Lines: 0 59 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 15 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : *******************************************************************************
       5             : * Copyright (C) 2010-2014, International Business Machines
       6             : * Corporation and others.  All Rights Reserved.
       7             : *******************************************************************************
       8             : * collationiterator.h
       9             : *
      10             : * created on: 2010oct27
      11             : * created by: Markus W. Scherer
      12             : */
      13             : 
      14             : #ifndef __COLLATIONITERATOR_H__
      15             : #define __COLLATIONITERATOR_H__
      16             : 
      17             : #include "unicode/utypes.h"
      18             : 
      19             : #if !UCONFIG_NO_COLLATION
      20             : 
      21             : #include "cmemory.h"
      22             : #include "collation.h"
      23             : #include "collationdata.h"
      24             : 
      25             : U_NAMESPACE_BEGIN
      26             : 
      27             : class SkippedState;  // Used below only through a pointer (the private "skipped" member).
      28             : class UCharsTrie;  // Used below only by reference (nextCE32FromDiscontiguousContraction()).
      29             : class UVector32;  // Used below only by reference (the "offsets" parameters).
      30             : 
      31             : /**
      32             :  * Collation element (CE) iterator and abstract character iterator.
      33             :  * Text access is left to subclasses (the pure virtual methods); the CEs produced are kept in a CEBuffer.
      34             :  * When a method returns a code point value, it must be in 0..10FFFF,
      35             :  * except it can be negative as a sentinel value.
      36             :  */
      37             : class U_I18N_API CollationIterator : public UObject {
      38             : private:
      39             :     class CEBuffer {  // List of int64_t CEs stored in a MaybeStackArray<int64_t, INITIAL_CAPACITY>.
      40             :     private:
      41             :         /** Large enough for CEs of most short strings. Also the limit of the no-check fast path in append()/incLength(). */
      42             :         static const int32_t INITIAL_CAPACITY = 40;
      43             :     public:
      44           0 :         CEBuffer() : length(0) {}  // Starts with no CEs.
      45             :         ~CEBuffer();
      46             : 
      47           0 :         inline void append(int64_t ce, UErrorCode &errorCode) {  // Stores ce at index length and increments length.
      48           0 :             if(length < INITIAL_CAPACITY || ensureAppendCapacity(1, errorCode)) {  // ce is dropped if ensureAppendCapacity() returns false.
      49           0 :                 buffer[length++] = ce;
      50             :             }
      51           0 :         }
      52             : 
      53           0 :         inline void appendUnsafe(int64_t ce) {  // No capacity check here; presumably the caller has already ensured room - confirm at call sites.
      54           0 :             buffer[length++] = ce;
      55           0 :         }
      56             : 
      57             :         UBool ensureAppendCapacity(int32_t appCap, UErrorCode &errorCode);  // Defined elsewhere; callers here treat a true result as "room for the append".
      58             : 
      59           0 :         inline UBool incLength(UErrorCode &errorCode) {  // Reserves one more slot without writing it; FALSE if ensureAppendCapacity() fails.
      60             :             // Use INITIAL_CAPACITY for a very simple fastpath.
      61             :             // (Rather than buffer.getCapacity().)
      62           0 :             if(length < INITIAL_CAPACITY || ensureAppendCapacity(1, errorCode)) {
      63           0 :                 ++length;
      64           0 :                 return TRUE;
      65             :             } else {
      66           0 :                 return FALSE;
      67             :             }
      68             :         }
      69             : 
      70           0 :         inline int64_t set(int32_t i, int64_t ce) {  // Overwrites slot i (no bounds check) and returns the stored value.
      71           0 :             return buffer[i] = ce;
      72             :         }
      73           0 :         inline int64_t get(int32_t i) const { return buffer[i]; }  // No bounds check.
      74             : 
      75           0 :         const int64_t *getCEs() const { return buffer.getAlias(); }  // Pointer to the CE storage, as returned by buffer.getAlias().
      76             : 
      77             :         int32_t length;  // Number of CEs in use; public, and read/reset directly by CollationIterator below.
      78             : 
      79             :     private:
      80             :         CEBuffer(const CEBuffer &);  // Private and not defined in this header: outside code cannot copy a CEBuffer.
      81             :         void operator=(const CEBuffer &);
      82             : 
      83             :         MaybeStackArray<int64_t, INITIAL_CAPACITY> buffer;
      84             :     };
      85             : 
      86             : public:
      87           0 :     CollationIterator(const CollationData *d, UBool numeric)  // Starts with an empty CE buffer, skipped == NULL and numCpFwd == -1.
      88           0 :             : trie(d->trie),  // Dereferences d, so d must not be NULL.
      89             :               data(d),
      90             :               cesIndex(0),
      91             :               skipped(NULL),
      92             :               numCpFwd(-1),
      93           0 :               isNumeric(numeric) {}
      94             : 
      95             :     virtual ~CollationIterator();
      96             : 
      97             :     virtual UBool operator==(const CollationIterator &other) const;
      98             :     inline UBool operator!=(const CollationIterator &other) const {  // Defined as the negation of the virtual operator==().
      99             :         return !operator==(other);
     100             :     }
     101             : 
     102             :     /**
     103             :      * Resets the iterator state and sets the position to the specified offset.
     104             :      * Subclasses must implement, and must call the parent class method,
     105             :      * or CollationIterator::reset().
     106             :      */
     107             :     virtual void resetToOffset(int32_t newOffset) = 0;
     108             : 
     109             :     virtual int32_t getOffset() const = 0;  // Pure virtual; presumably the read counterpart of resetToOffset() - confirm in subclasses.
     110             : 
     111             :     /**
     112             :      * Returns the next collation element: a buffered CE if one remains, otherwise a CE computed from handleNextCE32(). Returns Collation::NO_CE if no buffer slot can be reserved.
     113             :      */
     114           0 :     inline int64_t nextCE(UErrorCode &errorCode) {
     115           0 :         if(cesIndex < ceBuffer.length) {
     116             :             // Return the next buffered CE.
     117           0 :             return ceBuffer.get(cesIndex++);
     118             :         }
     119             :         // assert cesIndex == ceBuffer.length;
     120           0 :         if(!ceBuffer.incLength(errorCode)) {  // Reserve one slot up front; the set(cesIndex++, ...) calls below write into it.
     121           0 :             return Collation::NO_CE;
     122             :         }
     123             :         UChar32 c;  // Output parameter of handleNextCE32().
     124           0 :         uint32_t ce32 = handleNextCE32(c, errorCode);
     125           0 :         uint32_t t = ce32 & 0xff;  // Low byte of the CE32; compared against the special-CE32 thresholds below.
     126           0 :         if(t < Collation::SPECIAL_CE32_LOW_BYTE) {  // Forced-inline of isSpecialCE32(ce32).
     127             :             // Normal CE from the main data.
     128             :             // Forced-inline of ceFromSimpleCE32(ce32).
     129           0 :             return ceBuffer.set(cesIndex++,
     130           0 :                     ((int64_t)(ce32 & 0xffff0000) << 32) | ((ce32 & 0xff00) << 16) | (t << 8));
     131             :         }
     132             :         const CollationData *d;  // Data object that ce32 belongs to: data->base after the lookup below, otherwise data.
     133             :         // The compiler should be able to optimize the previous and the following
     134             :         // comparisons of t with the same constant.
     135           0 :         if(t == Collation::SPECIAL_CE32_LOW_BYTE) {  // Presumably the FALLBACK_CE32 case documented at handleNextCE32() - confirm against collation.h.
     136           0 :             if(c < 0) {  // Negative c is the end-of-text sentinel (see handleNextCE32()): buffer and return NO_CE.
     137           0 :                 return ceBuffer.set(cesIndex++, Collation::NO_CE);
     138             :             }
     139           0 :             d = data->base;
     140           0 :             ce32 = d->getCE32(c);
     141           0 :             t = ce32 & 0xff;
     142           0 :             if(t < Collation::SPECIAL_CE32_LOW_BYTE) {
     143             :                 // Normal CE from the base data.
     144           0 :                 return ceBuffer.set(cesIndex++,
     145           0 :                         ((int64_t)(ce32 - t) << 32) | ((ce32 & 0xff00) << 16) | (t << 8));
     146             :             }
     147             :         } else {
     148           0 :             d = data;
     149             :         }
     150           0 :         if(t == Collation::LONG_PRIMARY_CE32_LOW_BYTE) {
     151             :             // Forced-inline of ceFromLongPrimaryCE32(ce32).
     152           0 :             return ceBuffer.set(cesIndex++,
     153           0 :                     ((int64_t)(ce32 - t) << 32) | Collation::COMMON_SEC_AND_TER_CE);
     154             :         }
     155           0 :         return nextCEFromCE32(d, c, ce32, errorCode);  // Every other special CE32 is handled out of line.
     156             :     }
     157             : 
     158             :     /**
     159             :      * Fetches all CEs.
     160             :      * @return getCEsLength()
     161             :      */
     162             :     int32_t fetchCEs(UErrorCode &errorCode);
     163             : 
     164             :     /**
     165             :      * Overwrites the current CE (the last one returned by nextCE()), i.e. the buffer slot at cesIndex - 1.
     166             :      */
     167           0 :     void setCurrentCE(int64_t ce) {
     168             :         // assert cesIndex > 0;
     169           0 :         ceBuffer.set(cesIndex - 1, ce);
     170           0 :     }
     171             : 
     172             :     /**
     173             :      * Returns the previous collation element.
     174             :      */
     175             :     int64_t previousCE(UVector32 &offsets, UErrorCode &errorCode);
     176             : 
     177           0 :     inline int32_t getCEsLength() const {  // Number of CEs currently in the buffer.
     178           0 :         return ceBuffer.length;
     179             :     }
     180             : 
     181           0 :     inline int64_t getCE(int32_t i) const {  // Buffered CE at index i; no bounds check.
     182           0 :         return ceBuffer.get(i);
     183             :     }
     184             : 
     185           0 :     const int64_t *getCEs() const {  // Pointer to the buffered CEs (see CEBuffer::getCEs()).
     186           0 :         return ceBuffer.getCEs();
     187             :     }
     188             : 
     189           0 :     void clearCEs() {  // Empties the CE buffer and resets the read index to 0.
     190           0 :         cesIndex = ceBuffer.length = 0;
     191           0 :     }
     192             : 
     193           0 :     void clearCEsIfNoneRemaining() {  // Clears only when the read index has reached the end of the buffer.
     194           0 :         if(cesIndex == ceBuffer.length) { clearCEs(); }
     195           0 :     }
     196             : 
     197             :     /**
     198             :      * Returns the next code point (with post-increment).
     199             :      * Public for identical-level comparison and for testing.
     200             :      */
     201             :     virtual UChar32 nextCodePoint(UErrorCode &errorCode) = 0;
     202             : 
     203             :     /**
     204             :      * Returns the previous code point (with pre-decrement).
     205             :      * Public for identical-level comparison and for testing.
     206             :      */
     207             :     virtual UChar32 previousCodePoint(UErrorCode &errorCode) = 0;
     208             : 
     209             : protected:
     210             :     CollationIterator(const CollationIterator &other);  // Copy construction is available to subclasses only.
     211             : 
     212             :     void reset();
     213             : 
     214             :     /**
     215             :      * Returns the next code point and its local CE32 value.
     216             :      * Returns Collation::FALLBACK_CE32 at the end of the text (c<0)
     217             :      * or when c's CE32 value is to be looked up in the base data (fallback).
     218             :      *
     219             :      * The code point is used for fallbacks, context and implicit weights.
     220             :      * It is ignored when the returned CE32 is not special (e.g., FFFD_CE32).
     221             :      */
     222             :     virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
     223             : 
     224             :     /**
     225             :      * Called when handleNextCE32() returns a LEAD_SURROGATE_TAG for a lead surrogate code unit.
     226             :      * Returns the trail surrogate in that case and advances past it,
     227             :      * if a trail surrogate follows the lead surrogate.
     228             :      * Otherwise returns any other code unit and does not advance.
     229             :      */
     230             :     virtual UChar handleGetTrailSurrogate();
     231             : 
     232             :     /**
     233             :      * Called when handleNextCE32() returns with c==0, to see whether it is a NUL terminator.
     234             :      * (Not needed in Java.)
     235             :      */
     236             :     virtual UBool foundNULTerminator();
     237             : 
     238             :     /**
     239             :      * @return FALSE if surrogate code points U+D800..U+DFFF
     240             :      *         map to their own implicit primary weights (for UTF-16),
     241             :      *         or TRUE if they map to CE(U+FFFD) (for UTF-8)
     242             :      */
     243             :     virtual UBool forbidSurrogateCodePoints() const;
     244             : 
     245             :     virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode) = 0;  // Pure virtual; presumably advances the text position by num code points - confirm in subclasses.
     246             : 
     247             :     virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode) = 0;  // Pure virtual; presumably moves the text position back by num code points - confirm in subclasses.
     248             : 
     249             :     /**
     250             :      * Returns the CE32 from the data trie.
     251             :      * Normally the same as data->getCE32(), but overridden in the builder.
     252             :      * Call this only when the faster data->getCE32() cannot be used.
     253             :      */
     254             :     virtual uint32_t getDataCE32(UChar32 c) const;
     255             : 
     256             :     virtual uint32_t getCE32FromBuilderData(uint32_t ce32, UErrorCode &errorCode);  // NOTE(review): undocumented here; the name suggests a builder-only override point - confirm.
     257             : 
     258             :     void appendCEsFromCE32(const CollationData *d, UChar32 c, uint32_t ce32,
     259             :                            UBool forward, UErrorCode &errorCode);
     260             : 
     261             :     // Main lookup trie of the data object (initialized from d->trie in the constructor).
     262             :     const UTrie2 *trie;
     263             :     const CollationData *data;  // The data object passed to the constructor; nextCE() falls back to data->base.
     264             : 
     265             : private:
     266             :     int64_t nextCEFromCE32(const CollationData *d, UChar32 c, uint32_t ce32,
     267             :                            UErrorCode &errorCode);  // Out-of-line tail of nextCE() for CE32s that its inline fast paths do not cover.
     268             : 
     269             :     uint32_t getCE32FromPrefix(const CollationData *d, uint32_t ce32,
     270             :                                UErrorCode &errorCode);
     271             : 
     272             :     UChar32 nextSkippedCodePoint(UErrorCode &errorCode);
     273             : 
     274             :     void backwardNumSkipped(int32_t n, UErrorCode &errorCode);
     275             : 
     276             :     uint32_t nextCE32FromContraction(
     277             :             const CollationData *d, uint32_t contractionCE32,
     278             :             const UChar *p, uint32_t ce32, UChar32 c,
     279             :             UErrorCode &errorCode);
     280             : 
     281             :     uint32_t nextCE32FromDiscontiguousContraction(
     282             :             const CollationData *d, UCharsTrie &suffixes, uint32_t ce32,
     283             :             int32_t lookAhead, UChar32 c,
     284             :             UErrorCode &errorCode);
     285             : 
     286             :     /**
     287             :      * Returns the previous CE when data->isUnsafeBackward(c, isNumeric).
     288             :      */
     289             :     int64_t previousCEUnsafe(UChar32 c, UVector32 &offsets, UErrorCode &errorCode);
     290             : 
     291             :     /**
     292             :      * Turns a string of digits (bytes 0..9)
     293             :      * into a sequence of CEs that will sort in numeric order.
     294             :      *
     295             :      * Starts from this ce32's digit value and consumes the following/preceding digits.
     296             :      * The digits string must not be empty and must not have leading zeros.
     297             :      */
     298             :     void appendNumericCEs(uint32_t ce32, UBool forward, UErrorCode &errorCode);
     299             : 
     300             :     /**
     301             :      * Turns 1..254 digits into a sequence of CEs.
     302             :      * Called by appendNumericCEs() for each segment of at most 254 digits.
     303             :      */
     304             :     void appendNumericSegmentCEs(const char *digits, int32_t length, UErrorCode &errorCode);
     305             : 
     306             :     CEBuffer ceBuffer;  // CEs produced so far.
     307             :     int32_t cesIndex;  // Index into ceBuffer of the next CE that nextCE() returns.
     308             : 
     309             :     SkippedState *skipped;  // Set to NULL by the constructor above; SkippedState is only forward-declared in this header.
     310             : 
     311             :     // Number of code points to read forward, or -1.
     312             :     // Used as a forward iteration limit in previousCEUnsafe().
     313             :     int32_t numCpFwd;
     314             :     // Numeric collation (CollationSettings::NUMERIC).
     315             :     UBool isNumeric;
     316             : };
     317             : 
     318             : U_NAMESPACE_END
     319             : 
     320             : #endif  // !UCONFIG_NO_COLLATION
     321             : #endif  // __COLLATIONITERATOR_H__

Generated by: LCOV version 1.13