LCOV - code coverage report
Current view: top level - intl/icu/source/i18n/unicode - coleitr.h (source / functions) Hit Total Coverage
Test: output.info Lines: 0 9 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 5 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             :  ******************************************************************************
       5             :  *   Copyright (C) 1997-2014, International Business Machines
       6             :  *   Corporation and others.  All Rights Reserved.
       7             :  ******************************************************************************
       8             :  */
       9             : 
      10             : /**
      11             :  * \file 
      12             :  * \brief C++ API: Collation Element Iterator.
      13             :  */
      14             : 
      15             : /**
      16             : * File coleitr.h
      17             : *
      18             : * Created by: Helena Shih
      19             : *
      20             : * Modification History:
      21             : *
      22             : *  Date       Name        Description
      23             : *
      24             : *  8/18/97    helena      Added internal API documentation.
      25             : * 08/03/98    erm         Synched with 1.2 version CollationElementIterator.java
      26             : * 12/10/99    aliu        Ported Thai collation support from Java.
      27             : * 01/25/01    swquek      Modified into a C++ wrapper calling C APIs (ucoliter.h)
      28             : * 02/19/01    swquek      Removed CollationElementsIterator() since it is 
      29             : *                         private constructor and no calls are made to it
      30             : * 2012-2014   markus      Rewritten in C++ again.
      31             : */
      32             : 
      33             : #ifndef COLEITR_H
      34             : #define COLEITR_H
      35             : 
      36             : #include "unicode/utypes.h"
      37             : 
      38             : #if !UCONFIG_NO_COLLATION
      39             : 
      40             : #include "unicode/unistr.h"
      41             : #include "unicode/uobject.h"
      42             : 
      43             : struct UCollationElements;
      44             : struct UHashtable;
      45             : 
      46             : U_NAMESPACE_BEGIN
      47             : 
      48             : struct CollationData;
      49             : 
      50             : class CharacterIterator;
      51             : class CollationIterator;
      52             : class RuleBasedCollator;
      53             : class UCollationPCE;
      54             : class UVector32;
      55             : 
      56             : /**
      57             : * The CollationElementIterator class is used as an iterator to walk through     
      58             : * each character of an international string. Use the iterator to return the
      59             : * ordering priority of the positioned character. The ordering priority of a 
      60             : * character, which we refer to as a key, defines how a character is collated in 
      61             : * the given collation object.
      62             : * For example, consider the following in Slovak and in traditional Spanish collation:
      63             : * <pre>
      64             : *        "ca" -> the first key is key('c') and second key is key('a').
      65             : *        "cha" -> the first key is key('ch') and second key is key('a').</pre>
      66             : * And in German phonebook collation,
      67             : * <pre> \htmlonly       "&#x00E6;b"-> the first key is key('a'), the second key is key('e'), and
      68             : *        the third key is key('b'). \endhtmlonly </pre>
      69             : * The key of a character is an integer composed of primary order(short),
      70             : * secondary order(char), and tertiary order(char). Java strictly defines the 
      71             : * size and signedness of its primitive data types. Therefore, the static
      72             : * functions primaryOrder(), secondaryOrder(), and tertiaryOrder() return 
      73             : * int32_t to ensure the correctness of the key value.
      74             : * <p>Example of the iterator usage: (without error checking)
      75             : * <pre>
      76             : * \code
      77             : *   void CollationElementIterator_Example()
      78             : *   {
      79             : *       UnicodeString str = "This is a test";
      80             : *       UErrorCode success = U_ZERO_ERROR;
      81             : *       RuleBasedCollator* rbc =
      82             : *           (RuleBasedCollator*) RuleBasedCollator::createInstance(success);
      83             : *       CollationElementIterator* c =
      84             : *           rbc->createCollationElementIterator( str );
      85             : *       int32_t order = c->next(success);
      86             : *       c->reset();
      87             : *       order = c->previous(success);
      88             : *       delete c;
      89             : *       delete rbc;
      90             : *   }
      91             : * \endcode
      92             : * </pre>
      93             : * <p>
      94             : * The method next() returns the collation order of the next character based on
      95             : * the comparison level of the collator. The method previous() returns the
      96             : * collation order of the previous character based on the comparison level of
      97             : * the collator. The Collation Element Iterator moves only in one direction
      98             : * between calls to reset(), setOffset(), or setText(). That is, calls to next()
      99             : * and previous() cannot be interleaved. Whenever previous() is to be called after
     100             : * next() or vice versa, reset(), setOffset() or setText() has to be called first
     101             : * to reset the status, shifting pointers to either the end or the start of
     102             : * the string (reset() or setText()), or the specified position (setOffset()).
     103             : * Hence at the next call of next() or previous(), the first or last collation order,
     104             : * or collation order at the specified position will be returned. If a change of
     105             : * direction is done without one of these calls, the result is undefined.
     106             : * <p>
     107             : * The result of a forward iteration (next()) and the reversed result of a backward
     108             : * iteration (previous()) on the same string are equivalent, if collation orders
     109             : * with the value 0 are ignored.
     110             : * Each collation order is based on the comparison level of the collator.  A collation order
     111             : * consists of primary order, secondary order and tertiary order.  The data 
     112             : * type of the collation order is <strong>int32_t</strong>. 
     113             : *
     114             : * Note, CollationElementIterator should not be subclassed.
     115             : * @see     Collator
     116             : * @see     RuleBasedCollator
     117             : * @version 1.8 Jan 16 2001
     118             : */
     119             : class U_I18N_API CollationElementIterator U_FINAL : public UObject {
     120             : public: 
     121             : 
     122             :     // CollationElementIterator public data member ------------------------------
     123             : 
     124             :     enum {
     125             :         /**
     126             :          * NULLORDER indicates that an error has occurred while processing
     127             :          * @stable ICU 2.0
     128             :          */
     129             :         NULLORDER = (int32_t)0xffffffff
     130             :     };
     131             : 
     132             :     // CollationElementIterator public constructor/destructor -------------------
     133             : 
     134             :     /**
     135             :     * Copy constructor.
     136             :     *
     137             :     * @param other    the object to be copied from
     138             :     * @stable ICU 2.0
     139             :     */
     140             :     CollationElementIterator(const CollationElementIterator& other);
     141             : 
     142             :     /** 
     143             :     * Destructor
     144             :     * @stable ICU 2.0
     145             :     */
     146             :     virtual ~CollationElementIterator();
     147             : 
     148             :     // CollationElementIterator public methods ----------------------------------
     149             : 
     150             :     /**
     151             :     * Returns true if "other" is the same as "this"
     152             :     *
     153             :     * @param other    the object to be compared
     154             :     * @return         true if "other" is the same as "this"
     155             :     * @stable ICU 2.0
     156             :     */
     157             :     UBool operator==(const CollationElementIterator& other) const;
     158             : 
     159             :     /**
     160             :     * Returns true if "other" is not the same as "this".
     161             :     *
     162             :     * @param other    the object to be compared
     163             :     * @return         true if "other" is not the same as "this"
     164             :     * @stable ICU 2.0
     165             :     */
     166             :     UBool operator!=(const CollationElementIterator& other) const;
     167             : 
     168             :     /**
     169             :     * Resets the cursor to the beginning of the string.
     170             :     * @stable ICU 2.0
     171             :     */
     172             :     void reset(void);
     173             : 
     174             :     /**
     175             :     * Gets the ordering priority of the next character in the string.
     176             :     * @param status the error code status.
     177             :     * @return the next character's ordering, or NULLORDER if an
     178             :     *         error has occurred or if the end of the string has been reached
     179             :     * @stable ICU 2.0
     180             :     */
     181             :     int32_t next(UErrorCode& status);
     182             : 
     183             :     /**
     184             :     * Get the ordering priority of the previous collation element in the string.
     185             :     * @param status the error code status.
     186             :     * @return the previous element's ordering, or NULLORDER if an
     187             :     *         error has occurred or if the start of the string has been reached
     188             :     * @stable ICU 2.0
     189             :     */
     190             :     int32_t previous(UErrorCode& status);
     191             : 
     192             :     /**
     193             :     * Gets the primary order of a collation order.
     194             :     * @param order the collation order
     195             :     * @return the primary order of a collation order.
     196             :     * @stable ICU 2.0
     197             :     */
     198             :     static inline int32_t primaryOrder(int32_t order);
     199             : 
     200             :     /**
     201             :     * Gets the secondary order of a collation order.
     202             :     * @param order the collation order
     203             :     * @return the secondary order of a collation order.
     204             :     * @stable ICU 2.0
     205             :     */
     206             :     static inline int32_t secondaryOrder(int32_t order);
     207             : 
     208             :     /**
     209             :     * Gets the tertiary order of a collation order.
     210             :     * @param order the collation order
     211             :     * @return the tertiary order of a collation order.
     212             :     * @stable ICU 2.0
     213             :     */
     214             :     static inline int32_t tertiaryOrder(int32_t order);
     215             : 
     216             :     /**
     217             :     * Return the maximum length of any expansion sequences that end with the 
     218             :     * specified comparison order.
     219             :     * @param order a collation order returned by previous or next.
     220             :     * @return maximum size of the expansion sequences ending with the collation 
     221             :     *         element or 1 if collation element does not occur at the end of any 
     222             :     *         expansion sequence
     223             :     * @stable ICU 2.0
     224             :     */
     225             :     int32_t getMaxExpansion(int32_t order) const;
     226             : 
     227             :     /**
     228             :     * Gets the comparison order in the desired strength. Ignore the other
     229             :     * differences.
     230             :     * @param order The order value
     231             :     * @stable ICU 2.0
     232             :     */
     233             :     int32_t strengthOrder(int32_t order) const;
     234             : 
     235             :     /**
     236             :     * Sets the source string.
     237             :     * @param str the source string.
     238             :     * @param status the error code status.
     239             :     * @stable ICU 2.0
     240             :     */
     241             :     void setText(const UnicodeString& str, UErrorCode& status);
     242             : 
     243             :     /**
     244             :     * Sets the source string.
     245             :     * @param str the source character iterator.
     246             :     * @param status the error code status.
     247             :     * @stable ICU 2.0
     248             :     */
     249             :     void setText(CharacterIterator& str, UErrorCode& status);
     250             : 
     251             :     /**
     252             :     * Checks if a comparison order is ignorable.
     253             :     * @param order the collation order.
     254             :     * @return TRUE if a character is ignorable, FALSE otherwise.
     255             :     * @stable ICU 2.0
     256             :     */
     257             :     static inline UBool isIgnorable(int32_t order);
     258             : 
     259             :     /**
     260             :     * Gets the offset of the currently processed character in the source string.
     261             :     * @return the offset of the character.
     262             :     * @stable ICU 2.0
     263             :     */
     264             :     int32_t getOffset(void) const;
     265             : 
     266             :     /**
     267             :     * Sets the offset of the currently processed character in the source string.
     268             :     * The next call to next() or previous() starts from this offset.
     269             :     * @param newOffset the new offset.
     270             :     * @param status the error code status.
     271             :     * @stable ICU 2.0
     272             :     */
     273             :     void setOffset(int32_t newOffset, UErrorCode& status);
     274             : 
     275             :     /**
     276             :     * ICU "poor man's RTTI", returns a UClassID for the actual class.
     277             :     *
     278             :     * @stable ICU 2.2
     279             :     */
     280             :     virtual UClassID getDynamicClassID() const;
     281             : 
     282             :     /**
     283             :     * ICU "poor man's RTTI", returns a UClassID for this class.
     284             :     *
     285             :     * @stable ICU 2.2
     286             :     */
     287             :     static UClassID U_EXPORT2 getStaticClassID();
     288             : 
     289             : #ifndef U_HIDE_INTERNAL_API
     290             :     /** @internal */
     291           0 :     static inline CollationElementIterator *fromUCollationElements(UCollationElements *uc) {
     292           0 :         return reinterpret_cast<CollationElementIterator *>(uc);
     293             :     }
     294             :     /** @internal */
     295           0 :     static inline const CollationElementIterator *fromUCollationElements(const UCollationElements *uc) {
     296           0 :         return reinterpret_cast<const CollationElementIterator *>(uc);
     297             :     }
     298             :     /** @internal */
     299           0 :     inline UCollationElements *toUCollationElements() {
     300           0 :         return reinterpret_cast<UCollationElements *>(this);
     301             :     }
     302             :     /** @internal */
     303             :     inline const UCollationElements *toUCollationElements() const {
     304             :         return reinterpret_cast<const UCollationElements *>(this);
     305             :     }
     306             : #endif  // U_HIDE_INTERNAL_API
     307             : 
     308             : private:
     309             :     friend class RuleBasedCollator;
     310             :     friend class UCollationPCE;
     311             : 
     312             :     /**
     313             :     * CollationElementIterator constructor. This takes the source string and the 
     314             :     * collation object. The cursor will walk thru the source string based on the 
     315             :     * predefined collation rules. If the source string is empty, NULLORDER will 
     316             :     * be returned on the calls to next().
     317             :     * @param sourceText    the source string.
     318             :     * @param order         the collation object.
     319             :     * @param status        the error code status.
     320             :     */
     321             :     CollationElementIterator(const UnicodeString& sourceText,
     322             :         const RuleBasedCollator* order, UErrorCode& status);
     323             :     // Note: The constructors should take settings & tailoring, not a collator,
     324             :     // to avoid circular dependencies.
     325             :     // However, for operator==() we would need to be able to compare tailoring data for equality
     326             :     // without making CollationData or CollationTailoring depend on TailoredSet.
     327             :     // (See the implementation of RuleBasedCollator::operator==().)
     328             :     // That might require creating an intermediate class that would be used
     329             :     // by both CollationElementIterator and RuleBasedCollator
     330             :     // but only contain the part of RBC== related to data and rules.
     331             : 
     332             :     /**
     333             :     * CollationElementIterator constructor. This takes the source string and the 
     334             :     * collation object.  The cursor will walk thru the source string based on the 
     335             :     * predefined collation rules.  If the source string is empty, NULLORDER will 
     336             :     * be returned on the calls to next().
     337             :     * @param sourceText    the source string.
     338             :     * @param order         the collation object.
     339             :     * @param status        the error code status.
     340             :     */
     341             :     CollationElementIterator(const CharacterIterator& sourceText,
     342             :         const RuleBasedCollator* order, UErrorCode& status);
     343             : 
     344             :     /**
     345             :     * Assignment operator
     346             :     *
     347             :     * @param other    the object to be copied
     348             :     */
     349             :     const CollationElementIterator&
     350             :         operator=(const CollationElementIterator& other);
     351             : 
     352             :     CollationElementIterator(); // default constructor not implemented
     353             : 
     354             :     /** Normalizes dir_=1 (just after setOffset()) to dir_=0 (just after reset()). */
     355           0 :     inline int8_t normalizeDir() const { return dir_ == 1 ? 0 : dir_; }
     356             : 
     357             :     static UHashtable *computeMaxExpansions(const CollationData *data, UErrorCode &errorCode);
     358             : 
     359             :     static int32_t getMaxExpansion(const UHashtable *maxExpansions, int32_t order);
     360             : 
     361             :     // CollationElementIterator private data members ----------------------------
     362             : 
     363             :     CollationIterator *iter_;  // owned
     364             :     const RuleBasedCollator *rbc_;  // aliased
     365             :     uint32_t otherHalf_;
     366             :     /**
     367             :      * <0: backwards; 0: just after reset() (previous() begins from end);
     368             :      * 1: just after setOffset(); >1: forward
     369             :      */
     370             :     int8_t dir_;
     371             :     /**
     372             :      * Stores offsets from expansions and from unsafe-backwards iteration,
     373             :      * so that getOffset() returns intermediate offsets for the CEs
     374             :      * that are consistent with forward iteration.
     375             :      */
     376             :     UVector32 *offsets_;
     377             : 
     378             :     UnicodeString string_;
     379             : };
     380             : 
     381             : // CollationElementIterator inline method definitions --------------------------
     382             : 
     383           0 : inline int32_t CollationElementIterator::primaryOrder(int32_t order)
     384             : {
     385           0 :     return (order >> 16) & 0xffff;
     386             : }
     387             : 
     388             : inline int32_t CollationElementIterator::secondaryOrder(int32_t order)
     389             : {
     390             :     return (order >> 8) & 0xff;
     391             : }
     392             : 
     393             : inline int32_t CollationElementIterator::tertiaryOrder(int32_t order)
     394             : {
     395             :     return order & 0xff;
     396             : }
     397             : 
     398             : inline UBool CollationElementIterator::isIgnorable(int32_t order)
     399             : {
     400             :     return (order & 0xffff0000) == 0;
     401             : }
     402             : 
     403             : U_NAMESPACE_END
     404             : 
     405             : #endif /* #if !UCONFIG_NO_COLLATION */
     406             : 
     407             : #endif

Generated by: LCOV version 1.13