LCOV - code coverage report
Current view: top level - intl/icu/source/i18n/unicode - tblcoll.h (source / functions) Hit Total Coverage
Test: output.info Lines: 0 10 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 4 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : ******************************************************************************
       5             : * Copyright (C) 1996-2016, International Business Machines Corporation and
       6             : * others. All Rights Reserved.
       7             : ******************************************************************************
       8             : */
       9             : 
      10             : /**
      11             :  * \file
      12             :  * \brief C++ API: The RuleBasedCollator class implements the Collator abstract base class.
      13             :  */
      14             : 
      15             : /**
      16             : * File tblcoll.h
      17             : *
      18             : * Created by: Helena Shih
      19             : *
      20             : * Modification History:
      21             : *
      22             : *  Date        Name        Description
      23             : *  2/5/97      aliu        Added streamIn and streamOut methods.  Added
      24             : *                          constructor which reads RuleBasedCollator object from
      25             : *                          a binary file.  Added writeToFile method which streams
      26             : *                          RuleBasedCollator out to a binary file.  The streamIn
      27             : *                          and streamOut methods use istream and ostream objects
      28             : *                          in binary mode.
      29             : *  2/12/97     aliu        Modified to use TableCollationData sub-object to
      30             : *                          hold invariant data.
      31             : *  2/13/97     aliu        Moved several methods into this class from Collation.
      32             : *                          Added a private RuleBasedCollator(Locale&) constructor,
      33             : *                          to be used by Collator::createDefault().  General
      34             : *                          clean up.
      35             : *  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy
      36             : *                          constructor and getDynamicClassID.
      37             : *  3/5/97      aliu        Modified constructFromFile() to add parameter
      38             : *                          specifying whether or not binary loading is to be
      39             : *                          attempted.  This is required for dynamic rule loading.
      40             : * 05/07/97     helena      Added memory allocation error detection.
      41             : *  6/17/97     helena      Added IDENTICAL strength for compare, changed getRules to
      42             : *                          use MergeCollation::getPattern.
      43             : *  6/20/97     helena      Java class name change.
      44             : *  8/18/97     helena      Added internal API documentation.
      45             : * 09/03/97     helena      Added createCollationKeyValues().
      46             : * 02/10/98     damiba      Added compare with "length" parameter
      47             : * 08/05/98     erm         Synched with 1.2 version of RuleBasedCollator.java
      48             : * 04/23/99     stephen     Removed EDecompositionMode, merged with
      49             : *                          Normalizer::EMode
      50             : * 06/14/99     stephen     Removed kResourceBundleSuffix
      51             : * 11/02/99     helena      Collator performance enhancements.  Eliminates the
      52             : *                          UnicodeString construction and special case for NO_OP.
      53             : * 11/23/99     srl         More performance enhancements. Updates to NormalizerIterator
      54             : *                          internal state management.
      55             : * 12/15/99     aliu        Update to support Thai collation.  Move NormalizerIterator
      56             : *                          to implementation file.
      57             : * 01/29/01     synwee      Modified into a C++ wrapper which calls C API
      58             : *                          (ucol.h)
      59             : * 2012-2014    markus      Rewritten in C++ again.
      60             : */
      61             : 
      62             : #ifndef TBLCOLL_H
      63             : #define TBLCOLL_H
      64             : 
      65             : #include "unicode/utypes.h"
      66             : 
      67             : #if !UCONFIG_NO_COLLATION
      68             : 
      69             : #include "unicode/coll.h"
      70             : #include "unicode/locid.h"
      71             : #include "unicode/uiter.h"
      72             : #include "unicode/ucol.h"
      73             : 
      74             : U_NAMESPACE_BEGIN
      75             : 
      76             : struct CollationCacheEntry;
      77             : struct CollationData;
      78             : struct CollationSettings;
      79             : struct CollationTailoring;
      80             : /**
      81             : * @stable ICU 2.0
      82             : */
      83             : class StringSearch;
      84             : /**
      85             : * @stable ICU 2.0
      86             : */
      87             : class CollationElementIterator;
      88             : class CollationKey;
      89             : class SortKeyByteSink;
      90             : class UnicodeSet;
      91             : class UnicodeString;
      92             : class UVector64;
      93             : 
      94             : /**
      95             :  * The RuleBasedCollator class provides the implementation of
      96             :  * Collator, using data-driven tables. The user can create a customized
      97             :  * table-based collation.
      98             :  * <p>
      99             :  * For more information about the collation service see
     100             :  * <a href="http://userguide.icu-project.org/collation">the User Guide</a>.
     101             :  * <p>
     102             :  * Collation service provides correct sorting orders for most locales supported in ICU.
     103             :  * If specific data for a locale is not available, the order eventually falls back
     104             :  * to the <a href="http://www.unicode.org/reports/tr35/tr35-collation.html#Root_Collation">CLDR root sort order</a>.
     105             :  * <p>
     106             :  * Sort ordering may be customized by providing your own set of rules. For more on
     107             :  * this subject see the <a href="http://userguide.icu-project.org/collation/customization">
     108             :  * Collation Customization</a> section of the User Guide.
     109             :  * <p>
     110             :  * Note, RuleBasedCollator is not to be subclassed.
     111             :  * @see        Collator
     112             :  */
     113             : class U_I18N_API RuleBasedCollator : public Collator {
     114             : public:
     115             :     /**
     116             :      * RuleBasedCollator constructor. This takes the table rules and builds a
     117             :      * collation table out of them. Please see RuleBasedCollator class
     118             :      * description for more details on the collation rule syntax.
     119             :      * @param rules the collation rules to build the collation table from.
     120             :      * @param status reporting a success or an error.
     121             :      * @stable ICU 2.0
     122             :      */
     123             :     RuleBasedCollator(const UnicodeString& rules, UErrorCode& status);
     124             : 
     125             :     /**
     126             :      * RuleBasedCollator constructor. This takes the table rules and builds a
     127             :      * collation table out of them. Please see RuleBasedCollator class
     128             :      * description for more details on the collation rule syntax.
     129             :      * @param rules the collation rules to build the collation table from.
     130             :      * @param collationStrength strength for comparison
     131             :      * @param status reporting a success or an error.
     132             :      * @stable ICU 2.0
     133             :      */
     134             :     RuleBasedCollator(const UnicodeString& rules,
     135             :                        ECollationStrength collationStrength,
     136             :                        UErrorCode& status);
     137             : 
     138             :     /**
     139             :      * RuleBasedCollator constructor. This takes the table rules and builds a
     140             :      * collation table out of them. Please see RuleBasedCollator class
     141             :      * description for more details on the collation rule syntax.
     142             :      * @param rules the collation rules to build the collation table from.
     143             :      * @param decompositionMode the normalisation mode
     144             :      * @param status reporting a success or an error.
     145             :      * @stable ICU 2.0
     146             :      */
     147             :     RuleBasedCollator(const UnicodeString& rules,
     148             :                     UColAttributeValue decompositionMode,
     149             :                     UErrorCode& status);
     150             : 
     151             :     /**
     152             :      * RuleBasedCollator constructor. This takes the table rules and builds a
     153             :      * collation table out of them. Please see RuleBasedCollator class
     154             :      * description for more details on the collation rule syntax.
     155             :      * @param rules the collation rules to build the collation table from.
     156             :      * @param collationStrength strength for comparison
     157             :      * @param decompositionMode the normalisation mode
     158             :      * @param status reporting a success or an error.
     159             :      * @stable ICU 2.0
     160             :      */
     161             :     RuleBasedCollator(const UnicodeString& rules,
     162             :                     ECollationStrength collationStrength,
     163             :                     UColAttributeValue decompositionMode,
     164             :                     UErrorCode& status);
     165             : 
     166             : #ifndef U_HIDE_INTERNAL_API
     167             :     /**
     168             :      * TODO: document & propose as public API
     169             :      * @internal
     170             :      */
     171             :     RuleBasedCollator(const UnicodeString &rules,
     172             :                       UParseError &parseError, UnicodeString &reason,
     173             :                       UErrorCode &errorCode);
     174             : #endif  /* U_HIDE_INTERNAL_API */
     175             : 
     176             :     /**
     177             :      * Copy constructor.
     178             :      * @param other the RuleBasedCollator object to be copied
     179             :      * @stable ICU 2.0
     180             :      */
     181             :     RuleBasedCollator(const RuleBasedCollator& other);
     182             : 
     183             : 
     184             :     /** Opens a collator from a collator binary image created using
     185             :     *  cloneBinary. Binary image used in instantiation of the
     186             :     *  collator remains owned by the user and should stay around for
     187             :     *  the lifetime of the collator. The API also takes a base collator
     188             :     *  which must be the root collator.
     189             :     *  @param bin binary image owned by the user and required through the
     190             :     *             lifetime of the collator
     191             :     *  @param length size of the image. If negative, the API will try to
     192             :     *                figure out the length of the image
     193             :     *  @param base Base collator, for lookup of untailored characters.
     194             :     *              Must be the root collator, must not be NULL.
     195             :     *              The base is required to be present through the lifetime of the collator.
     196             :     *  @param status for catching errors
     197             :     *  @return newly created collator
     198             :     *  @see cloneBinary
     199             :     *  @stable ICU 3.4
     200             :     */
     201             :     RuleBasedCollator(const uint8_t *bin, int32_t length,
     202             :                     const RuleBasedCollator *base,
     203             :                     UErrorCode &status);
     204             : 
     205             :     /**
     206             :      * Destructor.
     207             :      * @stable ICU 2.0
     208             :      */
     209             :     virtual ~RuleBasedCollator();
     210             : 
     211             :     /**
     212             :      * Assignment operator.
     213             :      * @param other other RuleBasedCollator object to copy from.
     214             :      * @stable ICU 2.0
     215             :      */
     216             :     RuleBasedCollator& operator=(const RuleBasedCollator& other);
     217             : 
     218             :     /**
     219             :      * Returns true if argument is the same as this object.
     220             :      * @param other Collator object to be compared.
     221             :      * @return true if the argument is the same as this object.
     222             :      * @stable ICU 2.0
     223             :      */
     224             :     virtual UBool operator==(const Collator& other) const;
     225             : 
     226             :     /**
     227             :      * Makes a copy of this object.
     228             :      * @return a copy of this object, owned by the caller
     229             :      * @stable ICU 2.0
     230             :      */
     231             :     virtual Collator* clone(void) const;
     232             : 
     233             :     /**
     234             :      * Creates a collation element iterator for the source string. The caller of
     235             :      * this method is responsible for the memory management of the return
     236             :      * pointer.
     237             :      * @param source the string over which the CollationElementIterator will
     238             :      *        iterate.
     239             :      * @return the collation element iterator of the source string using this as
     240             :      *         the base Collator.
     241             :      * @stable ICU 2.2
     242             :      */
     243             :     virtual CollationElementIterator* createCollationElementIterator(
     244             :                                            const UnicodeString& source) const;
     245             : 
     246             :     /**
     247             :      * Creates a collation element iterator for the source. The caller of this
     248             :      * method is responsible for the memory management of the returned pointer.
     249             :      * @param source the CharacterIterator which produces the characters over
     250             :      *        which the CollationElementIterator will iterate.
     251             :      * @return the collation element iterator of the source using this as the
     252             :      *         base Collator.
     253             :      * @stable ICU 2.2
     254             :      */
     255             :     virtual CollationElementIterator* createCollationElementIterator(
     256             :                                          const CharacterIterator& source) const;
     257             : 
     258             :     // Make deprecated versions of Collator::compare() visible.
     259             :     using Collator::compare;
     260             : 
     261             :     /**
     262             :     * The comparison function compares the character data stored in two
     263             :     * different strings. Returns information about whether a string is less
     264             :     * than, greater than or equal to another string.
     265             :     * @param source the source string to be compared with.
     266             :     * @param target the string that is to be compared with the source string.
     267             :     * @param status possible error code
     268             :     * @return Returns an enum value. UCOL_GREATER if source is greater
     269             :     * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
     270             :     * than target
     271             :     * @stable ICU 2.6
     272             :     **/
     273             :     virtual UCollationResult compare(const UnicodeString& source,
     274             :                                      const UnicodeString& target,
     275             :                                      UErrorCode &status) const;
     276             : 
     277             :     /**
     278             :     * Does the same thing as compare but limits the comparison to a specified
     279             :     * length
     280             :     * @param source the source string to be compared with.
     281             :     * @param target the string that is to be compared with the source string.
     282             :     * @param length the length the comparison is limited to
     283             :     * @param status possible error code
     284             :     * @return Returns an enum value. UCOL_GREATER if source (up to the specified
     285             :     *         length) is greater than target; UCOL_EQUAL if source (up to specified
     286             :     *         length) is equal to target; UCOL_LESS if source (up to the specified
     287             :     *         length) is less than target.
     288             :     * @stable ICU 2.6
     289             :     */
     290             :     virtual UCollationResult compare(const UnicodeString& source,
     291             :                                      const UnicodeString& target,
     292             :                                      int32_t length,
     293             :                                      UErrorCode &status) const;
     294             : 
     295             :     /**
     296             :     * The comparison function compares the character data stored in two
     297             :     * different string arrays. Returns information about whether a string array
     298             :     * is less than, greater than or equal to another string array.
     299             :     * @param source the source string array to be compared with.
     300             :     * @param sourceLength the length of the source string array.  If this value
     301             :     *        is equal to -1, the string array is null-terminated.
     302             :     * @param target the target string array to be compared with the source string array.
     303             :     * @param targetLength the length of the target string array.  If this value
     304             :     *        is equal to -1, the string array is null-terminated.
     305             :     * @param status possible error code
     306             :     * @return Returns an enum value. UCOL_GREATER if source is greater
     307             :     * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
     308             :     * than target
     309             :     * @stable ICU 2.6
     310             :     */
     311             :     virtual UCollationResult compare(const char16_t* source, int32_t sourceLength,
     312             :                                      const char16_t* target, int32_t targetLength,
     313             :                                      UErrorCode &status) const;
     314             : 
     315             :     /**
     316             :      * Compares two strings using the Collator.
     317             :      * Returns whether the first one compares less than/equal to/greater than
     318             :      * the second one.
     319             :      * This version takes UCharIterator input.
     320             :      * @param sIter the first ("source") string iterator
     321             :      * @param tIter the second ("target") string iterator
     322             :      * @param status ICU status
     323             :      * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER
     324             :      * @stable ICU 4.2
     325             :      */
     326             :     virtual UCollationResult compare(UCharIterator &sIter,
     327             :                                      UCharIterator &tIter,
     328             :                                      UErrorCode &status) const;
     329             : 
     330             :     /**
     331             :      * Compares two UTF-8 strings using the Collator.
     332             :      * Returns whether the first one compares less than/equal to/greater than
     333             :      * the second one.
     334             :      * This version takes UTF-8 input.
     335             :      * Note that a StringPiece can be implicitly constructed
     336             :      * from a std::string or a NUL-terminated const char * string.
     337             :      * @param source the first UTF-8 string
     338             :      * @param target the second UTF-8 string
     339             :      * @param status ICU status
     340             :      * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER
     341             :      * @stable ICU 51
     342             :      */
     343             :     virtual UCollationResult compareUTF8(const StringPiece &source,
     344             :                                          const StringPiece &target,
     345             :                                          UErrorCode &status) const;
     346             : 
     347             :     /**
     348             :      * Transforms the string into a series of characters
     349             :      * that can be compared with CollationKey.compare().
     350             :      *
     351             :      * Note that sort keys are often less efficient than simply doing comparison.
     352             :      * For more details, see the ICU User Guide.
     353             :      *
     354             :      * @param source the source string.
     355             :      * @param key the transformed key of the source string.
     356             :      * @param status the error code status.
     357             :      * @return the transformed key.
     358             :      * @see CollationKey
     359             :      * @stable ICU 2.0
     360             :      */
     361             :     virtual CollationKey& getCollationKey(const UnicodeString& source,
     362             :                                           CollationKey& key,
     363             :                                           UErrorCode& status) const;
     364             : 
     365             :     /**
     366             :      * Transforms a specified region of the string into a series of characters
     367             :      * that can be compared with CollationKey.compare.
     368             :      *
     369             :      * Note that sort keys are often less efficient than simply doing comparison.
     370             :      * For more details, see the ICU User Guide.
     371             :      *
     372             :      * @param source the source string.
     373             :      * @param sourceLength the length of the source string.
     374             :      * @param key the transformed key of the source string.
     375             :      * @param status the error code status.
     376             :      * @return the transformed key.
     377             :      * @see CollationKey
     378             :      * @stable ICU 2.0
     379             :      */
     380             :     virtual CollationKey& getCollationKey(const char16_t *source,
     381             :                                           int32_t sourceLength,
     382             :                                           CollationKey& key,
     383             :                                           UErrorCode& status) const;
     384             : 
     385             :     /**
     386             :      * Generates the hash code for the rule-based collation object.
     387             :      * @return the hash code.
     388             :      * @stable ICU 2.0
     389             :      */
     390             :     virtual int32_t hashCode() const;
     391             : 
     392             :     /**
     393             :     * Gets the locale of the Collator
     394             :     * @param type can be either requested, valid or actual locale. For more
     395             :     *             information see the definition of ULocDataLocaleType in
     396             :     *             uloc.h
     397             :     * @param status the error code status.
     398             :     * @return locale where the collation data lives. If the collator
     399             :     *         was instantiated from rules, locale is empty.
     400             :     * @deprecated ICU 2.8 likely to change in ICU 3.0, based on feedback
     401             :     */
     402             :     virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
     403             : 
     404             :     /**
     405             :      * Gets the tailoring rules for this collator.
     406             :      * @return the collation tailoring from which this collator was created
     407             :      * @stable ICU 2.0
     408             :      */
     409             :     const UnicodeString& getRules() const;
     410             : 
     411             :     /**
     412             :      * Gets the version information for a Collator.
     413             :      * @param info the version # information, the result will be filled in
     414             :      * @stable ICU 2.0
     415             :      */
     416             :     virtual void getVersion(UVersionInfo info) const;
     417             : 
     418             : #ifndef U_HIDE_DEPRECATED_API
     419             :     /**
     420             :      * Returns the maximum length of any expansion sequences that end with the
     421             :      * specified comparison order.
     422             :      *
     423             :      * This is specific to the kind of collation element values and sequences
     424             :      * returned by the CollationElementIterator.
     425             :      * Call CollationElementIterator::getMaxExpansion() instead.
     426             :      *
     427             :      * @param order a collation order returned by CollationElementIterator::previous
     428             :      *              or CollationElementIterator::next.
     429             :      * @return maximum size of the expansion sequences ending with the collation
     430             :      *         element, or 1 if the collation element does not occur at the end of
     431             :      *         any expansion sequence
     432             :      * @see CollationElementIterator#getMaxExpansion
     433             :      * @deprecated ICU 51 Use CollationElementIterator::getMaxExpansion() instead.
     434             :      */
     435             :     int32_t getMaxExpansion(int32_t order) const;
     436             : #endif  /* U_HIDE_DEPRECATED_API */
     437             : 
     438             :     /**
     439             :      * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
     440             :      * method is to implement a simple version of RTTI, since not all C++
     441             :      * compilers support genuine RTTI. Polymorphic operator==() and clone()
     442             :      * methods call this method.
     443             :      * @return The class ID for this object. All objects of a given class have
     444             :      *         the same class ID. Objects of other classes have different class
     445             :      *         IDs.
     446             :      * @stable ICU 2.0
     447             :      */
     448             :     virtual UClassID getDynamicClassID(void) const;
     449             : 
     450             :     /**
     451             :      * Returns the class ID for this class. This is useful only for comparing to
     452             :      * a return value from getDynamicClassID(). For example:
     453             :      * <pre>
     454             :      * Base* polymorphic_pointer = createPolymorphicObject();
     455             :      * if (polymorphic_pointer->getDynamicClassID() ==
     456             :      *                                          Derived::getStaticClassID()) ...
     457             :      * </pre>
     458             :      * @return The class ID for all objects of this class.
     459             :      * @stable ICU 2.0
     460             :      */
     461             :     static UClassID U_EXPORT2 getStaticClassID(void);
     462             : 
     463             : #ifndef U_HIDE_DEPRECATED_API
     464             :     /**
     465             :      * Do not use this method: The caller and the ICU library might use different heaps.
     466             :      * Use cloneBinary() instead which writes to caller-provided memory.
     467             :      *
     468             :      * Returns a binary format of this collator.
     469             :      * @param length Returns the length of the data, in bytes
     470             :      * @param status the error code status.
     471             :      * @return memory, owned by the caller, of size 'length' bytes.
     472             :      * @deprecated ICU 52. Use cloneBinary() instead.
     473             :      */
     474             :     uint8_t *cloneRuleData(int32_t &length, UErrorCode &status) const;
     475             : #endif  /* U_HIDE_DEPRECATED_API */
     476             : 
     477             :     /** Creates a binary image of a collator. This binary image can be stored and
     478             :     *  later used to instantiate a collator using ucol_openBinary.
     479             :     *  This API supports preflighting.
     480             :     *  @param buffer a fill-in buffer to receive the binary image
     481             :     *  @param capacity capacity of the destination buffer
     482             :     *  @param status for catching errors
     483             :     *  @return size of the image
     484             :     *  @see ucol_openBinary
     485             :     *  @stable ICU 3.4
     486             :     */
     487             :     int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status) const;
     488             : 
     489             :     /**
     490             :      * Returns current rules. Delta defines whether full rules are returned or
     491             :      * just the tailoring.
     492             :      *
     493             :      * getRules(void) should normally be used instead.
     494             :      * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales
     495             :      * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES.
     496             :      * @param buffer UnicodeString to store the result rules
     497             :      * @stable ICU 2.2
     498             :      * @see UCOL_FULL_RULES
     499             :      */
     500             :     void getRules(UColRuleOption delta, UnicodeString &buffer) const;
     501             : 
     502             :     /**
     503             :      * Universal attribute setter
     504             :      * @param attr attribute type
     505             :      * @param value attribute value
     506             :      * @param status to indicate whether the operation went on smoothly or there were errors
     507             :      * @stable ICU 2.2
     508             :      */
     509             :     virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
     510             :                               UErrorCode &status);
     511             : 
     512             :     /**
     513             :      * Universal attribute getter.
     514             :      * @param attr attribute type
     515             :      * @param status to indicate whether the operation went on smoothly or there were errors
     516             :      * @return attribute value
     517             :      * @stable ICU 2.2
     518             :      */
     519             :     virtual UColAttributeValue getAttribute(UColAttribute attr,
     520             :                                             UErrorCode &status) const;
     521             : 
     522             :     /**
     523             :      * Sets the variable top to the top of the specified reordering group.
     524             :      * The variable top determines the highest-sorting character
     525             :      * which is affected by UCOL_ALTERNATE_HANDLING.
     526             :      * If that attribute is set to UCOL_NON_IGNORABLE, then the variable top has no effect.
     527             :      * @param group one of UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION,
     528             :      *              UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_CURRENCY;
     529             :      *              or UCOL_REORDER_CODE_DEFAULT to restore the default max variable group
     530             :      * @param errorCode Standard ICU error code. Its input value must
     531             :      *                  pass the U_SUCCESS() test, or else the function returns
     532             :      *                  immediately. Check for U_FAILURE() on output or use with
     533             :      *                  function chaining. (See User Guide for details.)
     534             :      * @return *this
     535             :      * @see getMaxVariable
     536             :      * @stable ICU 53
     537             :      */
     538             :     virtual Collator &setMaxVariable(UColReorderCode group, UErrorCode &errorCode);
     539             : 
     540             :     /**
     541             :      * Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING.
     542             :      * @return the maximum variable reordering group.
     543             :      * @see setMaxVariable
     544             :      * @stable ICU 53
     545             :      */
     546             :     virtual UColReorderCode getMaxVariable() const;
     547             : 
     548             :     /**
     549             :      * Sets the variable top to the primary weight of the specified string.
     550             :      *
     551             :      * Beginning with ICU 53, the variable top is pinned to
     552             :      * the top of one of the supported reordering groups,
     553             :      * and it must not be beyond the last of those groups.
     554             :      * See setMaxVariable().
     555             :      * @param varTop one or more (if contraction) char16_ts to which the variable top should be set
     556             :      * @param len length of variable top string. If -1 it is considered to be zero terminated.
     557             :      * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
     558             :      *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
     559             :      *    U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond
     560             :      *    the last reordering group supported by setMaxVariable()
     561             :      * @return variable top primary weight
     562             :      * @deprecated ICU 53 Call setMaxVariable() instead.
     563             :      */
     564             :     virtual uint32_t setVariableTop(const char16_t *varTop, int32_t len, UErrorCode &status);
     565             : 
     566             :     /**
     567             :      * Sets the variable top to the primary weight of the specified string.
     568             :      *
     569             :      * Beginning with ICU 53, the variable top is pinned to
     570             :      * the top of one of the supported reordering groups,
     571             :      * and it must not be beyond the last of those groups.
     572             :      * See setMaxVariable().
     573             :      * @param varTop a UnicodeString size 1 or more (if contraction) of char16_ts to which the variable top should be set
     574             :      * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
     575             :      *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
     576             :      *    U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond
     577             :      *    the last reordering group supported by setMaxVariable()
     578             :      * @return variable top primary weight
     579             :      * @deprecated ICU 53 Call setMaxVariable() instead.
     580             :      */
     581             :     virtual uint32_t setVariableTop(const UnicodeString &varTop, UErrorCode &status);
     582             : 
     583             :     /**
     584             :      * Sets the variable top to the specified primary weight.
     585             :      *
     586             :      * Beginning with ICU 53, the variable top is pinned to
     587             :      * the top of one of the supported reordering groups,
     588             :      * and it must not be beyond the last of those groups.
     589             :      * See setMaxVariable().
     590             :      * @param varTop primary weight, as returned by setVariableTop or ucol_getVariableTop
     591             :      * @param status error code
     592             :      * @deprecated ICU 53 Call setMaxVariable() instead.
     593             :      */
     594             :     virtual void setVariableTop(uint32_t varTop, UErrorCode &status);
     595             : 
     596             :     /**
     597             :      * Gets the variable top value of a Collator.
     598             :      * @param status error code (not changed by function). If error code is set, the return value is undefined.
     599             :      * @return the variable top primary weight
     600             :      * @see getMaxVariable
     601             :      * @stable ICU 2.0
     602             :      */
     603             :     virtual uint32_t getVariableTop(UErrorCode &status) const;
     604             : 
     605             :     /**
     606             :      * Get a UnicodeSet that contains all the characters and sequences tailored in
     607             :      * this collator.
     608             :      * @param status      error code of the operation
     609             :      * @return a pointer to a UnicodeSet object containing all the
     610             :      *         code points and sequences that may sort differently than
     611             :      *         in the root collator. The object must be disposed of by using delete
     612             :      * @stable ICU 2.4
     613             :      */
     614             :     virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
     615             : 
     616             :     /**
     617             :      * Get the sort key as an array of bytes from a UnicodeString.
     618             :      *
     619             :      * Note that sort keys are often less efficient than simply doing comparison.
     620             :      * For more details, see the ICU User Guide.
     621             :      *
     622             :      * @param source string to be processed.
     623             :      * @param result buffer to store result in. If NULL, number of bytes needed
     624             :      *        will be returned.
     625             :      * @param resultLength length of the result buffer. If it is not large enough, the
     626             :      *        buffer will be filled to capacity.
     627             :      * @return Number of bytes needed for storing the sort key
     628             :      * @stable ICU 2.0
     629             :      */
     630             :     virtual int32_t getSortKey(const UnicodeString& source, uint8_t *result,
     631             :                                int32_t resultLength) const;
     632             : 
     633             :     /**
     634             :      * Get the sort key as an array of bytes from a char16_t buffer.
     635             :      *
     636             :      * Note that sort keys are often less efficient than simply doing comparison.
     637             :      * For more details, see the ICU User Guide.
     638             :      *
     639             :      * @param source string to be processed.
     640             :      * @param sourceLength length of string to be processed. If -1, the string
     641             :      *        is 0 terminated and length will be decided by the function.
     642             :      * @param result buffer to store result in. If NULL, number of bytes needed
     643             :      *        will be returned.
     644             :      * @param resultLength length of the result buffer. If it is not large enough, the
     645             :      *        buffer will be filled to capacity.
     646             :      * @return Number of bytes needed for storing the sort key
     647             :      * @stable ICU 2.2
     648             :      */
     649             :     virtual int32_t getSortKey(const char16_t *source, int32_t sourceLength,
     650             :                                uint8_t *result, int32_t resultLength) const;
     651             : 
     652             :     /**
     653             :      * Retrieves the reordering codes for this collator.
     654             :      * @param dest The array to fill with the script ordering.
     655             :      * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function
     656             :      *  will only return the length of the result without writing any codes (pre-flighting).
     657             :      * @param status A reference to an error code value, which must not indicate
     658             :      * a failure before the function call.
     659             :      * @return The length of the script ordering array.
     660             :      * @see ucol_setReorderCodes
     661             :      * @see Collator#getEquivalentReorderCodes
     662             :      * @see Collator#setReorderCodes
     663             :      * @stable ICU 4.8
     664             :      */
     665             :      virtual int32_t getReorderCodes(int32_t *dest,
     666             :                                      int32_t destCapacity,
     667             :                                      UErrorCode& status) const;
     668             : 
     669             :     /**
     670             :      * Sets the ordering of scripts for this collator.
     671             :      * @param reorderCodes An array of script codes in the new order. This can be NULL if the
     672             :      * length is also set to 0. An empty array will clear any reordering codes on the collator.
     673             :      * @param reorderCodesLength The length of reorderCodes.
     674             :      * @param status error code
     675             :      * @see ucol_setReorderCodes
     676             :      * @see Collator#getReorderCodes
     677             :      * @see Collator#getEquivalentReorderCodes
     678             :      * @stable ICU 4.8
     679             :      */
     680             :      virtual void setReorderCodes(const int32_t* reorderCodes,
     681             :                                   int32_t reorderCodesLength,
     682             :                                   UErrorCode& status) ;
     683             : 
     684             :     /**
     685             :      * Implements ucol_strcollUTF8().
     686             :      * @internal
     687             :      */
     688             :     virtual UCollationResult internalCompareUTF8(
     689             :             const char *left, int32_t leftLength,
     690             :             const char *right, int32_t rightLength,
     691             :             UErrorCode &errorCode) const;
     692             : 
     693             :     /** Get the short definition string for a collator. This internal API harvests the collator's
     694             :      *  locale and the attribute set and produces a string that can be used for opening
     695             :      *  a collator with the same attributes using the ucol_openFromShortString API.
     696             :      *  This string will be normalized.
     697             :      *  The structure and the syntax of the string is defined in the "Naming collators"
     698             :      *  section of the users guide:
     699             :      *  http://userguide.icu-project.org/collation/concepts#TOC-Collator-naming-scheme
     700             :      *  This function supports preflighting.
     701             :      *
     702             :      *  This is internal, and intended to be used with delegate converters.
     703             :      *
     704             :      *  @param locale a locale that will appear as the collator's locale in the resulting
     705             :      *                short string definition. If NULL, the locale will be harvested
     706             :      *                from the collator.
     707             :      *  @param buffer space to hold the resulting string
     708             :      *  @param capacity capacity of the buffer
     709             :      *  @param status for returning errors. All the preflighting errors are featured
     710             :      *  @return length of the resulting string
     711             :      *  @see ucol_openFromShortString
     712             :      *  @see ucol_normalizeShortDefinitionString
     713             :      *  @see ucol_getShortDefinitionString
     714             :      *  @internal
     715             :      */
     716             :     virtual int32_t internalGetShortDefinitionString(const char *locale,
     717             :                                                      char *buffer,
     718             :                                                      int32_t capacity,
     719             :                                                      UErrorCode &status) const;
     720             : 
     721             :     /**
     722             :      * Implements ucol_nextSortKeyPart().
     723             :      * @internal
     724             :      */
     725             :     virtual int32_t internalNextSortKeyPart(
     726             :             UCharIterator *iter, uint32_t state[2],
     727             :             uint8_t *dest, int32_t count, UErrorCode &errorCode) const;
     728             : 
     729             :     // Do not enclose the default constructor with #ifndef U_HIDE_INTERNAL_API
     730             :     /**
     731             :      * Only for use in ucol_openRules().
     732             :      * @internal
     733             :      */
     734             :     RuleBasedCollator();
     735             : 
     736             : #ifndef U_HIDE_INTERNAL_API
     737             :     /**
     738             :      * Implements ucol_getLocaleByType().
     739             :      * Needed because the lifetime of the locale ID string must match that of the collator.
     740             :      * getLocale() returns a copy of a Locale, with minimal lifetime in a C wrapper.
     741             :      * @internal
     742             :      */
     743             :     const char *internalGetLocaleID(ULocDataLocaleType type, UErrorCode &errorCode) const;
     744             : 
     745             :     /**
     746             :      * Implements ucol_getContractionsAndExpansions().
     747             :      * Gets this collator's sets of contraction strings and/or
     748             :      * characters and strings that map to multiple collation elements (expansions).
     749             :      * If addPrefixes is TRUE, then contractions that are expressed as
     750             :      * prefix/pre-context rules are included.
     751             :      * @param contractions if not NULL, the set to hold the contractions
     752             :      * @param expansions if not NULL, the set to hold the expansions
     753             :      * @param addPrefixes include prefix contextual mappings
     754             :      * @param errorCode in/out ICU error code
     755             :      * @internal
     756             :      */
     757             :     void internalGetContractionsAndExpansions(
     758             :             UnicodeSet *contractions, UnicodeSet *expansions,
     759             :             UBool addPrefixes, UErrorCode &errorCode) const;
     760             : 
     761             :     /**
     762             :      * Adds the contractions that start with character c to the set.
     763             :      * Ignores prefixes. Used by AlphabeticIndex.
     764             :      * @internal
     765             :      */
     766             :     void internalAddContractions(UChar32 c, UnicodeSet &set, UErrorCode &errorCode) const;
     767             : 
     768             :     /**
     769             :      * Implements from-rule constructors, and ucol_openRules().
     770             :      * @internal
     771             :      */
     772             :     void internalBuildTailoring(
     773             :             const UnicodeString &rules,
     774             :             int32_t strength,
     775             :             UColAttributeValue decompositionMode,
     776             :             UParseError *outParseError, UnicodeString *outReason,
     777             :             UErrorCode &errorCode);
     778             : 
     779             :     /** @internal Passes uc through fromUCollator() and dynamic_casts the result; the cast yields a null pointer if the object is not a RuleBasedCollator. */
     780             :     static inline RuleBasedCollator *rbcFromUCollator(UCollator *uc) {
     781             :         return dynamic_cast<RuleBasedCollator *>(fromUCollator(uc));
     782             :     }
     783             :     /** @internal Const overload: passes uc through fromUCollator() and dynamic_casts the result; the cast yields a null pointer if the object is not a RuleBasedCollator. */
     784           0 :     static inline const RuleBasedCollator *rbcFromUCollator(const UCollator *uc) {
     785           0 :         return dynamic_cast<const RuleBasedCollator *>(fromUCollator(uc));
     786             :     }
     787             : 
     788             :     /**
     789             :      * Appends the CEs for the string to the vector.
     790             :      * @internal for tests & tools
     791             :      */
     792             :     void internalGetCEs(const UnicodeString &str, UVector64 &ces, UErrorCode &errorCode) const;
     793             : #endif  // U_HIDE_INTERNAL_API
     794             : 
     795             : protected:
     796             :    /**
     797             :     * Used internally by registration to define the requested and valid locales.
     798             :     * @param requestedLocale the requested locale
     799             :     * @param validLocale the valid locale
     800             :     * @param actualLocale the actual locale
     801             :     * @internal
     802             :     */
     803             :     virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
     804             : 
     805             : private:
     806             :     friend class CollationElementIterator;
     807             :     friend class Collator;
     808             : 
     809             :     RuleBasedCollator(const CollationCacheEntry *entry);
     810             : 
     811             :     /**
     812             :      * Enumeration of attributes that are relevant for short definition strings
     813             :      * (e.g., ucol_getShortDefinitionString()).
     814             :      * Effectively extends UColAttribute.
     815             :      */
     816             :     enum Attributes {
     817             :         ATTR_VARIABLE_TOP = UCOL_ATTRIBUTE_COUNT,
     818             :         ATTR_LIMIT
     819             :     };
     820             : 
     821             :     void adoptTailoring(CollationTailoring *t, UErrorCode &errorCode);
     822             : 
     823             :     // Both lengths must be <0 or else both must be >=0.
     824             :     UCollationResult doCompare(const char16_t *left, int32_t leftLength,
     825             :                                const char16_t *right, int32_t rightLength,
     826             :                                UErrorCode &errorCode) const;
     827             :     UCollationResult doCompare(const uint8_t *left, int32_t leftLength,
     828             :                                const uint8_t *right, int32_t rightLength,
     829             :                                UErrorCode &errorCode) const;
     830             : 
     831             :     void writeSortKey(const char16_t *s, int32_t length,
     832             :                       SortKeyByteSink &sink, UErrorCode &errorCode) const;
     833             : 
     834             :     void writeIdenticalLevel(const char16_t *s, const char16_t *limit,
     835             :                              SortKeyByteSink &sink, UErrorCode &errorCode) const;
     836             : 
     837             :     const CollationSettings &getDefaultSettings() const;
     838             : 
     839           0 :     void setAttributeDefault(int32_t attribute) {  // clears the attribute's bit in explicitlySetAttributes
     840           0 :         explicitlySetAttributes &= ~((uint32_t)1 << attribute);
     841           0 :     }
     842           0 :     void setAttributeExplicitly(int32_t attribute) {  // sets the attribute's bit in explicitlySetAttributes
     843           0 :         explicitlySetAttributes |= (uint32_t)1 << attribute;
     844           0 :     }
     845           0 :     UBool attributeHasBeenSetExplicitly(int32_t attribute) const {  // tests the attribute's bit in explicitlySetAttributes
     846             :         // Precondition (not checked here): 0 <= attribute && attribute < ATTR_LIMIT; an out-of-range value makes the shift below undefined.
     847           0 :         return (UBool)((explicitlySetAttributes & ((uint32_t)1 << attribute)) != 0);
     848             :     }
     849             : 
     850             :     /**
     851             :      * Tests whether a character is "unsafe" for use as a collation starting point.
     852             :      *
     853             :      * @param c code point or code unit
     854             :      * @return TRUE if c is unsafe
     855             :      * @see CollationElementIterator#setOffset(int)
     856             :      */
     857             :     UBool isUnsafe(UChar32 c) const;
     858             : 
     859             :     static void U_CALLCONV computeMaxExpansions(const CollationTailoring *t, UErrorCode &errorCode);
     860             :     UBool initMaxExpansions(UErrorCode &errorCode) const;
     861             : 
     862             :     void setFastLatinOptions(CollationSettings &ownedSettings) const;
     863             : 
     864             :     const CollationData *data;
     865             :     const CollationSettings *settings;  // reference-counted
     866             :     const CollationTailoring *tailoring;  // alias of cacheEntry->tailoring
     867             :     const CollationCacheEntry *cacheEntry;  // reference-counted
     868             :     Locale validLocale;
     869             :     uint32_t explicitlySetAttributes;
     870             : 
     871             :     UBool actualLocaleIsSameAsValid;
     872             : };
     873             : 
     874             : U_NAMESPACE_END
     875             : 
     876             : #endif  // !UCONFIG_NO_COLLATION
     877             : #endif  // TBLCOLL_H

Generated by: LCOV version 1.13