LCOV - code coverage report
Current view: top level - intl/icu/source/i18n - collationsettings.h (source / functions) Hit Total Coverage
Test: output.info Lines: 0 34 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 14 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : *******************************************************************************
       5             : * Copyright (C) 2013-2015, International Business Machines
       6             : * Corporation and others.  All Rights Reserved.
       7             : *******************************************************************************
       8             : * collationsettings.h
       9             : *
      10             : * created on: 2013feb07
      11             : * created by: Markus W. Scherer
      12             : */
      13             : 
      14             : #ifndef __COLLATIONSETTINGS_H__
      15             : #define __COLLATIONSETTINGS_H__
      16             : 
      17             : #include "unicode/utypes.h"
      18             : 
      19             : #if !UCONFIG_NO_COLLATION
      20             : 
      21             : #include "unicode/ucol.h"
      22             : #include "collation.h"
      23             : #include "sharedobject.h"
      24             : #include "umutex.h"
      25             : 
      26             : U_NAMESPACE_BEGIN
      27             : 
      28             : struct CollationData;
      29             : 
      30             : /**
      31             :  * Collation settings/options/attributes.
      32             :  * These are the values that can be changed via API.
      33             :  */
      34             : struct U_I18N_API CollationSettings : public SharedObject {
      35             :     /**
      36             :      * Options bit 0: Perform the FCD check on the input text and deliver normalized text.
      37             :      */
      38             :     static const int32_t CHECK_FCD = 1;
      39             :     /**
      40             :      * Options bit 1: Numeric collation.
      41             :      * Also known as CODAN = COllate Digits As Numbers.
      42             :      *
      43             :      * Treat digit sequences as numbers with CE sequences in numeric order,
      44             :      * rather than returning a normal CE for each digit.
      45             :      */
      46             :     static const int32_t NUMERIC = 2;
      47             :     /**
      48             :      * "Shifted" alternate handling, see ALTERNATE_MASK.
      49             :      */
      50             :     static const int32_t SHIFTED = 4;
      51             :     /**
      52             :      * Options bits 3..2: Alternate-handling mask. 0 for non-ignorable.
      53             :      * Reserve values 8 and 0xc for shift-trimmed and blanked.
      54             :      */
      55             :     static const int32_t ALTERNATE_MASK = 0xc;
      56             :     /**
      57             :      * Options bits 6..4: The 3-bit maxVariable value bit field is shifted by this value.
      58             :      */
      59             :     static const int32_t MAX_VARIABLE_SHIFT = 4;
      60             :     /** maxVariable options bit mask before shifting. */
      61             :     static const int32_t MAX_VARIABLE_MASK = 0x70;
      62             :     /** Options bit 7: Reserved/unused/0. */
      63             :     /**
      64             :      * Options bit 8: Sort uppercase first if caseLevel or caseFirst is on.
      65             :      */
      66             :     static const int32_t UPPER_FIRST = 0x100;
      67             :     /**
      68             :      * Options bit 9: Keep the case bits in the tertiary weight (they trump other tertiary values)
      69             :      * unless case level is on (when they are *moved* into the separate case level).
      70             :      * By default, the case bits are removed from the tertiary weight (ignored).
      71             :      *
      72             :      * When CASE_FIRST is off, UPPER_FIRST must be off too, corresponding to
      73             :      * the tri-value UCOL_CASE_FIRST attribute: UCOL_OFF vs. UCOL_LOWER_FIRST vs. UCOL_UPPER_FIRST.
      74             :      */
      75             :     static const int32_t CASE_FIRST = 0x200;
      76             :     /**
      77             :      * Options bit mask for the caseFirst and upperFirst bits, before shifting.
      78             :      * Also the options value for caseFirst==upperFirst, where both bits are set.
      79             :      */
      80             :     static const int32_t CASE_FIRST_AND_UPPER_MASK = CASE_FIRST | UPPER_FIRST;
      81             :     /**
      82             :      * Options bit 10: Insert the case level between the secondary and tertiary levels.
      83             :      */
      84             :     static const int32_t CASE_LEVEL = 0x400;
      85             :     /**
      86             :      * Options bit 11: Compare secondary weights backwards. ("French secondary")
      87             :      */
      88             :     static const int32_t BACKWARD_SECONDARY = 0x800;
      89             :     /**
      90             :      * Options bits 15..12: The 4-bit strength value bit field is shifted by this value.
      91             :      * It is the top used bit field in the options. (No need to mask after shifting.)
      92             :      */
      93             :     static const int32_t STRENGTH_SHIFT = 12;
      94             :     /** Strength options bit mask before shifting. */
      95             :     static const int32_t STRENGTH_MASK = 0xf000;
      96             : 
      97             :     /** maxVariable values */
      98             :     enum MaxVariable {
      99             :         MAX_VAR_SPACE,
     100             :         MAX_VAR_PUNCT,
     101             :         MAX_VAR_SYMBOL,
     102             :         MAX_VAR_CURRENCY
     103             :     };
     104             :     /** Default settings: default strength, maxVariable=punct, no reordering, fast Latin disabled. */
     105           0 :     CollationSettings()
     106           0 :             : options((UCOL_DEFAULT_STRENGTH << STRENGTH_SHIFT) |
     107             :                       (MAX_VAR_PUNCT << MAX_VARIABLE_SHIFT)),
     108             :               variableTop(0),
     109             :               reorderTable(NULL),
     110             :               minHighNoReorder(0),
     111             :               reorderRanges(NULL), reorderRangesLength(0),
     112             :               reorderCodes(NULL), reorderCodesLength(0), reorderCodesCapacity(0),
     113           0 :               fastLatinOptions(-1) {}
     114             : 
     115             :     CollationSettings(const CollationSettings &other);
     116             :     virtual ~CollationSettings();
     117             : 
     118             :     UBool operator==(const CollationSettings &other) const;
     119             :     /** Negation of operator==. */
     120           0 :     inline UBool operator!=(const CollationSettings &other) const {
     121           0 :         return !operator==(other);
     122             :     }
     123             : 
     124             :     int32_t hashCode() const;
     125             : 
     126             :     void resetReordering();
     127             :     void aliasReordering(const CollationData &data, const int32_t *codes, int32_t length,
     128             :                          const uint32_t *ranges, int32_t rangesLength,
     129             :                          const uint8_t *table, UErrorCode &errorCode);
     130             :     void setReordering(const CollationData &data, const int32_t *codes, int32_t codesLength,
     131             :                        UErrorCode &errorCode);
     132             :     void copyReorderingFrom(const CollationSettings &other, UErrorCode &errorCode);
     133             :     /** Returns true if a reorder table is set (reorderTable is not NULL). */
     134           0 :     inline UBool hasReordering() const { return reorderTable != NULL; }
     135             :     static UBool reorderTableHasSplitBytes(const uint8_t table[256]);
     136           0 :     inline uint32_t reorder(uint32_t p) const {  // indexes reorderTable without a NULL check
     137           0 :         uint8_t b = reorderTable[p >> 24];  // table entry for the primary lead byte
     138           0 :         if(b != 0 || p <= Collation::NO_CE_PRIMARY) {  // lead byte is not split, or p is at most NO_CE_PRIMARY
     139           0 :             return ((uint32_t)b << 24) | (p & 0xffffff);  // replace the lead byte, keep the lower 24 bits
     140             :         } else {
     141           0 :             return reorderEx(p);  // 0 entry: split lead byte, resolved out of line
     142             :         }
     143             :     }
     144             : 
     145             :     void setStrength(int32_t value, int32_t defaultOptions, UErrorCode &errorCode);
     146             :     /** Extracts the strength bit field; not masked because it is the top used field (see STRENGTH_SHIFT). */
     147           0 :     static int32_t getStrength(int32_t options) {
     148           0 :         return options >> STRENGTH_SHIFT;
     149             :     }
     150             :     /** Returns the strength stored in this object's options. */
     151           0 :     int32_t getStrength() const {
     152           0 :         return getStrength(options);
     153             :     }
     154             : 
     155             :     /** Sets the options bit for an on/off attribute. */
     156             :     void setFlag(int32_t bit, UColAttributeValue value,
     157             :                  int32_t defaultOptions, UErrorCode &errorCode);
     158             :     /** Returns UCOL_ON if any of the given bit(s) is set in options, otherwise UCOL_OFF. */
     159             :     UColAttributeValue getFlag(int32_t bit) const {
     160             :         return ((options & bit) != 0) ? UCOL_ON : UCOL_OFF;
     161             :     }
     162             : 
     163             :     void setCaseFirst(UColAttributeValue value, int32_t defaultOptions, UErrorCode &errorCode);
     164             :     /** Maps the CASE_FIRST/UPPER_FIRST bits to UCOL_OFF, UCOL_LOWER_FIRST or UCOL_UPPER_FIRST. */
     165           0 :     UColAttributeValue getCaseFirst() const {
     166           0 :         int32_t option = options & CASE_FIRST_AND_UPPER_MASK;
     167           0 :         return (option == 0) ? UCOL_OFF :
     168           0 :                 (option == CASE_FIRST) ? UCOL_LOWER_FIRST : UCOL_UPPER_FIRST;
     169             :     }
     170             : 
     171             :     void setAlternateHandling(UColAttributeValue value,
     172             :                               int32_t defaultOptions, UErrorCode &errorCode);
     173             :     /** Returns UCOL_NON_IGNORABLE if both alternate bits are 0, otherwise UCOL_SHIFTED. */
     174           0 :     UColAttributeValue getAlternateHandling() const {
     175           0 :         return ((options & ALTERNATE_MASK) == 0) ? UCOL_NON_IGNORABLE : UCOL_SHIFTED;
     176             :     }
     177             : 
     178             :     void setMaxVariable(int32_t value, int32_t defaultOptions, UErrorCode &errorCode);
     179             :     /** Extracts the maxVariable bit field from options. */
     180           0 :     MaxVariable getMaxVariable() const {
     181           0 :         return (MaxVariable)((options & MAX_VARIABLE_MASK) >> MAX_VARIABLE_SHIFT);
     182             :     }
     183             : 
     184             :     /**
     185             :      * Include case bits in the tertiary level if caseLevel=off and caseFirst!=off.
     186             :      */
     187           0 :     static inline UBool isTertiaryWithCaseBits(int32_t options) {
     188           0 :         return (options & (CASE_LEVEL | CASE_FIRST)) == CASE_FIRST;
     189             :     }
     190           0 :     static uint32_t getTertiaryMask(int32_t options) {
     191             :         // Remove the case bits from the tertiary weight when caseLevel is on or caseFirst is off.
     192           0 :         return isTertiaryWithCaseBits(options) ?
     193           0 :                 Collation::CASE_AND_TERTIARY_MASK : Collation::ONLY_TERTIARY_MASK;
     194             :     }
     195             : 
     196           0 :     static UBool sortsTertiaryUpperCaseFirst(int32_t options) {
     197             :         // On tertiary level, consider case bits and sort uppercase first
     198             :         // if caseLevel is off and caseFirst==upperFirst.
     199           0 :         return (options & (CASE_LEVEL | CASE_FIRST_AND_UPPER_MASK)) == CASE_FIRST_AND_UPPER_MASK;
     200             :     }
     201             :     /** Returns true when the CHECK_FCD option bit is not set. */
     202           0 :     inline UBool dontCheckFCD() const {
     203           0 :         return (options & CHECK_FCD) == 0;
     204             :     }
     205             :     /** Returns true when the BACKWARD_SECONDARY option bit is set. */
     206             :     inline UBool hasBackwardSecondary() const {
     207             :         return (options & BACKWARD_SECONDARY) != 0;
     208             :     }
     209             :     /** Returns true when the NUMERIC option bit is set. */
     210           0 :     inline UBool isNumeric() const {
     211           0 :         return (options & NUMERIC) != 0;
     212             :     }
     213             : 
     214             :     /** Option flags and bit fields: CHECK_FCD through STRENGTH_MASK, as defined in this struct. */
     215             :     int32_t options;
     216             :     /** Variable-top primary weight. */
     217             :     uint32_t variableTop;
     218             :     /**
     219             :      * 256-byte table for reordering permutation of primary lead bytes; NULL if no reordering.
     220             :      * A 0 entry at a non-zero index means that the primary lead byte is "split"
     221             :      * (there are different offsets for primaries that share that lead byte)
     222             :      * and the reordering offset must be determined via the reorderRanges.
     223             :      */
     224             :     const uint8_t *reorderTable;
     225             :     /** Limit of last reordered range. 0 if no reordering or no split bytes. */
     226             :     uint32_t minHighNoReorder;
     227             :     /**
     228             :      * Primary-weight ranges for script reordering,
     229             :      * to be used by reorder(p) for split-reordered primary lead bytes.
     230             :      *
     231             :      * Each entry is a (limit, offset) pair.
     232             :      * The upper 16 bits of the entry are the upper 16 bits of the
     233             :      * exclusive primary limit of a range.
     234             :      * Primaries between the previous limit and this one have their lead bytes
     235             :      * modified by the signed offset (-0xff..+0xff) stored in the lower 16 bits.
     236             :      *
     237             :      * CollationData::makeReorderRanges() writes a full list where the first range
     238             :      * (at least for terminators and separators) has a 0 offset.
     239             :      * The last range has a non-zero offset.
     240             :      * minHighNoReorder is set to the limit of that last range.
     241             :      *
     242             :      * In the settings object, the initial ranges before the first split lead byte
     243             :      * are omitted for efficiency; they are handled by reorder(p) via the reorderTable.
     244             :      * If there are no split-reordered lead bytes, then no ranges are needed.
     245             :      */
     246             :     const uint32_t *reorderRanges;
     247             :     int32_t reorderRangesLength;  // presumably the number of entries in reorderRanges; 0 by default
     248             :     /** Array of reorder codes; ignored if reorderCodesLength == 0. */
     249             :     const int32_t *reorderCodes;
     250             :     /** Number of reorder codes; 0 if no reordering. */
     251             :     int32_t reorderCodesLength;
     252             :     /**
     253             :      * Capacity of reorderCodes.
     254             :      * If 0, then the codes, the ranges, and the table are aliases.
     255             :      * Otherwise, this object owns the memory via the reorderCodes pointer;
     256             :      * the codes, the ranges, and the table are in the same memory block, in that order.
     257             :      */
     258             :     int32_t reorderCodesCapacity;
     259             : 
     260             :     /** Options for CollationFastLatin. Negative if disabled. */
     261             :     int32_t fastLatinOptions;
     262             :     uint16_t fastLatinPrimaries[0x180];  // NOTE(review): not in the default constructor's initializer list; presumably only read when fastLatinOptions >= 0 - confirm
     263             : 
     264             : private:
     265             :     void setReorderArrays(const int32_t *codes, int32_t codesLength,
     266             :                           const uint32_t *ranges, int32_t rangesLength,
     267             :                           const uint8_t *table, UErrorCode &errorCode);
     268             :     uint32_t reorderEx(uint32_t p) const;
     269             : };
     270             : 
     271             : U_NAMESPACE_END
     272             : 
     273             : #endif  // !UCONFIG_NO_COLLATION
     274             : #endif  // __COLLATIONSETTINGS_H__

Generated by: LCOV version 1.13