LCOV - code coverage report
Current view: top level - intl/icu/source/i18n - collationdatabuilder.h (source / functions) Hit Total Coverage
Test: output.info Lines: 0 19 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 9 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : *******************************************************************************
       5             : * Copyright (C) 2012-2014, International Business Machines
       6             : * Corporation and others.  All Rights Reserved.
       7             : *******************************************************************************
       8             : * collationdatabuilder.h
       9             : *
      10             : * created on: 2012apr01
      11             : * created by: Markus W. Scherer
      12             : */
      13             : 
      14             : #ifndef __COLLATIONDATABUILDER_H__
      15             : #define __COLLATIONDATABUILDER_H__
      16             : 
      17             : #include "unicode/utypes.h"
      18             : 
      19             : #if !UCONFIG_NO_COLLATION
      20             : 
      21             : #include "unicode/uniset.h"
      22             : #include "unicode/unistr.h"
      23             : #include "unicode/uversion.h"
      24             : #include "collation.h"
      25             : #include "collationdata.h"
      26             : #include "collationsettings.h"
      27             : #include "normalizer2impl.h"
      28             : #include "utrie2.h"
      29             : #include "uvectr32.h"
      30             : #include "uvectr64.h"
      31             : #include "uvector.h"
      32             : 
      33             : U_NAMESPACE_BEGIN
      34             : 
      35             : struct ConditionalCE32;
      36             : 
      37             : class CollationFastLatinBuilder;
      38             : class CopyHelper;
      39             : class DataBuilderCollationIterator;
      40             : class UCharsTrieBuilder;
      41             : 
      42             : /**
      43             :  * Low-level CollationData builder.
      44             :  * Takes (character, CE) pairs (see add()) and builds them into runtime data structures (see build()).
      45             :  * Supports characters with context prefixes and contraction suffixes.
      46             :  */
      47             : class U_I18N_API CollationDataBuilder : public UObject {
      48             : public:
      49             :     /**
      50             :      * Collation element modifier. Interface class for a modifier
      51             :      * that changes a tailoring builder's temporary CEs to final CEs.
      52             :      * Called for every non-special CE32 and every expansion CE.
      53             :      */
      54           0 :     class CEModifier : public UObject {
      55             :     public:
      56             :         virtual ~CEModifier();
      57             :         /** Returns a new CE to replace the non-special input CE32, or else Collation::NO_CE. */
      58             :         virtual int64_t modifyCE32(uint32_t ce32) const = 0;
      59             :         /** Returns a new CE to replace the input CE, or else Collation::NO_CE. */
      60             :         virtual int64_t modifyCE(int64_t ce) const = 0;
      61             :     };
      62             : 
      63             :     CollationDataBuilder(UErrorCode &errorCode);
      64             : 
      65             :     virtual ~CollationDataBuilder();
      66             : 
      67             :     void initForTailoring(const CollationData *b, UErrorCode &errorCode);
      68             : 
      69             :     virtual UBool isCompressibleLeadByte(uint32_t b) const;
      70             : 
      71           0 :     inline UBool isCompressiblePrimary(uint32_t p) const {
      72           0 :         return isCompressibleLeadByte(p >> 24);  // the lead byte is the top 8 bits of the 32-bit primary
      73             :     }
      74             : 
      75             :     /**
      76             :      * @return TRUE if this builder has mappings (e.g., add() has been called)
      77             :      */
      78           0 :     UBool hasMappings() const { return modified; }
      79             : 
      80             :     /**
      81             :      * @return TRUE if c has CEs in this builder
      82             :      */
      83             :     UBool isAssigned(UChar32 c) const;
      84             : 
      85             :     /**
      86             :      * @return the three-byte primary if c maps to a single such CE and has no context data,
      87             :      * otherwise returns 0.
      88             :      */
      89             :     uint32_t getLongPrimaryIfSingleCE(UChar32 c) const;
      90             : 
      91             :     /**
      92             :      * @return the single CE for c.
      93             :      * Sets an error code if c does not have a single CE.
      94             :      */
      95             :     int64_t getSingleCE(UChar32 c, UErrorCode &errorCode) const;
      96             : 
      97             :     void add(const UnicodeString &prefix, const UnicodeString &s,
      98             :              const int64_t ces[], int32_t cesLength,
      99             :              UErrorCode &errorCode);
     100             : 
     101             :     /**
     102             :      * Encodes the ces as either the returned ce32 by itself,
     103             :      * or by storing an expansion, with the returned ce32 referring to that.
     104             :      *
     105             :      * add(p, s, ces, cesLength) = addCE32(p, s, encodeCEs(ces, cesLength))
     106             :      */
     107             :     virtual uint32_t encodeCEs(const int64_t ces[], int32_t cesLength, UErrorCode &errorCode);
     108             :     void addCE32(const UnicodeString &prefix, const UnicodeString &s,
     109             :                  uint32_t ce32, UErrorCode &errorCode);
     110             : 
     111             :     /**
     112             :      * Sets three-byte-primary CEs for a range of code points in code point order,
     113             :      * if it is worth doing; otherwise no change is made.
     114             :      * None of the code points in the range should have complex mappings so far
     115             :      * (expansions/contractions/prefixes).
     116             :      * @param start first code point
     117             :      * @param end last code point (inclusive)
     118             :      * @param primary primary weight for 'start'
     119             :      * @param step per-code point primary-weight increment
     120             :      * @param errorCode ICU in/out error code
     121             :      * @return TRUE if an OFFSET_TAG range was used for start..end
     122             :      */
     123             :     UBool maybeSetPrimaryRange(UChar32 start, UChar32 end,
     124             :                                uint32_t primary, int32_t step,
     125             :                                UErrorCode &errorCode);
     126             : 
     127             :     /**
     128             :      * Sets three-byte-primary CEs for a range of code points in code point order.
     129             :      * Sets range values if that is worth doing, or else individual values.
     130             :      * None of the code points in the range should have complex mappings so far
     131             :      * (expansions/contractions/prefixes).
     132             :      * @param start first code point
     133             :      * @param end last code point (inclusive)
     134             :      * @param primary primary weight for 'start'
     135             :      * @param step per-code point primary-weight increment
     136             :      * @param errorCode ICU in/out error code
     137             :      * @return the next primary after 'end': start primary incremented by ((end-start)+1)*step
     138             :      */
     139             :     uint32_t setPrimaryRangeAndReturnNext(UChar32 start, UChar32 end,
     140             :                                           uint32_t primary, int32_t step,
     141             :                                           UErrorCode &errorCode);
     142             : 
     143             :     /**
     144             :      * Copies all mappings from the src builder, with modifications.
     145             :      * This builder here must not be built yet, and should be empty.
     146             :      */
     147             :     void copyFrom(const CollationDataBuilder &src, const CEModifier &modifier,
     148             :                   UErrorCode &errorCode);
     149             : 
     150             :     void optimize(const UnicodeSet &set, UErrorCode &errorCode);
     151             :     void suppressContractions(const UnicodeSet &set, UErrorCode &errorCode);
     152             : 
     153           0 :     void enableFastLatin() { fastLatinEnabled = TRUE; }  // only sets the flag; does no other work in this inline body
     154             :     virtual void build(CollationData &data, UErrorCode &errorCode);
     155             : 
     156             :     /**
     157             :      * Looks up CEs for s and appends them to the ces array.
     158             :      * Does not handle normalization: s should be in FCD form.
     159             :      *
     160             :      * Does not write completely ignorable CEs.
     161             :      * Does not write beyond Collation::MAX_EXPANSION_LENGTH.
     162             :      *
     163             :      * @return incremented cesLength
     164             :      */
     165             :     int32_t getCEs(const UnicodeString &s, int64_t ces[], int32_t cesLength);
     166             :     int32_t getCEs(const UnicodeString &prefix, const UnicodeString &s,
     167             :                    int64_t ces[], int32_t cesLength);
     168             : 
     169             : protected:
     170             :     friend class CopyHelper;
     171             :     friend class DataBuilderCollationIterator;
     172             : 
     173             :     uint32_t getCE32FromOffsetCE32(UBool fromBase, UChar32 c, uint32_t ce32) const;
     174             : 
     175             :     int32_t addCE(int64_t ce, UErrorCode &errorCode);
     176             :     int32_t addCE32(uint32_t ce32, UErrorCode &errorCode);
     177             :     int32_t addConditionalCE32(const UnicodeString &context, uint32_t ce32, UErrorCode &errorCode);
     178             : 
     179           0 :     inline ConditionalCE32 *getConditionalCE32(int32_t index) const {
     180           0 :         return static_cast<ConditionalCE32 *>(conditionalCE32s[index]);  // cast the vector element back to its ConditionalCE32 type
     181             :     }
     182           0 :     inline ConditionalCE32 *getConditionalCE32ForCE32(uint32_t ce32) const {
     183           0 :         return getConditionalCE32(Collation::indexFromCE32(ce32));  // Collation::indexFromCE32(ce32) is the index into conditionalCE32s
     184             :     }
     185             : 
     186           0 :     static uint32_t makeBuilderContextCE32(int32_t index) {
     187           0 :         return Collation::makeCE32FromTagAndIndex(Collation::BUILDER_DATA_TAG, index);  // CE32 made from BUILDER_DATA_TAG plus the given index
     188             :     }
     189           0 :     static inline UBool isBuilderContextCE32(uint32_t ce32) {
     190           0 :         return Collation::hasCE32Tag(ce32, Collation::BUILDER_DATA_TAG);  // tests for the tag used by makeBuilderContextCE32()
     191             :     }
     192             : 
     193             :     static uint32_t encodeOneCEAsCE32(int64_t ce);
     194             :     uint32_t encodeOneCE(int64_t ce, UErrorCode &errorCode);
     195             :     uint32_t encodeExpansion(const int64_t ces[], int32_t length, UErrorCode &errorCode);
     196             :     uint32_t encodeExpansion32(const int32_t newCE32s[], int32_t length, UErrorCode &errorCode);  // NOTE(review): CE32s are uint32_t elsewhere in this class but int32_t here -- confirm this is intended
     197             : 
     198             :     uint32_t copyFromBaseCE32(UChar32 c, uint32_t ce32, UBool withContext, UErrorCode &errorCode);
     199             :     /**
     200             :      * Copies base contractions to a list of ConditionalCE32.
     201             :      * Sets cond->next to the index of the first new item
     202             :      * and returns the index of the last new item.
     203             :      */
     204             :     int32_t copyContractionsFromBaseCE32(UnicodeString &context, UChar32 c, uint32_t ce32,
     205             :                                          ConditionalCE32 *cond, UErrorCode &errorCode);
     206             : 
     207             :     UBool getJamoCE32s(uint32_t jamoCE32s[], UErrorCode &errorCode);
     208             :     void setDigitTags(UErrorCode &errorCode);
     209             :     void setLeadSurrogates(UErrorCode &errorCode);
     210             : 
     211             :     void buildMappings(CollationData &data, UErrorCode &errorCode);
     212             : 
     213             :     void clearContexts();
     214             :     void buildContexts(UErrorCode &errorCode);
     215             :     uint32_t buildContext(ConditionalCE32 *head, UErrorCode &errorCode);
     216             :     int32_t addContextTrie(uint32_t defaultCE32, UCharsTrieBuilder &trieBuilder,
     217             :                            UErrorCode &errorCode);
     218             : 
     219             :     void buildFastLatinTable(CollationData &data, UErrorCode &errorCode);
     220             : 
     221             :     int32_t getCEs(const UnicodeString &s, int32_t start, int64_t ces[], int32_t cesLength);
     222             : 
     223           0 :     static UChar32 jamoCpFromIndex(int32_t i) {  // maps index i to a Jamo code point
     224             :         // 0 <= i < CollationData::JAMO_CE32S_LENGTH = 19 + 21 + 27; index order: JAMO_L range, then JAMO_V, then JAMO_T
     225           0 :         if(i < Hangul::JAMO_L_COUNT) { return Hangul::JAMO_L_BASE + i; }
     226           0 :         i -= Hangul::JAMO_L_COUNT;
     227           0 :         if(i < Hangul::JAMO_V_COUNT) { return Hangul::JAMO_V_BASE + i; }
     228           0 :         i -= Hangul::JAMO_V_COUNT;
     229             :         // i < 27; the +1 below skips JAMO_T_BASE itself
     230           0 :         return Hangul::JAMO_T_BASE + 1 + i;
     231             :     }
     232             : 
     233             :     /** @see Collation::BUILDER_DATA_TAG */
     234             :     static const uint32_t IS_BUILDER_JAMO_CE32 = 0x100;
     235             : 
     236             :     const Normalizer2Impl &nfcImpl;
     237             :     const CollationData *base;
     238             :     const CollationSettings *baseSettings;
     239             :     UTrie2 *trie;
     240             :     UVector32 ce32s;
     241             :     UVector64 ce64s;
     242             :     UVector conditionalCE32s;  // vector of ConditionalCE32
     243             :     // Characters that have context (prefixes or contraction suffixes).
     244             :     UnicodeSet contextChars;
     245             :     // Serialized UCharsTrie structures for finalized contexts.
     246             :     UnicodeString contexts;
     247             :     UnicodeSet unsafeBackwardSet;
     248             :     UBool modified;  // returned by hasMappings()
     249             : 
     250             :     UBool fastLatinEnabled;  // set to TRUE by enableFastLatin()
     251             :     CollationFastLatinBuilder *fastLatinBuilder;
     252             : 
     253             :     DataBuilderCollationIterator *collIter;
     254             : };
     255             : 
     256             : U_NAMESPACE_END
     257             : 
     258             : #endif  // !UCONFIG_NO_COLLATION
     259             : #endif  // __COLLATIONDATABUILDER_H__

Generated by: LCOV version 1.13