LCOV - code coverage report
Current view: top level - intl/icu/source/common/unicode - stringtriebuilder.h (source / functions) Hit Total Coverage
Test: output.info Lines: 0 52 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 34 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : *******************************************************************************
       5             : *   Copyright (C) 2010-2012,2014, International Business Machines
       6             : *   Corporation and others.  All Rights Reserved.
       7             : *******************************************************************************
       8             : *   file name:  stringtriebuilder.h
       9             : *   encoding:   UTF-8
      10             : *   tab size:   8 (not used)
      11             : *   indentation:4
      12             : *
      13             : *   created on: 2010dec24
      14             : *   created by: Markus W. Scherer
      15             : */
      16             : 
      17             : #ifndef __STRINGTRIEBUILDER_H__
      18             : #define __STRINGTRIEBUILDER_H__
      19             : 
      20             : #include "unicode/utypes.h"
      21             : #include "unicode/uobject.h"
      22             : 
      23             : /**
      24             :  * \file
      25             :  * \brief C++ API: Builder API for trie builders
      26             :  */
      27             : 
      28             : // Forward declaration.
      29             : struct UHashtable;
      30             : typedef struct UHashtable UHashtable;
      31             : 
      32             : /**
      33             :  * Build options for BytesTrieBuilder and CharsTrieBuilder.
      34             :  * @stable ICU 4.8
      35             :  */
      36             : enum UStringTrieBuildOption {
      37             :     /**
      38             :      * Builds a trie quickly.
      39             :      * @stable ICU 4.8
      40             :      */
      41             :     USTRINGTRIE_BUILD_FAST,
      42             :     /**
      43             :      * Builds a trie more slowly, attempting to generate
      44             :      * a shorter but equivalent serialization.
      45             :      * This build option also uses more memory.
      46             :      *
      47             :      * This option can be effective when many integer values are the same
      48             :      * and string/byte sequence suffixes can be shared.
      49             :      * Runtime speed is not expected to improve.
      50             :      * @stable ICU 4.8
      51             :      */
      52             :     USTRINGTRIE_BUILD_SMALL
      53             : };
      54             : 
      55             : U_NAMESPACE_BEGIN
      56             : 
      57             : /**
      58             :  * Base class for string trie builder classes.
      59             :  *
      60             :  * This class is not intended for public subclassing.
      61             :  * @stable ICU 4.8
      62             :  */
      63             : class U_COMMON_API StringTrieBuilder : public UObject {
      64             : public:
      65             : #ifndef U_HIDE_INTERNAL_API
      66             :     /** @internal */
      67             :     static UBool hashNode(const void *node);
      68             :     /** @internal */
      69             :     static UBool equalNodes(const void *left, const void *right);
      70             : #endif  /* U_HIDE_INTERNAL_API */
      71             : 
      72             : protected:
      73             :     // Do not enclose the protected default constructor with #ifndef U_HIDE_INTERNAL_API
      74             :     // or else the compiler will create a public default constructor.
      75             :     /** @internal */
      76             :     StringTrieBuilder();
      77             :     /** @internal */
      78             :     virtual ~StringTrieBuilder();
      79             : 
      80             : #ifndef U_HIDE_INTERNAL_API
      81             :     /** @internal */
      82             :     void createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode);
      83             :     /** @internal */
      84             :     void deleteCompactBuilder();
      85             : 
      86             :     /** @internal */
      87             :     void build(UStringTrieBuildOption buildOption, int32_t elementsLength, UErrorCode &errorCode);
      88             : 
      89             :     /** @internal */
      90             :     int32_t writeNode(int32_t start, int32_t limit, int32_t unitIndex);
      91             :     /** @internal */
      92             :     int32_t writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length);
      93             : #endif  /* U_HIDE_INTERNAL_API */
      94             : 
      95             :     class Node;
      96             : 
      97             : #ifndef U_HIDE_INTERNAL_API
      98             :     /** @internal */
      99             :     Node *makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode);
     100             :     /** @internal */
     101             :     Node *makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex,
     102             :                             int32_t length, UErrorCode &errorCode);
     103             : #endif  /* U_HIDE_INTERNAL_API */
     104             : 
     105             :     /** @internal */
     106             :     virtual int32_t getElementStringLength(int32_t i) const = 0;
     107             :     /** @internal */
     108             :     virtual char16_t getElementUnit(int32_t i, int32_t unitIndex) const = 0;
     109             :     /** @internal */
     110             :     virtual int32_t getElementValue(int32_t i) const = 0;
     111             : 
     112             :     // Finds the first unit index after this one where
     113             :     // the first and last element have different units again.
     114             :     /** @internal */
     115             :     virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const = 0;
     116             : 
     117             :     // Number of different units at unitIndex.
     118             :     /** @internal */
     119             :     virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const = 0;
     120             :     /** @internal */
     121             :     virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const = 0;
     122             :     /** @internal */
     123             :     virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, char16_t unit) const = 0;
     124             : 
     125             :     /** @internal */
     126             :     virtual UBool matchNodesCanHaveValues() const = 0;
     127             : 
     128             :     /** @internal */
     129             :     virtual int32_t getMaxBranchLinearSubNodeLength() const = 0;
     130             :     /** @internal */
     131             :     virtual int32_t getMinLinearMatch() const = 0;
     132             :     /** @internal */
     133             :     virtual int32_t getMaxLinearMatchLength() const = 0;
     134             : 
     135             : #ifndef U_HIDE_INTERNAL_API
     136             :     // max(BytesTrie::kMaxBranchLinearSubNodeLength, UCharsTrie::kMaxBranchLinearSubNodeLength).
     137             :     /** @internal */
     138             :     static const int32_t kMaxBranchLinearSubNodeLength=5;
     139             : 
     140             :     // Maximum number of nested split-branch levels for a branch on all 2^16 possible char16_t units.
     141             :     // log2(2^16/kMaxBranchLinearSubNodeLength) rounded up.
     142             :     /** @internal */
     143             :     static const int32_t kMaxSplitBranchLevels=14;
     144             : 
     145             :     /**
     146             :      * Makes sure that there is only one unique node registered that is
     147             :      * equivalent to newNode.
     148             :      * @param newNode Input node. The builder takes ownership.
     149             :      * @param errorCode ICU in/out UErrorCode.
     150             :                         Set to U_MEMORY_ALLOCATION_ERROR if it was success but newNode==NULL.
     151             :      * @return newNode if it is the first of its kind, or
     152             :      *         an equivalent node if newNode is a duplicate.
     153             :      * @internal
     154             :      */
     155             :     Node *registerNode(Node *newNode, UErrorCode &errorCode);
     156             :     /**
     157             :      * Makes sure that there is only one unique FinalValueNode registered
     158             :      * with this value.
     159             :      * Avoids creating a node if the value is a duplicate.
     160             :      * @param value A final value.
     161             :      * @param errorCode ICU in/out UErrorCode.
     162             :                         Set to U_MEMORY_ALLOCATION_ERROR if it was success but newNode==NULL.
     163             :      * @return A FinalValueNode with the given value.
     164             :      * @internal
     165             :      */
     166             :     Node *registerFinalValue(int32_t value, UErrorCode &errorCode);
     167             : #endif  /* U_HIDE_INTERNAL_API */
     168             : 
     169             :     /*
     170             :      * C++ note:
     171             :      * registerNode() and registerFinalValue() take ownership of their input nodes,
     172             :      * and only return owned nodes.
     173             :      * If they see a failure UErrorCode, they will delete the input node.
     174             :      * If they get a NULL pointer, they will record a U_MEMORY_ALLOCATION_ERROR.
     175             :      * If there is a failure, they return NULL.
     176             :      *
     177             :      * NULL Node pointers can be safely passed into other Nodes because
     178             :      * they call the static Node::hashCode() which checks for a NULL pointer first.
     179             :      *
     180             :      * Therefore, as long as builder functions register a new node,
     181             :      * they need to check for failures only before explicitly dereferencing
     182             :      * a Node pointer, or before setting a new UErrorCode.
     183             :      */
     184             : 
     185             :     // Hash set of nodes, maps from nodes to integer 1.
     186             :     /** @internal */
     187             :     UHashtable *nodes;
     188             : 
     189             :     // Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API,
     190             :     // it is needed for layout of other objects.
     191             :     /** @internal */
     192           0 :     class Node : public UObject {
     193             :     public:
     194           0 :         Node(int32_t initialHash) : hash(initialHash), offset(0) {}
     195           0 :         inline int32_t hashCode() const { return hash; }
     196             :         // Handles node==NULL.
     197           0 :         static inline int32_t hashCode(const Node *node) { return node==NULL ? 0 : node->hashCode(); }
     198             :         // Base class operator==() compares the actual class types.
     199             :         virtual UBool operator==(const Node &other) const;
     200             :         inline UBool operator!=(const Node &other) const { return !operator==(other); }
     201             :         /**
     202             :          * Traverses the Node graph and numbers branch edges, with rightmost edges first.
     203             :          * This is to avoid writing a duplicate node twice.
     204             :          *
     205             :          * Branch nodes in this trie data structure are not symmetric.
     206             :          * Most branch edges "jump" to other nodes but the rightmost branch edges
     207             :          * just continue without a jump.
     208             :          * Therefore, write() must write the rightmost branch edge last
     209             :          * (trie units are written backwards), and must write it at that point even if
     210             :          * it is a duplicate of a node previously written elsewhere.
     211             :          *
     212             :          * This function visits and marks right branch edges first.
     213             :          * Edges are numbered with increasingly negative values because we share the
     214             :          * offset field which gets positive values when nodes are written.
     215             :          * A branch edge also remembers the first number for any of its edges.
     216             :          *
     217             :          * When a further-left branch edge has a number in the range of the rightmost
     218             :          * edge's numbers, then it will be written as part of the required right edge
     219             :          * and we can avoid writing it first.
     220             :          *
     221             :          * After root.markRightEdgesFirst(-1) the offsets of all nodes are negative
     222             :          * edge numbers.
     223             :          *
     224             :          * @param edgeNumber The first edge number for this node and its sub-nodes.
     225             :          * @return An edge number that is at least the maximum-negative
     226             :          *         of the input edge number and the numbers of this node and all of its sub-nodes.
     227             :          */
     228             :         virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
     229             :         // write() must set the offset to a positive value.
     230             :         virtual void write(StringTrieBuilder &builder) = 0;
     231             :         // See markRightEdgesFirst.
     232           0 :         inline void writeUnlessInsideRightEdge(int32_t firstRight, int32_t lastRight,
     233             :                                                StringTrieBuilder &builder) {
     234             :             // Note: Edge numbers are negative, lastRight<=firstRight.
     235             :             // If offset>0 then this node and its sub-nodes have been written already
     236             :             // and we need not write them again.
     237             :             // If this node is part of the unwritten right branch edge,
     238             :             // then we wait until that is written.
     239           0 :             if(offset<0 && (offset<lastRight || firstRight<offset)) {
     240           0 :                 write(builder);
     241             :             }
     242           0 :         }
     243           0 :         inline int32_t getOffset() const { return offset; }
     244             :     protected:
     245             :         int32_t hash;
     246             :         int32_t offset;
     247             :     };
     248             : 
     249             : #ifndef U_HIDE_INTERNAL_API
     250             :     // This class should not be overridden because
     251             :     // registerFinalValue() compares a stack-allocated FinalValueNode
     252             :     // (stack-allocated so that we don't unnecessarily create lots of duplicate nodes)
     253             :     // with the input node, and the
     254             :     // !Node::operator==(other) used inside FinalValueNode::operator==(other)
     255             :     // will be false if the typeid's are different.
     256             :     /** @internal */
     257           0 :     class FinalValueNode : public Node {
     258             :     public:
     259           0 :         FinalValueNode(int32_t v) : Node(0x111111*37+v), value(v) {}
     260             :         virtual UBool operator==(const Node &other) const;
     261             :         virtual void write(StringTrieBuilder &builder);
     262             :     protected:
     263             :         int32_t value;
     264             :     };
     265             : #endif  /* U_HIDE_INTERNAL_API */
     266             : 
     267             :     // Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API,
     268             :     // it is needed for layout of other objects.
     269             :     /**
     270             :      * @internal 
     271             :      */
     272           0 :     class ValueNode : public Node {
     273             :     public:
     274           0 :         ValueNode(int32_t initialHash) : Node(initialHash), hasValue(FALSE), value(0) {}
     275             :         virtual UBool operator==(const Node &other) const;
     276           0 :         void setValue(int32_t v) {
     277           0 :             hasValue=TRUE;
     278           0 :             value=v;
     279           0 :             hash=hash*37+v;
     280           0 :         }
     281             :     protected:
     282             :         UBool hasValue;
     283             :         int32_t value;
     284             :     };
     285             : 
     286             : #ifndef U_HIDE_INTERNAL_API
     287             :     /** 
     288             :      * @internal 
     289             :      */
     290           0 :     class IntermediateValueNode : public ValueNode {
     291             :     public:
     292           0 :         IntermediateValueNode(int32_t v, Node *nextNode)
     293           0 :                 : ValueNode(0x222222*37+hashCode(nextNode)), next(nextNode) { setValue(v); }
     294             :         virtual UBool operator==(const Node &other) const;
     295             :         virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
     296             :         virtual void write(StringTrieBuilder &builder);
     297             :     protected:
     298             :         Node *next;
     299             :     };
     300             : #endif  /* U_HIDE_INTERNAL_API */
     301             : 
     302             :     // Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API,
     303             :     // it is needed for layout of other objects.
     304             :     /**
     305             :      * @internal 
     306             :      */
     307           0 :     class LinearMatchNode : public ValueNode {
     308             :     public:
     309           0 :         LinearMatchNode(int32_t len, Node *nextNode)
     310           0 :                 : ValueNode((0x333333*37+len)*37+hashCode(nextNode)),
     311           0 :                   length(len), next(nextNode) {}
     312             :         virtual UBool operator==(const Node &other) const;
     313             :         virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
     314             :     protected:
     315             :         int32_t length;
     316             :         Node *next;
     317             :     };
     318             : 
     319             : #ifndef U_HIDE_INTERNAL_API
     320             :     /**
     321             :      * @internal 
     322             :      */
     323           0 :     class BranchNode : public Node {
     324             :     public:
     325           0 :         BranchNode(int32_t initialHash) : Node(initialHash) {}
     326             :     protected:
     327             :         int32_t firstEdgeNumber;
     328             :     };
     329             : 
     330             :     /**
     331             :      * @internal 
     332             :      */
     333           0 :     class ListBranchNode : public BranchNode {
     334             :     public:
     335           0 :         ListBranchNode() : BranchNode(0x444444), length(0) {}
     336             :         virtual UBool operator==(const Node &other) const;
     337             :         virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
     338             :         virtual void write(StringTrieBuilder &builder);
     339             :         // Adds a unit with a final value.
     340           0 :         void add(int32_t c, int32_t value) {
     341           0 :             units[length]=(char16_t)c;
     342           0 :             equal[length]=NULL;
     343           0 :             values[length]=value;
     344           0 :             ++length;
     345           0 :             hash=(hash*37+c)*37+value;
     346           0 :         }
     347             :         // Adds a unit which leads to another match node.
     348           0 :         void add(int32_t c, Node *node) {
     349           0 :             units[length]=(char16_t)c;
     350           0 :             equal[length]=node;
     351           0 :             values[length]=0;
     352           0 :             ++length;
     353           0 :             hash=(hash*37+c)*37+hashCode(node);
     354           0 :         }
     355             :     protected:
     356             :         Node *equal[kMaxBranchLinearSubNodeLength];  // NULL means "has final value".
     357             :         int32_t length;
     358             :         int32_t values[kMaxBranchLinearSubNodeLength];
     359             :         char16_t units[kMaxBranchLinearSubNodeLength];
     360             :     };
     361             : 
     362             :     /**
     363             :      * @internal 
     364             :      */
     365           0 :     class SplitBranchNode : public BranchNode {
     366             :     public:
     367           0 :         SplitBranchNode(char16_t middleUnit, Node *lessThanNode, Node *greaterOrEqualNode)
     368           0 :                 : BranchNode(((0x555555*37+middleUnit)*37+
     369           0 :                               hashCode(lessThanNode))*37+hashCode(greaterOrEqualNode)),
     370           0 :                   unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {}
     371             :         virtual UBool operator==(const Node &other) const;
     372             :         virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
     373             :         virtual void write(StringTrieBuilder &builder);
     374             :     protected:
     375             :         char16_t unit;
     376             :         Node *lessThan;
     377             :         Node *greaterOrEqual;
     378             :     };
     379             : 
     380             :     // Branch head node, for writing the actual node lead unit.
     381             :     /** @internal */
     382           0 :     class BranchHeadNode : public ValueNode {
     383             :     public:
     384           0 :         BranchHeadNode(int32_t len, Node *subNode)
     385           0 :                 : ValueNode((0x666666*37+len)*37+hashCode(subNode)),
     386           0 :                   length(len), next(subNode) {}
     387             :         virtual UBool operator==(const Node &other) const;
     388             :         virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
     389             :         virtual void write(StringTrieBuilder &builder);
     390             :     protected:
     391             :         int32_t length;
     392             :         Node *next;  // A branch sub-node.
     393             :     };
     394             : #endif  /* U_HIDE_INTERNAL_API */
     395             : 
     396             :     /** @internal */
     397             :     virtual Node *createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length,
     398             :                                         Node *nextNode) const = 0;
     399             : 
     400             :     /** @internal */
     401             :     virtual int32_t write(int32_t unit) = 0;
     402             :     /** @internal */
     403             :     virtual int32_t writeElementUnits(int32_t i, int32_t unitIndex, int32_t length) = 0;
     404             :     /** @internal */
     405             :     virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal) = 0;
     406             :     /** @internal */
     407             :     virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node) = 0;
     408             :     /** @internal */
     409             :     virtual int32_t writeDeltaTo(int32_t jumpTarget) = 0;
     410             : };
     411             : 
     412             : U_NAMESPACE_END
     413             : 
     414             : #endif  // __STRINGTRIEBUILDER_H__

Generated by: LCOV version 1.13