LCOV - code coverage report
Current view: top level - intl/icu/source/common - unisetspan.h (source / functions) Hit Total Coverage
Test: output.info Lines: 0 6 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 3 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : ******************************************************************************
       5             : *
       6             : *   Copyright (C) 2007, International Business Machines
       7             : *   Corporation and others.  All Rights Reserved.
       8             : *
       9             : ******************************************************************************
      10             : *   file name:  unisetspan.h
      11             : *   encoding:   UTF-8
      12             : *   tab size:   8 (not used)
      13             : *   indentation:4
      14             : *
      15             : *   created on: 2007mar01
      16             : *   created by: Markus W. Scherer
      17             : */
      18             : 
      19             : #ifndef __UNISETSPAN_H__
      20             : #define __UNISETSPAN_H__
      21             : 
      22             : #include "unicode/utypes.h"
      23             : #include "unicode/uniset.h"
      24             : 
      25             : U_NAMESPACE_BEGIN
      26             : 
      27             : /*
      28             :  * Implement span() etc. for a set with strings.
      29             :  * Avoid recursion because of its exponential complexity.
      30             :  * Instead, try multiple paths at once and track them with an IndexList.
      31             :  */
      32             : class UnicodeSetStringSpan : public UMemory {
      33             : public:
      34             :     /*
      35             :      * Which span() variant will be used? The values are bit flags that are OR-ed together.
      36             :      * The object is either built for one variant and used once,
      37             :      * or built for all (ALL) and may be used many times.
      38             :      */
      39             :     enum {
      40             :         FWD             = 0x20,   // FWD or BACK (presumably the span direction)
      41             :         BACK            = 0x10,
      42             :         UTF16           = 8,      // UTF16 or UTF8 (presumably the string encoding)
      43             :         UTF8            = 4,
      44             :         CONTAINED       = 2,      // CONTAINED or NOT_CONTAINED (presumably the span condition)
      45             :         NOT_CONTAINED   = 1,
      46             : 
      47             :         ALL             = 0x3f,   // bitwise OR of the six single-bit flags above
      48             :         // Each named variant below combines one flag from each of the three pairs above.
      49             :         FWD_UTF16_CONTAINED     = FWD  | UTF16 |     CONTAINED,
      50             :         FWD_UTF16_NOT_CONTAINED = FWD  | UTF16 | NOT_CONTAINED,
      51             :         FWD_UTF8_CONTAINED      = FWD  | UTF8  |     CONTAINED,
      52             :         FWD_UTF8_NOT_CONTAINED  = FWD  | UTF8  | NOT_CONTAINED,
      53             :         BACK_UTF16_CONTAINED    = BACK | UTF16 |     CONTAINED,
      54             :         BACK_UTF16_NOT_CONTAINED= BACK | UTF16 | NOT_CONTAINED,
      55             :         BACK_UTF8_CONTAINED     = BACK | UTF8  |     CONTAINED,
      56             :         BACK_UTF8_NOT_CONTAINED = BACK | UTF8  | NOT_CONTAINED
      57             :     };
      58             :     // Constructor. 'which' is presumably a combination of the flag bits above -- confirm in the implementation file.
      59             :     UnicodeSetStringSpan(const UnicodeSet &set, const UVector &setStrings, uint32_t which);
      60             : 
      61             :     // Copy constructor. Assumes which==ALL for a frozen set. Takes the new parent's strings as a second argument.
      62             :     UnicodeSetStringSpan(const UnicodeSetStringSpan &otherStringSpan, const UVector &newParentSetStrings);
      63             : 
      64             :     ~UnicodeSetStringSpan();
      65             : 
      66             :     /*
      67             :      * Do the strings need to be checked in span() etc.? (Tests maxLength16 / maxLength8 != 0.)
      68             :      * @return TRUE if strings need to be checked (call span() here),
      69             :      *         FALSE if not (use a BMPSet for best performance).
      70             :      * Note: unlike contains(), these two accessors are not declared const.
      71             :      */
      72             :     inline UBool needsStringSpanUTF16();
      73             :     inline UBool needsStringSpanUTF8();
      74             : 
      75             :     // For fast UnicodeSet::contains(c). Forwards to spanSet.contains(c).
      76             :     inline UBool contains(UChar32 c) const;
      77             :     // span()/spanBack() take UChar strings; the ...UTF8() versions take uint8_t strings.
      78             :     int32_t span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
      79             : 
      80             :     int32_t spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
      81             : 
      82             :     int32_t spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const;
      83             : 
      84             :     int32_t spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const;
      85             : 
      86             : private:
      87             :     // Special spanLength byte values.
      88             :     enum {
      89             :         // The spanLength is >=0xfe.
      90             :         LONG_SPAN=0xfe,
      91             :         // All code points in the string are contained in the parent set.
      92             :         ALL_CP_CONTAINED=0xff
      93             :     };
      94             : 
      95             :     // Add a starting or ending string character to the spanNotSet
      96             :     // so that a character span ends before any string.
      97             :     void addToSpanNotSet(UChar32 c);
      98             :     // The spanNot*() helpers take no spanCondition argument (presumably the not-contained case -- confirm in the implementation file).
      99             :     int32_t spanNot(const UChar *s, int32_t length) const;
     100             :     int32_t spanNotBack(const UChar *s, int32_t length) const;
     101             :     int32_t spanNotUTF8(const uint8_t *s, int32_t length) const;
     102             :     int32_t spanNotBackUTF8(const uint8_t *s, int32_t length) const;
     103             : 
     104             :     // Set for span(). Same as parent but without strings.
     105             :     UnicodeSet spanSet;
     106             : 
     107             :     // Set for span(not contained).
     108             :     // Same as spanSet, plus characters that start or end strings.
     109             :     UnicodeSet *pSpanNotSet;
     110             : 
     111             :     // The strings of the parent set. Stored as a reference, so that vector must outlive this object.
     112             :     const UVector &strings;
     113             : 
     114             :     // Pointer to the UTF-8 string lengths.
     115             :     // Also pointer to further allocated storage for meta data and
     116             :     // UTF-8 string contents as necessary.
     117             :     int32_t *utf8Lengths;
     118             : 
     119             :     // Pointer to the part of the (utf8Lengths) memory block that stores
     120             :     // the lengths of span(), spanBack() etc. for each string.
     121             :     uint8_t *spanLengths;
     122             : 
     123             :     // Pointer to the part of the (utf8Lengths) memory block that stores
     124             :     // the UTF-8 versions of the parent set's strings.
     125             :     uint8_t *utf8;
     126             : 
     127             :     // Number of bytes for all UTF-8 versions of strings together.
     128             :     int32_t utf8Length;
     129             : 
     130             :     // Maximum lengths of relevant strings.
     131             :     // A non-zero value makes the matching needsStringSpanUTF16()/needsStringSpanUTF8() return TRUE.
     132             :     int32_t maxLength16;
     133             :     int32_t maxLength8;
     134             : 
     135             :     // Set up for all variants of span()?
     136             :     UBool all;
     137             : 
     138             :     // Memory for small numbers and lengths of strings.
     139             :     // For example, for 8 strings:
     140             :     // 8 UTF-8 lengths, 8*4 bytes span lengths, 8*2 3-byte UTF-8 characters
     141             :     // = 112 bytes = int32_t[28]; the array below has room for int32_t[32].
     142             :     int32_t staticLengths[32];
     143             : };
     142             : 
     143           0 : UBool UnicodeSetStringSpan::needsStringSpanUTF16() {  // Reports whether maxLength16 is non-zero.
     144           0 :     return (UBool)(maxLength16!=0);  // comparison result explicitly cast to UBool
     145             : }
     146             : 
     147           0 : UBool UnicodeSetStringSpan::needsStringSpanUTF8() {  // Reports whether maxLength8 is non-zero.
     148           0 :     return (UBool)(maxLength8!=0);  // comparison result explicitly cast to UBool
     149             : }
     150             : 
     151           0 : UBool UnicodeSetStringSpan::contains(UChar32 c) const {  // Code point membership test; does not modify this object.
     152           0 :     return spanSet.contains(c);  // delegates to spanSet (the parent set without its strings)
     153             : }
     154             : 
     155             : U_NAMESPACE_END
     156             : 
     157             : #endif

Generated by: LCOV version 1.13