LCOV - code coverage report
Current view: top level - intl/icu/source/common - bmpset.h (source / functions) Hit Total Coverage
Test: output.info Lines: 0 2 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 1 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : ******************************************************************************
       5             : *
       6             : *   Copyright (C) 2007, International Business Machines
       7             : *   Corporation and others.  All Rights Reserved.
       8             : *
       9             : ******************************************************************************
      10             : *   file name:  bmpset.h
      11             : *   encoding:   UTF-8
      12             : *   tab size:   8 (not used)
      13             : *   indentation:4
      14             : *
      15             : *   created on: 2007jan29
      16             : *   created by: Markus W. Scherer
      17             : */
      18             : 
      19             : #ifndef __BMPSET_H__
      20             : #define __BMPSET_H__
      21             : 
      22             : #include "unicode/utypes.h"
      23             : #include "unicode/uniset.h"
      24             : 
      25             : U_NAMESPACE_BEGIN
      26             : 
      27             : /*
      28             :  * Helper class for frozen UnicodeSets, implements contains() and span()
      29             :  * optimized for BMP code points. Structured to be UTF-8-friendly.
      30             :  *
      31             :  * ASCII: Look up bytes.
      32             :  * 2-byte characters: Bits organized vertically.
      33             :  * 3-byte characters: Use zero/one/mixed data per 64-block in U+0000..U+FFFF,
      34             :  *                    with mixed for illegal ranges.
      35             :  * Supplementary characters: Call contains() on the parent set.
      36             :  */
       37             : class BMPSet : public UMemory {
       38             : public:
       39             :     BMPSet(const int32_t *parentList, int32_t parentListLength);  // NOTE(review): parentList is presumably the parent set's inversion list referenced by the `list` member - confirm in bmpset.cpp.
       40             :     BMPSet(const BMPSet &otherBMPSet, const int32_t *newParentList, int32_t newParentListLength);  // NOTE(review): looks like a copy of otherBMPSet re-pointed at a new parent list - confirm in bmpset.cpp.
       41             :     virtual ~BMPSet();
       42             : 
       43             :     virtual UBool contains(UChar32 c) const;  // Membership test for a single code point c.
       44             : 
       45             :     /*
       46             :      * Spans the initial substring in which every character c satisfies spanCondition==contains(c).
       47             :      * Preconditions: s<limit, and spanCondition is 0 or 1.
       48             :      * @return The string pointer which limits the span.
       49             :      */
       50             :     const UChar *span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const;
       51             : 
       52             :     /*
       53             :      * Spans the trailing substring in which every character c satisfies spanCondition==contains(c).
       54             :      * Preconditions: s<limit, and spanCondition is 0 or 1.
       55             :      * @return The string pointer which starts the span.
       56             :      */
       57             :     const UChar *spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const;
       58             : 
       59             :     /*
       60             :      * UTF-8 variant: spans the initial substring in which every character c satisfies spanCondition==contains(c).
       61             :      * Preconditions: length>0, and spanCondition is 0 or 1.
       62             :      * @return The string pointer which limits the span.
       63             :      */
       64             :     const uint8_t *spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const;
       65             : 
       66             :     /*
       67             :      * UTF-8 variant: spans the trailing substring in which every character c satisfies spanCondition==contains(c).
       68             :      * Preconditions: length>0, and spanCondition is 0 or 1.
       69             :      * @return The start of the span, as an int32_t. NOTE(review): presumably an offset relative to s - confirm in bmpset.cpp.
       70             :      */
       71             :     int32_t spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const;
       72             : 
       73             : private:
       74             :     void initBits();  // NOTE(review): presumably fills the lookup tables declared below from the parent list - confirm in bmpset.cpp.
       75             :     void overrideIllegal();  // NOTE(review): presumably applies the contains(FFFD) overrides described on the tables below - confirm in bmpset.cpp.
       76             : 
       77             :     /**
       78             :      * Same as UnicodeSet::findCodePoint(UChar32 c) const except that the
       79             :      * binary search is restricted to a sub-range of the list indexes.
       80             :      *
       81             :      * To restrict the search to code points in the range start..end,
       82             :      * pass in
       83             :      *   lo=findCodePoint(start) and
       84             :      *   hi=findCodePoint(end)
       85             :      * with 0<=lo<=hi<len.
       86             :      * The unrestricted findCodePoint(c) corresponds to lo=0 and hi=len-1.
       87             :      *
       88             :      * @param c a character in a subrange of MIN_VALUE..MAX_VALUE
       89             :      * @param lo The lowest index to be returned.
       90             :      * @param hi The highest index to be returned.
       91             :      * @return the smallest integer i in the range lo..hi,
       92             :      *         inclusive, such that c < list[i]
       93             :      */
       94             :     int32_t findCodePoint(UChar32 c, int32_t lo, int32_t hi) const;
       95             : 
       96             :     inline UBool containsSlow(UChar32 c, int32_t lo, int32_t hi) const;  // Defined inline after the class: returns the low bit of findCodePoint(c, lo, hi).
       97             : 
       98             :     /*
       99             :      * One byte per ASCII character (indexes 0..7F), or per trail byte in lead position (indexes 80..BF).
      100             :      * The value is 0 or 1 for ASCII characters.
      101             :      * The value for trail bytes is the result of contains(FFFD)
      102             :      * for faster validity checking at runtime.
      103             :      */
      104             :     UBool asciiBytes[0xc0];
      105             : 
      106             :     /*
      107             :      * One bit per code point from U+0000..U+07FF.
      108             :      * The bits are organized vertically; consecutive code points
      109             :      * correspond to the same bit positions in consecutive table words.
      110             :      * With code point parts (c{hi..lo} denotes bits hi through lo of c)
      111             :      *   lead=c{10..6}
      112             :      *   trail=c{5..0}
      113             :      * it is set.contains(c)==(table7FF[trail] bit lead)
      114             :      *
      115             :      * Bits for 0..7F (non-shortest forms) are set to the result of contains(FFFD)
      116             :      * for faster validity checking at runtime.
      117             :      */
      118             :     uint32_t table7FF[64];
      119             : 
      120             :     /*
      121             :      * One bit pair per block of 64 BMP code points.
      122             :      * The bits are organized vertically; consecutive 64-code point blocks
      123             :      * correspond to the same bit position in consecutive table words.
      124             :      * With code point parts (c{hi..lo} denotes bits hi through lo of c)
      125             :      *   lead=c{15..12}
      126             :      *   t1=c{11..6}
      127             :      * test bits (lead+16) and lead in bmpBlockBits[t1].
      128             :      * If the upper bit is 0, then the lower bit indicates if contains(c)
      129             :      * for all code points in the 64-block.
      130             :      * If the upper bit is 1, then the block is mixed and set.contains(c)
      131             :      * must be called.
      132             :      *
      133             :      * Bits for 0..7FF (non-shortest forms) and D800..DFFF are set to
      134             :      * the result of contains(FFFD) for faster validity checking at runtime.
      135             :      */
      136             :     uint32_t bmpBlockBits[64];
      137             : 
      138             :     /*
      139             :      * Inversion list indexes for restricted binary searches in
      140             :      * findCodePoint(), from
      141             :      * findCodePoint(U+0800, U+1000, U+2000, .., U+F000, U+10000).
      142             :      * U+0800 is the first 3-byte-UTF-8 code point. Code points below U+0800 are
      143             :      * always looked up in the bit tables.
      144             :      * The last pair of indexes is for finding supplementary code points. NOTE(review): 17 code points are named above but the array holds 18 indexes - confirm in bmpset.cpp how the final entry is set.
      145             :      */
      146             :     int32_t list4kStarts[18];
      147             : 
      148             :     /*
      149             :      * The inversion list of the parent set, used by the slower contains() implementation
      150             :      * for mixed BMP blocks and for supplementary code points.
      151             :      * The list is terminated with list[listLength-1]=0x110000.
      152             :      */
      153             :     const int32_t *list;
      154             :     int32_t listLength;
      155             : };
     156             : 
      157           0 : inline UBool BMPSet::containsSlow(UChar32 c, int32_t lo, int32_t hi) const {  // Slow-path membership test: searches via findCodePoint() restricted to list indexes lo..hi.
      158           0 :     return (UBool)(findCodePoint(c, lo, hi) & 1);  // Keep only the low bit of the returned index; an odd index means c falls inside a range of the inversion list.
      159             : }
     160             : 
     161             : U_NAMESPACE_END
     162             : 
     163             : #endif

Generated by: LCOV version 1.13