LCOV - code coverage report
Current view: top level - intl/icu/source/i18n - collationfcd.h (source / functions) Hit Total Coverage
Test: output.info Lines: 0 17 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 5 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : *******************************************************************************
       5             : * Copyright (C) 2012-2014, International Business Machines
       6             : * Corporation and others.  All Rights Reserved.
       7             : *******************************************************************************
       8             : * collationfcd.h
       9             : *
      10             : * created on: 2012aug18
      11             : * created by: Markus W. Scherer
      12             : */
      13             : 
      14             : #ifndef __COLLATIONFCD_H__
      15             : #define __COLLATIONFCD_H__
      16             : 
      17             : #include "unicode/utypes.h"
      18             : 
      19             : #if !UCONFIG_NO_COLLATION
      20             : 
      21             : #include "unicode/utf16.h"
      22             : 
      23             : U_NAMESPACE_BEGIN
      24             : 
      25             : /**
      26             :  * Data and functions for the FCD check fast path.
      27             :  *
      28             :  * The fast path looks at a pair of 16-bit code units and checks
      29             :  * whether there is an FCD boundary between them;
      30             :  * there is if the first unit has a trailing ccc=0 (!hasTccc(first))
      31             :  * or the second unit has a leading ccc=0 (!hasLccc(second)),
      32             :  * or both.
      33             :  * When the fast path finds a possible non-boundary,
      34             :  * then the FCD check slow path looks at the actual sequence of FCD values.
      35             :  *
      36             :  * This is a pure optimization.
      37             :  * The fast path must at least find all possible non-boundaries.
      38             :  * If the fast path is too pessimistic, it costs performance.
      39             :  *
      40             :  * For a pair of BMP characters, the fast path tests are precise (1 bit per character).
      41             :  *
      42             :  * For a supplementary code point, the two units are its lead and trail surrogates.
      43             :  * We set hasTccc(lead)=true if any of its 1024 associated supplementary code points
      44             :  * has lccc!=0 or tccc!=0.
      45             :  * We set hasLccc(trail)=true for all trail surrogates.
      46             :  * As a result, we leave the fast path if the lead surrogate might start a
      47             :  * supplementary code point that is not FCD-inert.
      48             :  * (So the fast path need not detect that there is a surrogate pair,
      49             :  * nor look ahead to the next full code point.)
      50             :  *
      51             :  * hasLccc(lead)=true if any of its 1024 associated supplementary code points
      52             :  * has lccc!=0, for fast boundary checking between BMP & supplementary.
      53             :  *
      54             :  * hasTccc(trail)=false:
      55             :  * It should only be tested for unpaired trail surrogates which are FCD-inert.
      56             :  */
      57             : class U_I18N_API CollationFCD {
      58             : public:
      59           0 :     static inline UBool hasLccc(UChar32 c) {
      60             :         // Tests the lccc bit for code unit c. Precondition: c <= 0xffff (lcccIndex has 2048 entries, indexed by c >> 5).
      61             :         // c can be negative, e.g., U_SENTINEL from UCharIterator;
      62             :         // the c >= 0x300 test below rejects it before any table lookup.
      63             :         int32_t i;  // index into lcccBits; 0 means no bit is set for this group of 32 code units
      64             :         return
      65             :             // U+0300 is the first character with lccc!=0.
      66           0 :             c >= 0x300 &&
      67           0 :             (i = lcccIndex[c >> 5]) != 0 &&
      68           0 :             (lcccBits[i] & ((uint32_t)1 << (c & 0x1f))) != 0;  // one bit per code unit, selected by the low 5 bits of c
      69             :     }
      70             : 
      71           0 :     static inline UBool hasTccc(UChar32 c) {
      72             :         // Tests the tccc bit for code unit c. Precondition: c <= 0xffff (tcccIndex has 2048 entries, indexed by c >> 5).
      73             :         // c can be negative, e.g., U_SENTINEL from UCharIterator;
      74             :         // the c >= 0xc0 test below rejects it before any table lookup.
      75             :         int32_t i;  // index into tcccBits; 0 means no bit is set for this group of 32 code units
      76             :         return
      77             :             // U+00C0 is the first character with tccc!=0.
      78           0 :             c >= 0xc0 &&
      79           0 :             (i = tcccIndex[c >> 5]) != 0 &&
      80           0 :             (tcccBits[i] & ((uint32_t)1 << (c & 0x1f))) != 0;  // one bit per code unit, selected by the low 5 bits of c
      81             :     }
      82             : 
      83           0 :     static inline UBool mayHaveLccc(UChar32 c) {
      84             :         // Handles all of Unicode 0..10FFFF, unlike hasLccc() which requires c <= 0xffff.
      85             :         // c can be negative, e.g., U_SENTINEL; the c < 0x300 test returns FALSE for it.
      86             :         // U+0300 is the first character with lccc!=0.
      87           0 :         if(c < 0x300) { return FALSE; }
      88           0 :         if(c > 0xffff) { c = U16_LEAD(c); }  // supplementary code point: look up its lead surrogate instead
      89             :         int32_t i;  // index into lcccBits; 0 means no bit is set for this group of 32 code units
      90             :         return
      91           0 :             (i = lcccIndex[c >> 5]) != 0 &&
      92           0 :             (lcccBits[i] & ((uint32_t)1 << (c & 0x1f))) != 0;
      93             :     }
      94             : 
      95             :     /**
      96             :      * Tibetan composite vowel signs (U+0F73, U+0F75, U+0F81)
      97             :      * must be decomposed before reaching the core collation code,
      98             :      * or else some sequences including them, even ones passing the FCD check,
      99             :      * do not yield canonically equivalent results.
     100             :      *
     101             :      * This is a fast and imprecise test.
     102             :      *
     103             :      * @param c a code point
     104             :      * @return TRUE if c is U+0F73, U+0F75 or U+0F81 or one of several other Tibetan characters
     105             :      */
     106           0 :     static inline UBool maybeTibetanCompositeVowel(UChar32 c) {
     107           0 :         return (c & 0x1fff01) == 0xf01;  // bits 20..8 must equal 0x0f and bit 0 must be set: odd values in 0x0F01..0x0FFF
     108             :     }
     109             : 
     110             :     /**
     111             :      * Tibetan composite vowel signs (U+0F73, U+0F75, U+0F81)
     112             :      * must be decomposed before reaching the core collation code,
     113             :      * or else some sequences including them, even ones passing the FCD check,
     114             :      * do not yield canonically equivalent results.
     115             :      *
     116             :      * They have distinct lccc/tccc combinations: 129/130 or 129/132.
     117             :      *
     118             :      * @param fcd16 the FCD value (lccc/tccc combination) of a code point
     119             :      * @return TRUE if fcd16 is from U+0F73, U+0F75 or U+0F81
     120             :      */
     121           0 :     static inline UBool isFCD16OfTibetanCompositeVowel(uint16_t fcd16) {
     122           0 :         return fcd16 == 0x8182 || fcd16 == 0x8184;  // high byte 0x81 = 129; low byte 0x82 = 130 or 0x84 = 132
     123             :     }
     124             : 
     125             : private:
     126             :     CollationFCD();  // Private constructor: no instantiation; all members of this class are static.
     127             : 
     128             :     static const uint8_t lcccIndex[2048];  // indexed by (code unit >> 5); value selects a word in lcccBits, 0 = no bits set
     129             :     static const uint8_t tcccIndex[2048];  // indexed by (code unit >> 5); value selects a word in tcccBits, 0 = no bits set
     130             :     static const uint32_t lcccBits[];  // 32-bit words, one bit per code unit (bit number = code unit & 0x1f)
     131             :     static const uint32_t tcccBits[];  // 32-bit words, one bit per code unit (bit number = code unit & 0x1f)
     132             : };
     133             : 
     134             : U_NAMESPACE_END
     135             : 
     136             : #endif  // !UCONFIG_NO_COLLATION
     137             : #endif  // __COLLATIONFCD_H__

Generated by: LCOV version 1.13