LCOV - code coverage report
Current view: top level - intl/icu/source/common - patternprops.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 0 65 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 7 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : *******************************************************************************
       5             : *   Copyright (C) 2011, International Business Machines
       6             : *   Corporation and others.  All Rights Reserved.
       7             : *******************************************************************************
       8             : *   file name:  patternprops.cpp
       9             : *   encoding:   UTF-8
      10             : *   tab size:   8 (not used)
      11             : *   indentation:4
      12             : *
      13             : *   created on: 2011mar13
      14             : *   created by: Markus W. Scherer
      15             : */
      16             : 
      17             : #include "unicode/utypes.h"
      18             : #include "patternprops.h"
      19             : 
      20             : U_NAMESPACE_BEGIN
      21             : 
      22             : /*
      23             :  * One byte per Latin-1 character.
      24             :  * Bit 0 is set if either Pattern property is true,
      25             :  * bit 1 if Pattern_Syntax is true,
      26             :  * bit 2 if Pattern_White_Space is true.
      27             :  * That is, Pattern_Syntax is encoded as 3 and Pattern_White_Space as 5.
      28             :  */
      29             : static const uint8_t latin1[256]={
      30             :     // WS: 9..D
      31             :     0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 0, 0,
      32             :     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      33             :     // WS: 20  Syntax: 21..2F
      34             :     5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
      35             :     // Syntax: 3A..40
      36             :     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3,
      37             :     3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      38             :     // Syntax: 5B..5E
      39             :     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0,
      40             :     // Syntax: 60
      41             :     3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      42             :     // Syntax: 7B..7E
      43             :     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0,
      44             :     // WS: 85
      45             :     0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      46             :     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      47             :     // Syntax: A1..A7, A9, AB, AC, AE
      48             :     0, 3, 3, 3, 3, 3, 3, 3, 0, 3, 0, 3, 3, 0, 3, 0,
      49             :     // Syntax: B0, B1, B6, BB, BF
      50             :     3, 3, 0, 0, 0, 0, 3, 0, 0, 0, 0, 3, 0, 0, 0, 3,
      51             :     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      52             :     // Syntax: D7
      53             :     0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0,
      54             :     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      55             :     // Syntax: F7
      56             :     0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0
      57             : };
      58             : 
      59             : /*
      60             :  * One byte per 32 characters from U+2000..U+303F indexing into
      61             :  * a small table of 32-bit data words.
      62             :  * The first two data words are all-zeros and all-ones.
      63             :  */
      64             : static const uint8_t index2000[130]={
      65             :     2, 3, 4, 0, 0, 0, 0, 0,  // 20xx
      66             :     0, 0, 0, 0, 5, 1, 1, 1,  // 21xx
      67             :     1, 1, 1, 1, 1, 1, 1, 1,  // 22xx
      68             :     1, 1, 1, 1, 1, 1, 1, 1,  // 23xx
      69             :     1, 1, 1, 0, 0, 0, 0, 0,  // 24xx
      70             :     1, 1, 1, 1, 1, 1, 1, 1,  // 25xx
      71             :     1, 1, 1, 1, 1, 1, 1, 1,  // 26xx
      72             :     1, 1, 1, 6, 7, 1, 1, 1,  // 27xx
      73             :     1, 1, 1, 1, 1, 1, 1, 1,  // 28xx
      74             :     1, 1, 1, 1, 1, 1, 1, 1,  // 29xx
      75             :     1, 1, 1, 1, 1, 1, 1, 1,  // 2Axx
      76             :     1, 1, 1, 1, 1, 1, 1, 1,  // 2Bxx
      77             :     0, 0, 0, 0, 0, 0, 0, 0,  // 2Cxx
      78             :     0, 0, 0, 0, 0, 0, 0, 0,  // 2Dxx
      79             :     1, 1, 1, 1, 0, 0, 0, 0,  // 2Exx
      80             :     0, 0, 0, 0, 0, 0, 0, 0,  // 2Fxx
      81             :     8, 9  // 3000..303F
      82             : };
      83             : 
      84             : /*
      85             :  * One 32-bit integer per 32 characters. Ranges of all-false and all-true
      86             :  * are mapped to the first two values, other ranges map to appropriate bit patterns.
      87             :  */
      88             : static const uint32_t syntax2000[]={
      89             :     0,
      90             :     0xffffffff,
      91             :     0xffff0000,  // 2: 2010..201F
      92             :     0x7fff00ff,  // 3: 2020..2027, 2030..203E
      93             :     0x7feffffe,  // 4: 2041..2053, 2055..205E
      94             :     0xffff0000,  // 5: 2190..219F
      95             :     0x003fffff,  // 6: 2760..2775
      96             :     0xfff00000,  // 7: 2794..279F
      97             :     0xffffff0e,  // 8: 3001..3003, 3008..301F
      98             :     0x00010001   // 9: 3020, 3030
      99             : };
     100             : 
     101             : /*
     102             :  * Same as syntax2000, but with additional bits set for the
     103             :  * Pattern_White_Space characters 200E 200F 2028 2029.
     104             :  */
     105             : static const uint32_t syntaxOrWhiteSpace2000[]={
     106             :     0,
     107             :     0xffffffff,
     108             :     0xffffc000,  // 2: 200E..201F
     109             :     0x7fff03ff,  // 3: 2020..2029, 2030..203E
     110             :     0x7feffffe,  // 4: 2041..2053, 2055..205E
     111             :     0xffff0000,  // 5: 2190..219F
     112             :     0x003fffff,  // 6: 2760..2775
     113             :     0xfff00000,  // 7: 2794..279F
     114             :     0xffffff0e,  // 8: 3001..3003, 3008..301F
     115             :     0x00010001   // 9: 3020, 3030
     116             : };
     117             : 
     118             : UBool
     119           0 : PatternProps::isSyntax(UChar32 c) {
     120           0 :     if(c<0) {
     121           0 :         return FALSE;
     122           0 :     } else if(c<=0xff) {
     123           0 :         return (UBool)(latin1[c]>>1)&1;
     124           0 :     } else if(c<0x2010) {
     125           0 :         return FALSE;
     126           0 :     } else if(c<=0x3030) {
     127           0 :         uint32_t bits=syntax2000[index2000[(c-0x2000)>>5]];
     128           0 :         return (UBool)((bits>>(c&0x1f))&1);
     129           0 :     } else if(0xfd3e<=c && c<=0xfe46) {
     130           0 :         return c<=0xfd3f || 0xfe45<=c;
     131             :     } else {
     132           0 :         return FALSE;
     133             :     }
     134             : }
     135             : 
     136             : UBool
     137           0 : PatternProps::isSyntaxOrWhiteSpace(UChar32 c) {
     138           0 :     if(c<0) {
     139           0 :         return FALSE;
     140           0 :     } else if(c<=0xff) {
     141           0 :         return (UBool)(latin1[c]&1);
     142           0 :     } else if(c<0x200e) {
     143           0 :         return FALSE;
     144           0 :     } else if(c<=0x3030) {
     145           0 :         uint32_t bits=syntaxOrWhiteSpace2000[index2000[(c-0x2000)>>5]];
     146           0 :         return (UBool)((bits>>(c&0x1f))&1);
     147           0 :     } else if(0xfd3e<=c && c<=0xfe46) {
     148           0 :         return c<=0xfd3f || 0xfe45<=c;
     149             :     } else {
     150           0 :         return FALSE;
     151             :     }
     152             : }
     153             : 
     154             : UBool
     155           0 : PatternProps::isWhiteSpace(UChar32 c) {
     156           0 :     if(c<0) {
     157           0 :         return FALSE;
     158           0 :     } else if(c<=0xff) {
     159           0 :         return (UBool)(latin1[c]>>2)&1;
     160           0 :     } else if(0x200e<=c && c<=0x2029) {
     161           0 :         return c<=0x200f || 0x2028<=c;
     162             :     } else {
     163           0 :         return FALSE;
     164             :     }
     165             : }
     166             : 
     167             : const UChar *
     168           0 : PatternProps::skipWhiteSpace(const UChar *s, int32_t length) {
     169           0 :     while(length>0 && isWhiteSpace(*s)) {
     170           0 :         ++s;
     171           0 :         --length;
     172             :     }
     173           0 :     return s;
     174             : }
     175             : 
     176             : const UChar *
     177           0 : PatternProps::trimWhiteSpace(const UChar *s, int32_t &length) {
     178           0 :     if(length<=0 || (!isWhiteSpace(s[0]) && !isWhiteSpace(s[length-1]))) {
     179           0 :         return s;
     180             :     }
     181           0 :     int32_t start=0;
     182           0 :     int32_t limit=length;
     183           0 :     while(start<limit && isWhiteSpace(s[start])) {
     184           0 :         ++start;
     185             :     }
     186           0 :     if(start<limit) {
     187             :         // There is non-white space at start; we will not move limit below that,
     188             :         // so we need not test start<limit in the loop.
     189           0 :         while(isWhiteSpace(s[limit-1])) {
     190           0 :             --limit;
     191             :         }
     192             :     }
     193           0 :     length=limit-start;
     194           0 :     return s+start;
     195             : }
     196             : 
     197             : UBool
     198           0 : PatternProps::isIdentifier(const UChar *s, int32_t length) {
     199           0 :     if(length<=0) {
     200           0 :         return FALSE;
     201             :     }
     202           0 :     const UChar *limit=s+length;
     203           0 :     do {
     204           0 :         if(isSyntaxOrWhiteSpace(*s++)) {
     205           0 :             return FALSE;
     206             :         }
     207           0 :     } while(s<limit);
     208           0 :     return TRUE;
     209             : }
     210             : 
     211             : const UChar *
     212           0 : PatternProps::skipIdentifier(const UChar *s, int32_t length) {
     213           0 :     while(length>0 && !isSyntaxOrWhiteSpace(*s)) {
     214           0 :         ++s;
     215           0 :         --length;
     216             :     }
     217           0 :     return s;
     218             : }
     219             : 
     220             : U_NAMESPACE_END

Generated by: LCOV version 1.13