LCOV - code coverage report
Current view: top level - intl/icu/source/common - propname.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 38 133 28.6 %
Date: 2017-07-14 16:53:18 Functions: 6 19 31.6 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : **********************************************************************
       5             : * Copyright (c) 2002-2014, International Business Machines
       6             : * Corporation and others.  All Rights Reserved.
       7             : **********************************************************************
       8             : * Author: Alan Liu
       9             : * Created: October 30 2002
      10             : * Since: ICU 2.4
      11             : * 2010nov19 Markus Scherer  Rewrite for formatVersion 2.
      12             : **********************************************************************
      13             : */
      14             : #include "propname.h"
      15             : #include "unicode/uchar.h"
      16             : #include "unicode/udata.h"
      17             : #include "unicode/uscript.h"
      18             : #include "umutex.h"
      19             : #include "cmemory.h"
      20             : #include "cstring.h"
      21             : #include "uarrsort.h"
      22             : #include "uinvchar.h"
      23             : 
      24             : #define INCLUDED_FROM_PROPNAME_CPP
      25             : #include "propname_data.h"
      26             : 
      27             : U_CDECL_BEGIN
      28             : 
      29             : /**
      30             :  * Gets the next non-ignorable ASCII character from a property name
      31             :  * and lowercases it.
      32             :  * @return ((advance count for the name)<<8)|character; the character byte is 0 at the end of the name
      33             :  */
      34             : static inline int32_t
      35           0 : getASCIIPropertyNameChar(const char *name) {
      36             :     int32_t i;  // number of chars read from name, including the one that ended the scan
      37             :     char c;
      38             : 
      39             :     /* Ignore delimiters '-', '_', and ASCII White_Space */
      40           0 :     for(i=0;
      41           0 :         (c=name[i++])==0x2d || c==0x5f ||  // '-' or '_'
      42           0 :         c==0x20 || (0x09<=c && c<=0x0d);  // space, or one of U+0009..U+000d
      43             :     ) {}
      44             : 
      45           0 :     if(c!=0) {  // the scan stopped on a name character, not on the terminating NUL
      46           0 :         return (i<<8)|(uint8_t)uprv_asciitolower((char)c);
      47             :     } else {
      48           0 :         return i<<8;  // end of name: the low byte stays 0
      49             :     }
      50             : }
      51             : 
      52             : /**
      53             :  * Gets the next non-ignorable EBCDIC character from a property name
      54             :  * and lowercases it.
      55             :  * @return ((advance count for the name)<<8)|character; the character byte is 0 at the end of the name
      56             :  */
      57             : static inline int32_t
      58           0 : getEBCDICPropertyNameChar(const char *name) {
      59             :     int32_t i;  // number of chars read from name, including the one that ended the scan
      60             :     char c;
      61             : 
      62             :     /* Ignore delimiters '-', '_', and EBCDIC White_Space */
      63           0 :     for(i=0;
      64           0 :         (c=name[i++])==0x60 || c==0x6d ||  // the two delimiters, as EBCDIC code points
      65           0 :         c==0x40 || c==0x05 || c==0x15 || c==0x25 || c==0x0b || c==0x0c || c==0x0d;  // the EBCDIC White_Space code points
      66             :     ) {}
      67             : 
      68           0 :     if(c!=0) {  // the scan stopped on a name character, not on the terminating NUL
      69           0 :         return (i<<8)|(uint8_t)uprv_ebcdictolower((char)c);
      70             :     } else {
      71           0 :         return i<<8;  // end of name: the low byte stays 0
      72             :     }
      73             : }
      74             : 
      75             : /**
      76             :  * Unicode property names and property value names are compared "loosely".
      77             :  *
      78             :  * UCD.html 4.0.1 says:
      79             :  *   For all property names, property value names, and for property values for
      80             :  *   Enumerated, Binary, or Catalog properties, use the following
      81             :  *   loose matching rule:
      82             :  *
      83             :  *   LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
      84             :  *
      85             :  * This function does just that, for (char *) name strings in ASCII.
      86             :  * It is almost identical to ucnv_compareNames() but also ignores
      87             :  * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
      88             :  *
      89             :  * @internal
      90             :  */
      91             : 
      92             : U_CAPI int32_t U_EXPORT2
      93           0 : uprv_compareASCIIPropertyNames(const char *name1, const char *name2) {  // returns 0 on a loose match, else the difference of the first differing lowercased characters
      94             :     int32_t rc, r1, r2;
      95             : 
      96             :     for(;;) {
      97           0 :         r1=getASCIIPropertyNameChar(name1);  // (advance count<<8)|lowercased character
      98           0 :         r2=getASCIIPropertyNameChar(name2);
      99             : 
     100             :         /* If we reach the ends of both strings then they match */
     101           0 :         if(((r1|r2)&0xff)==0) {  // both character bytes are 0
     102           0 :             return 0;
     103             :         }
     104             : 
     105             :         /* Compare the lowercased characters */
     106           0 :         if(r1!=r2) {  // may differ only in the advance count, which is not a mismatch
     107           0 :             rc=(r1&0xff)-(r2&0xff);
     108           0 :             if(rc!=0) {
     109           0 :                 return rc;
     110             :             }
     111             :         }
     112             : 
     113           0 :         name1+=r1>>8;  // skip the characters consumed for this step
     114           0 :         name2+=r2>>8;
     115             :     }
     116             : }
     117             : 
     118             : U_CAPI int32_t U_EXPORT2
     119           0 : uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) {  // loose name comparison for EBCDIC strings; returns 0 on a match, else the difference of the first differing lowercased characters
     120             :     int32_t rc, r1, r2;
     121             : 
     122             :     for(;;) {
     123           0 :         r1=getEBCDICPropertyNameChar(name1);  // (advance count<<8)|lowercased character
     124           0 :         r2=getEBCDICPropertyNameChar(name2);
     125             : 
     126             :         /* If we reach the ends of both strings then they match */
     127           0 :         if(((r1|r2)&0xff)==0) {  // both character bytes are 0
     128           0 :             return 0;
     129             :         }
     130             : 
     131             :         /* Compare the lowercased characters */
     132           0 :         if(r1!=r2) {  // may differ only in the advance count, which is not a mismatch
     133           0 :             rc=(r1&0xff)-(r2&0xff);
     134           0 :             if(rc!=0) {
     135           0 :                 return rc;
     136             :             }
     137             :         }
     138             : 
     139           0 :         name1+=r1>>8;  // skip the characters consumed for this step
     140           0 :         name2+=r2>>8;
     141             :     }
     142             : }
     143             : 
     144             : U_CDECL_END
     145             : 
     146             : U_NAMESPACE_BEGIN
     147             : 
     148         379 : int32_t PropNameData::findProperty(int32_t property) {  // returns the valueMaps index of the property's two-word entry, or 0 if no range contains the property
     149         379 :     int32_t i=1;  // valueMaps index, initially after numRanges
     150         758 :     for(int32_t numRanges=valueMaps[0]; numRanges>0; --numRanges) {
     151             :         // Read and skip the start and limit of this range.
     152         758 :         int32_t start=valueMaps[i];
     153         758 :         int32_t limit=valueMaps[i+1];
     154         758 :         i+=2;
     155         758 :         if(property<start) {  // presumably the ranges are stored in ascending order, so no later range can match
     156           0 :             break;
     157             :         }
     158         758 :         if(property<limit) {  // limit is exclusive
     159         379 :             return i+(property-start)*2;  // two valueMaps words per property in the range
     160             :         }
     161         379 :         i+=(limit-start)*2;  // Skip all entries for this range.
     162             :     }
     163           0 :     return 0;
     164             : }
     165             : 
     166         379 : int32_t PropNameData::findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value) {  // returns the name group offset stored for value, or 0 if there is none
     167         379 :     if(valueMapIndex==0) {
     168           0 :         return 0;  // The property does not have named values.
     169             :     }
     170         379 :     ++valueMapIndex;  // Skip the BytesTrie offset.
     171         379 :     int32_t numRanges=valueMaps[valueMapIndex++];
     172         379 :     if(numRanges<0x10) {  // values below 0x10 are a range count; anything else marks a list (see else branch)
     173             :         // Ranges of values.
     174         379 :         for(; numRanges>0; --numRanges) {
     175             :             // Read and skip the start and limit of this range.
     176         379 :             int32_t start=valueMaps[valueMapIndex];
     177         379 :             int32_t limit=valueMaps[valueMapIndex+1];
     178         379 :             valueMapIndex+=2;
     179         379 :             if(value<start) {  // presumably the ranges are stored in ascending order
     180           0 :                 break;
     181             :             }
     182         379 :             if(value<limit) {  // limit is exclusive
     183         379 :                 return valueMaps[valueMapIndex+value-start];  // one name group offset per value in the range
     184             :             }
     185           0 :             valueMapIndex+=limit-start;  // Skip all entries for this range.
     186             :         }
     187             :     } else {
     188             :         // List of values.
     189           0 :         int32_t valuesStart=valueMapIndex;
     190           0 :         int32_t nameGroupOffsetsStart=valueMapIndex+numRanges-0x10;  // the list holds numRanges-0x10 values, followed by one name group offset per value
     191           0 :         do {
     192           0 :             int32_t v=valueMaps[valueMapIndex];
     193           0 :             if(value<v) {  // presumably the values are stored in ascending order
     194           0 :                 break;
     195             :             }
     196           0 :             if(value==v) {
     197           0 :                 return valueMaps[nameGroupOffsetsStart+valueMapIndex-valuesStart];  // offset at the same position as the matched value
     198             :             }
     199             :         } while(++valueMapIndex<nameGroupOffsetsStart);
     200             :     }
     201           0 :     return 0;
     202             : }
     203             : 
     204         379 : const char *PropNameData::getName(const char *nameGroup, int32_t nameIndex) {  // returns the nameIndex-th name of the group, or NULL if the index is out of range or the name is empty
     205         379 :     int32_t numNames=*nameGroup++;  // the first byte of a name group is its number of names
     206         379 :     if(nameIndex<0 || numNames<=nameIndex) {
     207           0 :         return NULL;
     208             :     }
     209             :     // Skip nameIndex names.
     210         379 :     for(; nameIndex>0; --nameIndex) {
     211           0 :         nameGroup=uprv_strchr(nameGroup, 0)+1;  // move just past the NUL that ends the current name
     212             :     }
     213         379 :     if(*nameGroup==0) {
     214           0 :         return NULL;  // no name (Property[Value]Aliases.txt has "n/a")
     215             :     }
     216         379 :     return nameGroup;
     217             : }
     218             : 
     219           0 : UBool PropNameData::containsName(BytesTrie &trie, const char *name) {  // feeds the loosely normalized name into trie; the result tells whether the whole name ends on a value
     220           0 :     if(name==NULL) {
     221           0 :         return FALSE;
     222             :     }
     223           0 :     UStringTrieResult result=USTRINGTRIE_NO_VALUE;
     224             :     char c;
     225           0 :     while((c=*name++)!=0) {
     226           0 :         c=uprv_invCharToLowercaseAscii(c);  // presumably converts an invariant character to lowercase ASCII; the helper is not visible here
     227             :         // Ignore delimiters '-', '_', and ASCII White_Space.
     228           0 :         if(c==0x2d || c==0x5f || c==0x20 || (0x09<=c && c<=0x0d)) {
     229           0 :             continue;
     230             :         }
     231           0 :         if(!USTRINGTRIE_HAS_NEXT(result)) {  // name still has characters but the previous step allows no continuation
     232           0 :             return FALSE;
     233             :         }
     234           0 :         result=trie.next((uint8_t)c);
     235             :     }
     236           0 :     return USTRINGTRIE_HAS_VALUE(result);  // an empty or all-delimiter name leaves the initial USTRINGTRIE_NO_VALUE
     237             : }
     238             : 
     239           0 : const char *PropNameData::getPropertyName(int32_t property, int32_t nameChoice) {  // returns the nameChoice-th name of property, or NULL if there is none
     240           0 :     int32_t valueMapIndex=findProperty(property);
     241           0 :     if(valueMapIndex==0) {
     242           0 :         return NULL;  // Not a known property.
     243             :     }
     244           0 :     return getName(nameGroups+valueMaps[valueMapIndex], nameChoice);  // the first word of the property's entry is its offset into nameGroups
     245             : }
     246             : 
     247         379 : const char *PropNameData::getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice) {  // returns the nameChoice-th name of value for property, or NULL if there is none
     248         379 :     int32_t valueMapIndex=findProperty(property);
     249         379 :     if(valueMapIndex==0) {
     250           0 :         return NULL;  // Not a known property.
     251             :     }
     252         379 :     int32_t nameGroupOffset=findPropertyValueNameGroup(valueMaps[valueMapIndex+1], value);  // the second word of the property's entry is the index of its value map
     253         379 :     if(nameGroupOffset==0) {
     254           0 :         return NULL;  // no name group for this value
     255             :     }
     256         379 :     return getName(nameGroups+nameGroupOffset, nameChoice);
     257             : }
     258             : 
     259           0 : int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) {  // looks alias up in the BytesTrie at bytesTrieOffset; returns its value, or UCHAR_INVALID_CODE if it is not found
     260           0 :     BytesTrie trie(bytesTries+bytesTrieOffset);
     261           0 :     if(containsName(trie, alias)) {
     262           0 :         return trie.getValue();  // containsName() left trie at the end of the matched name
     263             :     } else {
     264           0 :         return UCHAR_INVALID_CODE;
     265             :     }
     266             : }
     267             : 
     268           0 : int32_t PropNameData::getPropertyEnum(const char *alias) {  // returns the property matching alias, or UCHAR_INVALID_CODE
     269           0 :     return getPropertyOrValueEnum(0, alias);  // property aliases are looked up in the trie at offset 0
     270             : }
     271             : 
     272           0 : int32_t PropNameData::getPropertyValueEnum(int32_t property, const char *alias) {  // returns the value of property matching alias, or UCHAR_INVALID_CODE
     273           0 :     int32_t valueMapIndex=findProperty(property);
     274           0 :     if(valueMapIndex==0) {
     275           0 :         return UCHAR_INVALID_CODE;  // Not a known property.
     276             :     }
     277           0 :     valueMapIndex=valueMaps[valueMapIndex+1];  // the second word of the property's entry is the index of its value map
     278           0 :     if(valueMapIndex==0) {
     279           0 :         return UCHAR_INVALID_CODE;  // The property does not have named values.
     280             :     }
     281             :     // valueMapIndex is the start of the property's valueMap,
     282             :     // where the first word is the BytesTrie offset.
     283           0 :     return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
     284             : }
     285             : U_NAMESPACE_END
     286             : 
     287             : //----------------------------------------------------------------------
     288             : // Public API implementation
     289             : 
     290             : U_CAPI const char* U_EXPORT2
     291           0 : u_getPropertyName(UProperty property,
     292             :                   UPropertyNameChoice nameChoice) {
     293             :     U_NAMESPACE_USE
     294           0 :     return PropNameData::getPropertyName(property, nameChoice);  // C API wrapper: forwards unchanged, so the result may be NULL
     295             : }
     296             : 
     297             : U_CAPI UProperty U_EXPORT2
     298           0 : u_getPropertyEnum(const char* alias) {
     299             :     U_NAMESPACE_USE
     300           0 :     return (UProperty)PropNameData::getPropertyEnum(alias);  // C API wrapper: the int32_t result (possibly UCHAR_INVALID_CODE) is cast to UProperty
     301             : }
     302             : 
     303             : U_CAPI const char* U_EXPORT2
     304         379 : u_getPropertyValueName(UProperty property,
     305             :                        int32_t value,
     306             :                        UPropertyNameChoice nameChoice) {
     307             :     U_NAMESPACE_USE
     308         379 :     return PropNameData::getPropertyValueName(property, value, nameChoice);  // C API wrapper: forwards unchanged, so the result may be NULL
     309             : }
     310             : 
     311             : U_CAPI int32_t U_EXPORT2
     312           0 : u_getPropertyValueEnum(UProperty property,
     313             :                        const char* alias) {
     314             :     U_NAMESPACE_USE
     315           0 :     return PropNameData::getPropertyValueEnum(property, alias);  // C API wrapper: forwards unchanged, so the result may be UCHAR_INVALID_CODE
     316             : }
     317             : 
     318             : U_CAPI const char*  U_EXPORT2
     319           0 : uscript_getName(UScriptCode scriptCode){  // returns the long name of scriptCode as a value of the UCHAR_SCRIPT property
     320             :     return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
     321           0 :                                   U_LONG_PROPERTY_NAME);
     322             : }
     323             : 
     324             : U_CAPI const char*  U_EXPORT2
     325         379 : uscript_getShortName(UScriptCode scriptCode){  // returns the short name of scriptCode as a value of the UCHAR_SCRIPT property
     326             :     return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
     327         379 :                                   U_SHORT_PROPERTY_NAME);
     328             : }

Generated by: LCOV version 1.13