Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : **********************************************************************
5 : * Copyright (c) 2002-2014, International Business Machines
6 : * Corporation and others. All Rights Reserved.
7 : **********************************************************************
8 : * Author: Alan Liu
9 : * Created: October 30 2002
10 : * Since: ICU 2.4
11 : * 2010nov19 Markus Scherer Rewrite for formatVersion 2.
12 : **********************************************************************
13 : */
14 : #include "propname.h"
15 : #include "unicode/uchar.h"
16 : #include "unicode/udata.h"
17 : #include "unicode/uscript.h"
18 : #include "umutex.h"
19 : #include "cmemory.h"
20 : #include "cstring.h"
21 : #include "uarrsort.h"
22 : #include "uinvchar.h"
23 :
24 : #define INCLUDED_FROM_PROPNAME_CPP
25 : #include "propname_data.h"
26 :
27 : U_CDECL_BEGIN
28 :
29 : /**
30 : * Get the next non-ignorable ASCII character from a property name
31 : * and lowercases it.
32 : * @return ((advance count for the name)<<8)|character
33 : */
34 : static inline int32_t
35 0 : getASCIIPropertyNameChar(const char *name) {
36 : int32_t i;
37 : char c;
38 :
39 : /* Ignore delimiters '-', '_', and ASCII White_Space */
40 0 : for(i=0;
41 0 : (c=name[i++])==0x2d || c==0x5f ||
42 0 : c==0x20 || (0x09<=c && c<=0x0d);
43 : ) {}
44 :
45 0 : if(c!=0) {
46 0 : return (i<<8)|(uint8_t)uprv_asciitolower((char)c);
47 : } else {
48 0 : return i<<8;
49 : }
50 : }
51 :
52 : /**
53 : * Get the next non-ignorable EBCDIC character from a property name
54 : * and lowercases it.
55 : * @return ((advance count for the name)<<8)|character
56 : */
57 : static inline int32_t
58 0 : getEBCDICPropertyNameChar(const char *name) {
59 : int32_t i;
60 : char c;
61 :
62 : /* Ignore delimiters '-', '_', and EBCDIC White_Space */
63 0 : for(i=0;
64 0 : (c=name[i++])==0x60 || c==0x6d ||
65 0 : c==0x40 || c==0x05 || c==0x15 || c==0x25 || c==0x0b || c==0x0c || c==0x0d;
66 : ) {}
67 :
68 0 : if(c!=0) {
69 0 : return (i<<8)|(uint8_t)uprv_ebcdictolower((char)c);
70 : } else {
71 0 : return i<<8;
72 : }
73 : }
74 :
75 : /**
76 : * Unicode property names and property value names are compared "loosely".
77 : *
78 : * UCD.html 4.0.1 says:
79 : * For all property names, property value names, and for property values for
80 : * Enumerated, Binary, or Catalog properties, use the following
81 : * loose matching rule:
82 : *
83 : * LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
84 : *
85 : * This function does just that, for (char *) name strings.
86 : * It is almost identical to ucnv_compareNames() but also ignores
87 : * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
88 : *
89 : * @internal
90 : */
91 :
92 : U_CAPI int32_t U_EXPORT2
93 0 : uprv_compareASCIIPropertyNames(const char *name1, const char *name2) {
94 : int32_t rc, r1, r2;
95 :
96 : for(;;) {
97 0 : r1=getASCIIPropertyNameChar(name1);
98 0 : r2=getASCIIPropertyNameChar(name2);
99 :
100 : /* If we reach the ends of both strings then they match */
101 0 : if(((r1|r2)&0xff)==0) {
102 0 : return 0;
103 : }
104 :
105 : /* Compare the lowercased characters */
106 0 : if(r1!=r2) {
107 0 : rc=(r1&0xff)-(r2&0xff);
108 0 : if(rc!=0) {
109 0 : return rc;
110 : }
111 : }
112 :
113 0 : name1+=r1>>8;
114 0 : name2+=r2>>8;
115 : }
116 : }
117 :
118 : U_CAPI int32_t U_EXPORT2
119 0 : uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) {
120 : int32_t rc, r1, r2;
121 :
122 : for(;;) {
123 0 : r1=getEBCDICPropertyNameChar(name1);
124 0 : r2=getEBCDICPropertyNameChar(name2);
125 :
126 : /* If we reach the ends of both strings then they match */
127 0 : if(((r1|r2)&0xff)==0) {
128 0 : return 0;
129 : }
130 :
131 : /* Compare the lowercased characters */
132 0 : if(r1!=r2) {
133 0 : rc=(r1&0xff)-(r2&0xff);
134 0 : if(rc!=0) {
135 0 : return rc;
136 : }
137 : }
138 :
139 0 : name1+=r1>>8;
140 0 : name2+=r2>>8;
141 : }
142 : }
143 :
144 : U_CDECL_END
145 :
146 : U_NAMESPACE_BEGIN
147 :
148 379 : int32_t PropNameData::findProperty(int32_t property) {
149 379 : int32_t i=1; // valueMaps index, initially after numRanges
150 758 : for(int32_t numRanges=valueMaps[0]; numRanges>0; --numRanges) {
151 : // Read and skip the start and limit of this range.
152 758 : int32_t start=valueMaps[i];
153 758 : int32_t limit=valueMaps[i+1];
154 758 : i+=2;
155 758 : if(property<start) {
156 0 : break;
157 : }
158 758 : if(property<limit) {
159 379 : return i+(property-start)*2;
160 : }
161 379 : i+=(limit-start)*2; // Skip all entries for this range.
162 : }
163 0 : return 0;
164 : }
165 :
166 379 : int32_t PropNameData::findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value) {
167 379 : if(valueMapIndex==0) {
168 0 : return 0; // The property does not have named values.
169 : }
170 379 : ++valueMapIndex; // Skip the BytesTrie offset.
171 379 : int32_t numRanges=valueMaps[valueMapIndex++];
172 379 : if(numRanges<0x10) {
173 : // Ranges of values.
174 379 : for(; numRanges>0; --numRanges) {
175 : // Read and skip the start and limit of this range.
176 379 : int32_t start=valueMaps[valueMapIndex];
177 379 : int32_t limit=valueMaps[valueMapIndex+1];
178 379 : valueMapIndex+=2;
179 379 : if(value<start) {
180 0 : break;
181 : }
182 379 : if(value<limit) {
183 379 : return valueMaps[valueMapIndex+value-start];
184 : }
185 0 : valueMapIndex+=limit-start; // Skip all entries for this range.
186 : }
187 : } else {
188 : // List of values.
189 0 : int32_t valuesStart=valueMapIndex;
190 0 : int32_t nameGroupOffsetsStart=valueMapIndex+numRanges-0x10;
191 0 : do {
192 0 : int32_t v=valueMaps[valueMapIndex];
193 0 : if(value<v) {
194 0 : break;
195 : }
196 0 : if(value==v) {
197 0 : return valueMaps[nameGroupOffsetsStart+valueMapIndex-valuesStart];
198 : }
199 : } while(++valueMapIndex<nameGroupOffsetsStart);
200 : }
201 0 : return 0;
202 : }
203 :
204 379 : const char *PropNameData::getName(const char *nameGroup, int32_t nameIndex) {
205 379 : int32_t numNames=*nameGroup++;
206 379 : if(nameIndex<0 || numNames<=nameIndex) {
207 0 : return NULL;
208 : }
209 : // Skip nameIndex names.
210 379 : for(; nameIndex>0; --nameIndex) {
211 0 : nameGroup=uprv_strchr(nameGroup, 0)+1;
212 : }
213 379 : if(*nameGroup==0) {
214 0 : return NULL; // no name (Property[Value]Aliases.txt has "n/a")
215 : }
216 379 : return nameGroup;
217 : }
218 :
219 0 : UBool PropNameData::containsName(BytesTrie &trie, const char *name) {
220 0 : if(name==NULL) {
221 0 : return FALSE;
222 : }
223 0 : UStringTrieResult result=USTRINGTRIE_NO_VALUE;
224 : char c;
225 0 : while((c=*name++)!=0) {
226 0 : c=uprv_invCharToLowercaseAscii(c);
227 : // Ignore delimiters '-', '_', and ASCII White_Space.
228 0 : if(c==0x2d || c==0x5f || c==0x20 || (0x09<=c && c<=0x0d)) {
229 0 : continue;
230 : }
231 0 : if(!USTRINGTRIE_HAS_NEXT(result)) {
232 0 : return FALSE;
233 : }
234 0 : result=trie.next((uint8_t)c);
235 : }
236 0 : return USTRINGTRIE_HAS_VALUE(result);
237 : }
238 :
239 0 : const char *PropNameData::getPropertyName(int32_t property, int32_t nameChoice) {
240 0 : int32_t valueMapIndex=findProperty(property);
241 0 : if(valueMapIndex==0) {
242 0 : return NULL; // Not a known property.
243 : }
244 0 : return getName(nameGroups+valueMaps[valueMapIndex], nameChoice);
245 : }
246 :
247 379 : const char *PropNameData::getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice) {
248 379 : int32_t valueMapIndex=findProperty(property);
249 379 : if(valueMapIndex==0) {
250 0 : return NULL; // Not a known property.
251 : }
252 379 : int32_t nameGroupOffset=findPropertyValueNameGroup(valueMaps[valueMapIndex+1], value);
253 379 : if(nameGroupOffset==0) {
254 0 : return NULL;
255 : }
256 379 : return getName(nameGroups+nameGroupOffset, nameChoice);
257 : }
258 :
259 0 : int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) {
260 0 : BytesTrie trie(bytesTries+bytesTrieOffset);
261 0 : if(containsName(trie, alias)) {
262 0 : return trie.getValue();
263 : } else {
264 0 : return UCHAR_INVALID_CODE;
265 : }
266 : }
267 :
268 0 : int32_t PropNameData::getPropertyEnum(const char *alias) {
269 0 : return getPropertyOrValueEnum(0, alias);
270 : }
271 :
272 0 : int32_t PropNameData::getPropertyValueEnum(int32_t property, const char *alias) {
273 0 : int32_t valueMapIndex=findProperty(property);
274 0 : if(valueMapIndex==0) {
275 0 : return UCHAR_INVALID_CODE; // Not a known property.
276 : }
277 0 : valueMapIndex=valueMaps[valueMapIndex+1];
278 0 : if(valueMapIndex==0) {
279 0 : return UCHAR_INVALID_CODE; // The property does not have named values.
280 : }
281 : // valueMapIndex is the start of the property's valueMap,
282 : // where the first word is the BytesTrie offset.
283 0 : return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
284 : }
285 : U_NAMESPACE_END
286 :
287 : //----------------------------------------------------------------------
288 : // Public API implementation
289 :
290 : U_CAPI const char* U_EXPORT2
291 0 : u_getPropertyName(UProperty property,
292 : UPropertyNameChoice nameChoice) {
293 : U_NAMESPACE_USE
294 0 : return PropNameData::getPropertyName(property, nameChoice);
295 : }
296 :
297 : U_CAPI UProperty U_EXPORT2
298 0 : u_getPropertyEnum(const char* alias) {
299 : U_NAMESPACE_USE
300 0 : return (UProperty)PropNameData::getPropertyEnum(alias);
301 : }
302 :
303 : U_CAPI const char* U_EXPORT2
304 379 : u_getPropertyValueName(UProperty property,
305 : int32_t value,
306 : UPropertyNameChoice nameChoice) {
307 : U_NAMESPACE_USE
308 379 : return PropNameData::getPropertyValueName(property, value, nameChoice);
309 : }
310 :
311 : U_CAPI int32_t U_EXPORT2
312 0 : u_getPropertyValueEnum(UProperty property,
313 : const char* alias) {
314 : U_NAMESPACE_USE
315 0 : return PropNameData::getPropertyValueEnum(property, alias);
316 : }
317 :
318 : U_CAPI const char* U_EXPORT2
319 0 : uscript_getName(UScriptCode scriptCode){
320 : return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
321 0 : U_LONG_PROPERTY_NAME);
322 : }
323 :
324 : U_CAPI const char* U_EXPORT2
325 379 : uscript_getShortName(UScriptCode scriptCode){
326 : return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
327 379 : U_SHORT_PROPERTY_NAME);
328 : }
|