Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : *******************************************************************************
5 : *
6 : * Copyright (C) 2003-2016, International Business Machines
7 : * Corporation and others. All Rights Reserved.
8 : *
9 : *******************************************************************************
10 : * file name: usprep.cpp
11 : * encoding: UTF-8
12 : * tab size: 8 (not used)
13 : * indentation:4
14 : *
15 : * created on: 2003jul2
16 : * created by: Ram Viswanadha
17 : */
18 :
19 : #include "unicode/utypes.h"
20 :
21 : #if !UCONFIG_NO_IDNA
22 :
23 : #include "unicode/usprep.h"
24 :
25 : #include "unicode/normalizer2.h"
26 : #include "unicode/ustring.h"
27 : #include "unicode/uchar.h"
28 : #include "unicode/uversion.h"
29 : #include "umutex.h"
30 : #include "cmemory.h"
31 : #include "sprpimpl.h"
32 : #include "ustr_imp.h"
33 : #include "uhash.h"
34 : #include "cstring.h"
35 : #include "udataswp.h"
36 : #include "ucln_cmn.h"
37 : #include "ubidi_props.h"
38 : #include "uprops.h"
39 :
40 : U_NAMESPACE_USE
41 :
42 : U_CDECL_BEGIN
43 :
44 : /*
45 : Static cache for already opened StringPrep profiles
46 : */
47 : static UHashtable *SHARED_DATA_HASHTABLE = NULL;
48 : static icu::UInitOnce gSharedDataInitOnce;
49 :
50 : static UMutex usprepMutex = U_MUTEX_INITIALIZER;
51 :
52 : /* format version of spp file */
53 : //static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
54 :
55 : /* the Unicode version of the sprep data */
56 : static UVersionInfo dataVersion={ 0, 0, 0, 0 };
57 :
/*
 * Data item names, indexed by UStringPrepProfileType.
 * The order of the entries must stay aligned with that enum.
 */
static const char * const PROFILE_NAMES[] = {
    /* USPREP_RFC3491_NAMEPREP */
    "rfc3491",
    /* USPREP_RFC3530_NFS4_CS_PREP */
    "rfc3530cs",
    /* USPREP_RFC3530_NFS4_CS_PREP_CI */
    "rfc3530csci",
    /* USPREP_RFC3530_NSF4_CIS_PREP */
    "rfc3491",
    /* USPREP_RFC3530_NSF4_MIXED_PREP_PREFIX */
    "rfc3530mixp",
    /* USPREP_RFC3530_NSF4_MIXED_PREP_SUFFIX */
    "rfc3491",
    /* USPREP_RFC3722_ISCSI */
    "rfc3722",
    /* USPREP_RFC3920_NODEPREP */
    "rfc3920node",
    /* USPREP_RFC3920_RESOURCEPREP */
    "rfc3920res",
    /* USPREP_RFC4011_MIB */
    "rfc4011",
    /* USPREP_RFC4013_SASLPREP */
    "rfc4013",
    /* USPREP_RFC4505_TRACE */
    "rfc4505",
    /* USPREP_RFC4518_LDAP */
    "rfc4518",
    /* USPREP_RFC4518_LDAP_CI */
    "rfc4518ci",
};
75 :
76 : static UBool U_CALLCONV
77 0 : isSPrepAcceptable(void * /* context */,
78 : const char * /* type */,
79 : const char * /* name */,
80 : const UDataInfo *pInfo) {
81 0 : if(
82 0 : pInfo->size>=20 &&
83 0 : pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
84 0 : pInfo->charsetFamily==U_CHARSET_FAMILY &&
85 0 : pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
86 0 : pInfo->dataFormat[1]==0x50 &&
87 0 : pInfo->dataFormat[2]==0x52 &&
88 0 : pInfo->dataFormat[3]==0x50 &&
89 0 : pInfo->formatVersion[0]==3 &&
90 0 : pInfo->formatVersion[2]==UTRIE_SHIFT &&
91 0 : pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
92 : ) {
93 : //uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
94 0 : uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
95 0 : return TRUE;
96 : } else {
97 0 : return FALSE;
98 : }
99 : }
100 :
101 : static int32_t U_CALLCONV
102 0 : getSPrepFoldingOffset(uint32_t data) {
103 :
104 0 : return (int32_t)data;
105 :
106 : }
107 :
108 : /* hashes an entry */
109 : static int32_t U_CALLCONV
110 0 : hashEntry(const UHashTok parm) {
111 0 : UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
112 : UHashTok namekey, pathkey;
113 0 : namekey.pointer = b->name;
114 0 : pathkey.pointer = b->path;
115 0 : return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey);
116 : }
117 :
118 : /* compares two entries */
119 : static UBool U_CALLCONV
120 0 : compareEntries(const UHashTok p1, const UHashTok p2) {
121 0 : UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
122 0 : UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
123 : UHashTok name1, name2, path1, path2;
124 0 : name1.pointer = b1->name;
125 0 : name2.pointer = b2->name;
126 0 : path1.pointer = b1->path;
127 0 : path2.pointer = b2->path;
128 0 : return ((UBool)(uhash_compareChars(name1, name2) &
129 0 : uhash_compareChars(path1, path2)));
130 : }
131 :
132 : static void
133 0 : usprep_unload(UStringPrepProfile* data){
134 0 : udata_close(data->sprepData);
135 0 : }
136 :
137 : static int32_t
138 0 : usprep_internal_flushCache(UBool noRefCount){
139 0 : UStringPrepProfile *profile = NULL;
140 0 : UStringPrepKey *key = NULL;
141 0 : int32_t pos = UHASH_FIRST;
142 0 : int32_t deletedNum = 0;
143 : const UHashElement *e;
144 :
145 : /*
146 : * if shared data hasn't even been lazy evaluated yet
147 : * return 0
148 : */
149 0 : umtx_lock(&usprepMutex);
150 0 : if (SHARED_DATA_HASHTABLE == NULL) {
151 0 : umtx_unlock(&usprepMutex);
152 0 : return 0;
153 : }
154 :
155 : /*creates an enumeration to iterate through every element in the table */
156 0 : while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
157 : {
158 0 : profile = (UStringPrepProfile *) e->value.pointer;
159 0 : key = (UStringPrepKey *) e->key.pointer;
160 :
161 0 : if ((noRefCount== FALSE && profile->refCount == 0) ||
162 : noRefCount== TRUE) {
163 0 : deletedNum++;
164 0 : uhash_removeElement(SHARED_DATA_HASHTABLE, e);
165 :
166 : /* unload the data */
167 0 : usprep_unload(profile);
168 :
169 0 : if(key->name != NULL) {
170 0 : uprv_free(key->name);
171 0 : key->name=NULL;
172 : }
173 0 : if(key->path != NULL) {
174 0 : uprv_free(key->path);
175 0 : key->path=NULL;
176 : }
177 0 : uprv_free(profile);
178 0 : uprv_free(key);
179 : }
180 :
181 : }
182 0 : umtx_unlock(&usprepMutex);
183 :
184 0 : return deletedNum;
185 : }
186 :
187 : /* Works just like ucnv_flushCache()
188 : static int32_t
189 : usprep_flushCache(){
190 : return usprep_internal_flushCache(FALSE);
191 : }
192 : */
193 :
194 0 : static UBool U_CALLCONV usprep_cleanup(void){
195 0 : if (SHARED_DATA_HASHTABLE != NULL) {
196 0 : usprep_internal_flushCache(TRUE);
197 0 : if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
198 0 : uhash_close(SHARED_DATA_HASHTABLE);
199 0 : SHARED_DATA_HASHTABLE = NULL;
200 : }
201 : }
202 0 : gSharedDataInitOnce.reset();
203 0 : return (SHARED_DATA_HASHTABLE == NULL);
204 : }
205 : U_CDECL_END
206 :
207 :
208 : /** Initializes the cache for resources */
209 : static void U_CALLCONV
210 0 : createCache(UErrorCode &status) {
211 0 : SHARED_DATA_HASHTABLE = uhash_open(hashEntry, compareEntries, NULL, &status);
212 0 : if (U_FAILURE(status)) {
213 0 : SHARED_DATA_HASHTABLE = NULL;
214 : }
215 0 : ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
216 0 : }
217 :
218 : static void
219 0 : initCache(UErrorCode *status) {
220 0 : umtx_initOnce(gSharedDataInitOnce, &createCache, *status);
221 0 : }
222 :
223 : static UBool U_CALLCONV
224 0 : loadData(UStringPrepProfile* profile,
225 : const char* path,
226 : const char* name,
227 : const char* type,
228 : UErrorCode* errorCode) {
229 : /* load Unicode SPREP data from file */
230 0 : UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
231 : UDataMemory *dataMemory;
232 0 : const int32_t *p=NULL;
233 : const uint8_t *pb;
234 : UVersionInfo normUnicodeVersion;
235 : int32_t normUniVer, sprepUniVer, normCorrVer;
236 :
237 0 : if(errorCode==NULL || U_FAILURE(*errorCode)) {
238 0 : return 0;
239 : }
240 :
241 : /* open the data outside the mutex block */
242 : //TODO: change the path
243 0 : dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
244 0 : if(U_FAILURE(*errorCode)) {
245 0 : return FALSE;
246 : }
247 :
248 0 : p=(const int32_t *)udata_getMemory(dataMemory);
249 0 : pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
250 0 : utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
251 0 : _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
252 :
253 :
254 0 : if(U_FAILURE(*errorCode)) {
255 0 : udata_close(dataMemory);
256 0 : return FALSE;
257 : }
258 :
259 : /* in the mutex block, set the data for this process */
260 0 : umtx_lock(&usprepMutex);
261 0 : if(profile->sprepData==NULL) {
262 0 : profile->sprepData=dataMemory;
263 0 : dataMemory=NULL;
264 0 : uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
265 0 : uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
266 : } else {
267 0 : p=(const int32_t *)udata_getMemory(profile->sprepData);
268 : }
269 0 : umtx_unlock(&usprepMutex);
270 : /* initialize some variables */
271 0 : profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
272 :
273 0 : u_getUnicodeVersion(normUnicodeVersion);
274 0 : normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
275 0 : (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
276 0 : sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
277 0 : (dataVersion[2] << 8 ) + (dataVersion[3]);
278 0 : normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
279 :
280 0 : if(U_FAILURE(*errorCode)){
281 0 : udata_close(dataMemory);
282 0 : return FALSE;
283 : }
284 0 : if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
285 0 : normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
286 0 : ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
287 : ){
288 0 : *errorCode = U_INVALID_FORMAT_ERROR;
289 0 : udata_close(dataMemory);
290 0 : return FALSE;
291 : }
292 0 : profile->isDataLoaded = TRUE;
293 :
294 : /* if a different thread set it first, then close the extra data */
295 0 : if(dataMemory!=NULL) {
296 0 : udata_close(dataMemory); /* NULL if it was set correctly */
297 : }
298 :
299 :
300 0 : return profile->isDataLoaded;
301 : }
302 :
303 : static UStringPrepProfile*
304 0 : usprep_getProfile(const char* path,
305 : const char* name,
306 : UErrorCode *status){
307 :
308 0 : UStringPrepProfile* profile = NULL;
309 :
310 0 : initCache(status);
311 :
312 0 : if(U_FAILURE(*status)){
313 0 : return NULL;
314 : }
315 :
316 : UStringPrepKey stackKey;
317 : /*
318 : * const is cast way to save malloc, strcpy and free calls
319 : * we use the passed in pointers for fetching the data from the
320 : * hash table which is safe
321 : */
322 0 : stackKey.name = (char*) name;
323 0 : stackKey.path = (char*) path;
324 :
325 : /* fetch the data from the cache */
326 0 : umtx_lock(&usprepMutex);
327 0 : profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
328 0 : if(profile != NULL) {
329 0 : profile->refCount++;
330 : }
331 0 : umtx_unlock(&usprepMutex);
332 :
333 0 : if(profile == NULL) {
334 : /* else load the data and put the data in the cache */
335 0 : LocalMemory<UStringPrepProfile> newProfile;
336 0 : if(newProfile.allocateInsteadAndReset() == NULL) {
337 0 : *status = U_MEMORY_ALLOCATION_ERROR;
338 0 : return NULL;
339 : }
340 :
341 : /* load the data */
342 0 : if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
343 0 : return NULL;
344 : }
345 :
346 : /* get the options */
347 0 : newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
348 0 : newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
349 :
350 0 : if(newProfile->checkBiDi) {
351 0 : newProfile->bdp = ubidi_getSingleton();
352 : }
353 :
354 0 : LocalMemory<UStringPrepKey> key;
355 0 : LocalMemory<char> keyName;
356 0 : LocalMemory<char> keyPath;
357 0 : if( key.allocateInsteadAndReset() == NULL ||
358 0 : keyName.allocateInsteadAndCopy(uprv_strlen(name)+1) == NULL ||
359 0 : (path != NULL &&
360 0 : keyPath.allocateInsteadAndCopy(uprv_strlen(path)+1) == NULL)
361 : ) {
362 0 : *status = U_MEMORY_ALLOCATION_ERROR;
363 0 : usprep_unload(newProfile.getAlias());
364 0 : return NULL;
365 : }
366 :
367 0 : umtx_lock(&usprepMutex);
368 : // If another thread already inserted the same key/value, refcount and cleanup our thread data
369 0 : profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
370 0 : if(profile != NULL) {
371 0 : profile->refCount++;
372 0 : usprep_unload(newProfile.getAlias());
373 : }
374 : else {
375 : /* initialize the key members */
376 0 : key->name = keyName.orphan();
377 0 : uprv_strcpy(key->name, name);
378 0 : if(path != NULL){
379 0 : key->path = keyPath.orphan();
380 0 : uprv_strcpy(key->path, path);
381 : }
382 0 : profile = newProfile.orphan();
383 :
384 : /* add the data object to the cache */
385 0 : profile->refCount = 1;
386 0 : uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status);
387 : }
388 0 : umtx_unlock(&usprepMutex);
389 : }
390 :
391 0 : return profile;
392 : }
393 :
394 : U_CAPI UStringPrepProfile* U_EXPORT2
395 0 : usprep_open(const char* path,
396 : const char* name,
397 : UErrorCode* status){
398 :
399 0 : if(status == NULL || U_FAILURE(*status)){
400 0 : return NULL;
401 : }
402 :
403 : /* initialize the profile struct members */
404 0 : return usprep_getProfile(path,name,status);
405 : }
406 :
407 : U_CAPI UStringPrepProfile* U_EXPORT2
408 0 : usprep_openByType(UStringPrepProfileType type,
409 : UErrorCode* status) {
410 0 : if(status == NULL || U_FAILURE(*status)){
411 0 : return NULL;
412 : }
413 0 : int32_t index = (int32_t)type;
414 0 : if (index < 0 || index >= UPRV_LENGTHOF(PROFILE_NAMES)) {
415 0 : *status = U_ILLEGAL_ARGUMENT_ERROR;
416 0 : return NULL;
417 : }
418 0 : return usprep_open(NULL, PROFILE_NAMES[index], status);
419 : }
420 :
421 : U_CAPI void U_EXPORT2
422 0 : usprep_close(UStringPrepProfile* profile){
423 0 : if(profile==NULL){
424 0 : return;
425 : }
426 :
427 0 : umtx_lock(&usprepMutex);
428 : /* decrement the ref count*/
429 0 : if(profile->refCount > 0){
430 0 : profile->refCount--;
431 : }
432 0 : umtx_unlock(&usprepMutex);
433 :
434 : }
435 :
436 : U_CFUNC void
437 0 : uprv_syntaxError(const UChar* rules,
438 : int32_t pos,
439 : int32_t rulesLen,
440 : UParseError* parseError){
441 0 : if(parseError == NULL){
442 0 : return;
443 : }
444 0 : parseError->offset = pos;
445 0 : parseError->line = 0 ; // we are not using line numbers
446 :
447 : // for pre-context
448 0 : int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
449 0 : int32_t limit = pos;
450 :
451 0 : u_memcpy(parseError->preContext,rules+start,limit-start);
452 : //null terminate the buffer
453 0 : parseError->preContext[limit-start] = 0;
454 :
455 : // for post-context; include error rules[pos]
456 0 : start = pos;
457 0 : limit = start + (U_PARSE_CONTEXT_LEN-1);
458 0 : if (limit > rulesLen) {
459 0 : limit = rulesLen;
460 : }
461 0 : if (start < rulesLen) {
462 0 : u_memcpy(parseError->postContext,rules+start,limit-start);
463 : }
464 : //null terminate the buffer
465 0 : parseError->postContext[limit-start]= 0;
466 : }
467 :
468 :
469 : static inline UStringPrepType
470 0 : getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
471 :
472 : UStringPrepType type;
473 0 : if(trieWord == 0){
474 : /*
475 : * Initial value stored in the mapping table
476 : * just return USPREP_TYPE_LIMIT .. so that
477 : * the source codepoint is copied to the destination
478 : */
479 0 : type = USPREP_TYPE_LIMIT;
480 0 : isIndex =FALSE;
481 0 : value = 0;
482 0 : }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
483 0 : type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
484 0 : isIndex =FALSE;
485 0 : value = 0;
486 : }else{
487 : /* get the type */
488 0 : type = USPREP_MAP;
489 : /* ascertain if the value is index or delta */
490 0 : if(trieWord & 0x02){
491 0 : isIndex = TRUE;
492 0 : value = trieWord >> 2; //mask off the lower 2 bits and shift
493 : }else{
494 0 : isIndex = FALSE;
495 0 : value = (int16_t)trieWord;
496 0 : value = (value >> 2);
497 : }
498 :
499 0 : if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
500 0 : type = USPREP_DELETE;
501 0 : isIndex =FALSE;
502 0 : value = 0;
503 : }
504 : }
505 0 : return type;
506 : }
507 :
508 : // TODO: change to writing to UnicodeString not UChar *
509 : static int32_t
510 0 : usprep_map( const UStringPrepProfile* profile,
511 : const UChar* src, int32_t srcLength,
512 : UChar* dest, int32_t destCapacity,
513 : int32_t options,
514 : UParseError* parseError,
515 : UErrorCode* status ){
516 :
517 : uint16_t result;
518 0 : int32_t destIndex=0;
519 : int32_t srcIndex;
520 0 : UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
521 : UStringPrepType type;
522 : int16_t value;
523 : UBool isIndex;
524 0 : const int32_t* indexes = profile->indexes;
525 :
526 : // no error checking the caller check for error and arguments
527 : // no string length check the caller finds out the string length
528 :
529 0 : for(srcIndex=0;srcIndex<srcLength;){
530 : UChar32 ch;
531 :
532 0 : U16_NEXT(src,srcIndex,srcLength,ch);
533 :
534 0 : result=0;
535 :
536 0 : UTRIE_GET16(&profile->sprepTrie,ch,result);
537 :
538 0 : type = getValues(result, value, isIndex);
539 :
540 : // check if the source codepoint is unassigned
541 0 : if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
542 :
543 0 : uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
544 0 : *status = U_STRINGPREP_UNASSIGNED_ERROR;
545 0 : return 0;
546 :
547 0 : }else if(type == USPREP_MAP){
548 :
549 : int32_t index, length;
550 :
551 0 : if(isIndex){
552 0 : index = value;
553 0 : if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
554 0 : index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
555 0 : length = 1;
556 0 : }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
557 0 : index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
558 0 : length = 2;
559 0 : }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
560 0 : index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
561 0 : length = 3;
562 : }else{
563 0 : length = profile->mappingData[index++];
564 :
565 : }
566 :
567 : /* copy mapping to destination */
568 0 : for(int32_t i=0; i< length; i++){
569 0 : if(destIndex < destCapacity ){
570 0 : dest[destIndex] = profile->mappingData[index+i];
571 : }
572 0 : destIndex++; /* for pre-flighting */
573 : }
574 0 : continue;
575 : }else{
576 : // subtract the delta to arrive at the code point
577 0 : ch -= value;
578 : }
579 :
580 0 : }else if(type==USPREP_DELETE){
581 : // just consume the codepoint and contine
582 0 : continue;
583 : }
584 : //copy the code point into destination
585 0 : if(ch <= 0xFFFF){
586 0 : if(destIndex < destCapacity ){
587 0 : dest[destIndex] = (UChar)ch;
588 : }
589 0 : destIndex++;
590 : }else{
591 0 : if(destIndex+1 < destCapacity ){
592 0 : dest[destIndex] = U16_LEAD(ch);
593 0 : dest[destIndex+1] = U16_TRAIL(ch);
594 : }
595 0 : destIndex +=2;
596 : }
597 :
598 : }
599 :
600 0 : return u_terminateUChars(dest, destCapacity, destIndex, status);
601 : }
602 :
603 : /*
604 : 1) Map -- For each character in the input, check if it has a mapping
605 : and, if so, replace it with its mapping.
606 :
607 : 2) Normalize -- Possibly normalize the result of step 1 using Unicode
608 : normalization.
609 :
610 : 3) Prohibit -- Check for any characters that are not allowed in the
611 : output. If any are found, return an error.
612 :
613 : 4) Check bidi -- Possibly check for right-to-left characters, and if
614 : any are found, make sure that the whole string satisfies the
615 : requirements for bidirectional strings. If the string does not
616 : satisfy the requirements for bidirectional strings, return an
617 : error.
618 : [Unicode3.2] defines several bidirectional categories; each character
619 : has one bidirectional category assigned to it. For the purposes of
620 : the requirements below, an "RandALCat character" is a character that
621 : has Unicode bidirectional categories "R" or "AL"; an "LCat character"
622 : is a character that has Unicode bidirectional category "L". Note
623 :
624 :
625 : that there are many characters which fall in neither of the above
626 : definitions; Latin digits (<U+0030> through <U+0039>) are examples of
627 : this because they have bidirectional category "EN".
628 :
629 : In any profile that specifies bidirectional character handling, all
630 : three of the following requirements MUST be met:
631 :
632 : 1) The characters in section 5.8 MUST be prohibited.
633 :
634 : 2) If a string contains any RandALCat character, the string MUST NOT
635 : contain any LCat character.
636 :
637 : 3) If a string contains any RandALCat character, a RandALCat
638 : character MUST be the first character of the string, and a
639 : RandALCat character MUST be the last character of the string.
640 : */
641 : U_CAPI int32_t U_EXPORT2
642 0 : usprep_prepare( const UStringPrepProfile* profile,
643 : const UChar* src, int32_t srcLength,
644 : UChar* dest, int32_t destCapacity,
645 : int32_t options,
646 : UParseError* parseError,
647 : UErrorCode* status ){
648 :
649 : // check error status
650 0 : if(U_FAILURE(*status)){
651 0 : return 0;
652 : }
653 :
654 : //check arguments
655 0 : if(profile==NULL ||
656 0 : (src==NULL ? srcLength!=0 : srcLength<-1) ||
657 0 : (dest==NULL ? destCapacity!=0 : destCapacity<0)) {
658 0 : *status=U_ILLEGAL_ARGUMENT_ERROR;
659 0 : return 0;
660 : }
661 :
662 : //get the string length
663 0 : if(srcLength < 0){
664 0 : srcLength = u_strlen(src);
665 : }
666 : // map
667 0 : UnicodeString s1;
668 0 : UChar *b1 = s1.getBuffer(srcLength);
669 0 : if(b1==NULL){
670 0 : *status = U_MEMORY_ALLOCATION_ERROR;
671 0 : return 0;
672 : }
673 0 : int32_t b1Len = usprep_map(profile, src, srcLength,
674 0 : b1, s1.getCapacity(), options, parseError, status);
675 0 : s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
676 :
677 0 : if(*status == U_BUFFER_OVERFLOW_ERROR){
678 : // redo processing of string
679 : /* we do not have enough room so grow the buffer*/
680 0 : b1 = s1.getBuffer(b1Len);
681 0 : if(b1==NULL){
682 0 : *status = U_MEMORY_ALLOCATION_ERROR;
683 0 : return 0;
684 : }
685 :
686 0 : *status = U_ZERO_ERROR; // reset error
687 0 : b1Len = usprep_map(profile, src, srcLength,
688 0 : b1, s1.getCapacity(), options, parseError, status);
689 0 : s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
690 : }
691 0 : if(U_FAILURE(*status)){
692 0 : return 0;
693 : }
694 :
695 : // normalize
696 0 : UnicodeString s2;
697 0 : if(profile->doNFKC){
698 0 : const Normalizer2 *n2 = Normalizer2::getNFKCInstance(*status);
699 0 : FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*status));
700 0 : if(U_FAILURE(*status)){
701 0 : return 0;
702 : }
703 0 : fn2.normalize(s1, s2, *status);
704 : }else{
705 0 : s2.fastCopyFrom(s1);
706 : }
707 0 : if(U_FAILURE(*status)){
708 0 : return 0;
709 : }
710 :
711 : // Prohibit and checkBiDi in one pass
712 0 : const UChar *b2 = s2.getBuffer();
713 0 : int32_t b2Len = s2.length();
714 0 : UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
715 0 : UBool leftToRight=FALSE, rightToLeft=FALSE;
716 0 : int32_t rtlPos =-1, ltrPos =-1;
717 :
718 0 : for(int32_t b2Index=0; b2Index<b2Len;){
719 0 : UChar32 ch = 0;
720 0 : U16_NEXT(b2, b2Index, b2Len, ch);
721 :
722 : uint16_t result;
723 0 : UTRIE_GET16(&profile->sprepTrie,ch,result);
724 :
725 : int16_t value;
726 : UBool isIndex;
727 0 : UStringPrepType type = getValues(result, value, isIndex);
728 :
729 0 : if( type == USPREP_PROHIBITED ||
730 0 : ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
731 : ){
732 0 : *status = U_STRINGPREP_PROHIBITED_ERROR;
733 0 : uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError);
734 0 : return 0;
735 : }
736 :
737 0 : if(profile->checkBiDi) {
738 0 : direction = ubidi_getClass(profile->bdp, ch);
739 0 : if(firstCharDir == U_CHAR_DIRECTION_COUNT){
740 0 : firstCharDir = direction;
741 : }
742 0 : if(direction == U_LEFT_TO_RIGHT){
743 0 : leftToRight = TRUE;
744 0 : ltrPos = b2Index-1;
745 : }
746 0 : if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
747 0 : rightToLeft = TRUE;
748 0 : rtlPos = b2Index-1;
749 : }
750 : }
751 : }
752 0 : if(profile->checkBiDi == TRUE){
753 : // satisfy 2
754 0 : if( leftToRight == TRUE && rightToLeft == TRUE){
755 0 : *status = U_STRINGPREP_CHECK_BIDI_ERROR;
756 0 : uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
757 0 : return 0;
758 : }
759 :
760 : //satisfy 3
761 0 : if( rightToLeft == TRUE &&
762 0 : !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
763 0 : (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
764 : ){
765 0 : *status = U_STRINGPREP_CHECK_BIDI_ERROR;
766 0 : uprv_syntaxError(b2, rtlPos, b2Len, parseError);
767 0 : return FALSE;
768 : }
769 : }
770 0 : return s2.extract(dest, destCapacity, *status);
771 : }
772 :
773 :
774 : /* data swapping ------------------------------------------------------------ */
775 :
776 : U_CAPI int32_t U_EXPORT2
777 0 : usprep_swap(const UDataSwapper *ds,
778 : const void *inData, int32_t length, void *outData,
779 : UErrorCode *pErrorCode) {
780 : const UDataInfo *pInfo;
781 : int32_t headerSize;
782 :
783 : const uint8_t *inBytes;
784 : uint8_t *outBytes;
785 :
786 : const int32_t *inIndexes;
787 : int32_t indexes[16];
788 :
789 : int32_t i, offset, count, size;
790 :
791 : /* udata_swapDataHeader checks the arguments */
792 0 : headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
793 0 : if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
794 0 : return 0;
795 : }
796 :
797 : /* check data format and format version */
798 0 : pInfo=(const UDataInfo *)((const char *)inData+4);
799 0 : if(!(
800 0 : pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
801 0 : pInfo->dataFormat[1]==0x50 &&
802 0 : pInfo->dataFormat[2]==0x52 &&
803 0 : pInfo->dataFormat[3]==0x50 &&
804 0 : pInfo->formatVersion[0]==3
805 : )) {
806 0 : udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
807 0 : pInfo->dataFormat[0], pInfo->dataFormat[1],
808 0 : pInfo->dataFormat[2], pInfo->dataFormat[3],
809 0 : pInfo->formatVersion[0]);
810 0 : *pErrorCode=U_UNSUPPORTED_ERROR;
811 0 : return 0;
812 : }
813 :
814 0 : inBytes=(const uint8_t *)inData+headerSize;
815 0 : outBytes=(uint8_t *)outData+headerSize;
816 :
817 0 : inIndexes=(const int32_t *)inBytes;
818 :
819 0 : if(length>=0) {
820 0 : length-=headerSize;
821 0 : if(length<16*4) {
822 : udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
823 0 : length);
824 0 : *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
825 0 : return 0;
826 : }
827 : }
828 :
829 : /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
830 0 : for(i=0; i<16; ++i) {
831 0 : indexes[i]=udata_readInt32(ds, inIndexes[i]);
832 : }
833 :
834 : /* calculate the total length of the data */
835 0 : size=
836 0 : 16*4+ /* size of indexes[] */
837 0 : indexes[_SPREP_INDEX_TRIE_SIZE]+
838 0 : indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
839 :
840 0 : if(length>=0) {
841 0 : if(length<size) {
842 : udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
843 0 : length);
844 0 : *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
845 0 : return 0;
846 : }
847 :
848 : /* copy the data for inaccessible bytes */
849 0 : if(inBytes!=outBytes) {
850 0 : uprv_memcpy(outBytes, inBytes, size);
851 : }
852 :
853 0 : offset=0;
854 :
855 : /* swap the int32_t indexes[] */
856 0 : count=16*4;
857 0 : ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
858 0 : offset+=count;
859 :
860 : /* swap the UTrie */
861 0 : count=indexes[_SPREP_INDEX_TRIE_SIZE];
862 0 : utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
863 0 : offset+=count;
864 :
865 : /* swap the uint16_t mappingTable[] */
866 0 : count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
867 0 : ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
868 : //offset+=count;
869 : }
870 :
871 0 : return headerSize+size;
872 : }
873 :
874 : #endif /* #if !UCONFIG_NO_IDNA */
|