Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : *******************************************************************************
5 : *
6 : * Copyright (C) 1997-2016, International Business Machines
7 : * Corporation and others. All Rights Reserved.
8 : *
9 : *******************************************************************************
10 : * file name: loclikely.cpp
11 : * encoding: UTF-8
12 : * tab size: 8 (not used)
13 : * indentation:4
14 : *
15 : * created on: 2010feb25
16 : * created by: Markus W. Scherer
17 : *
18 : * Code for likely and minimized locale subtags, separated out from other .cpp files
19 : * that then do not depend on resource bundle code and likely-subtags data.
20 : */
21 :
22 : #include "unicode/utypes.h"
23 : #include "unicode/locid.h"
24 : #include "unicode/putil.h"
25 : #include "unicode/uchar.h"
26 : #include "unicode/uloc.h"
27 : #include "unicode/ures.h"
28 : #include "unicode/uscript.h"
29 : #include "cmemory.h"
30 : #include "cstring.h"
31 : #include "ulocimp.h"
32 : #include "ustr_imp.h"
33 :
34 : /**
35 : * This function looks for the localeID in the likelySubtags resource.
36 : *
37 : * @param localeID The tag to find.
38 : * @param buffer A buffer to hold the matching entry
39 : * @param bufferLength The length of the output buffer
40 : * @return A pointer to "buffer" if found, or a null pointer if not.
41 : */
42 : static const char* U_CALLCONV
43 4 : findLikelySubtags(const char* localeID,
44 : char* buffer,
45 : int32_t bufferLength,
46 : UErrorCode* err) {
47 4 : const char* result = NULL;
48 :
49 4 : if (!U_FAILURE(*err)) {
50 4 : int32_t resLen = 0;
51 4 : const UChar* s = NULL;
52 4 : UErrorCode tmpErr = U_ZERO_ERROR;
53 4 : UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", &tmpErr);
54 4 : if (U_SUCCESS(tmpErr)) {
55 4 : s = ures_getStringByKey(subtags, localeID, &resLen, &tmpErr);
56 :
57 4 : if (U_FAILURE(tmpErr)) {
58 : /*
59 : * If a resource is missing, it's not really an error, it's
60 : * just that we don't have any data for that particular locale ID.
61 : */
62 2 : if (tmpErr != U_MISSING_RESOURCE_ERROR) {
63 0 : *err = tmpErr;
64 : }
65 : }
66 2 : else if (resLen >= bufferLength) {
67 : /* The buffer should never overflow. */
68 0 : *err = U_INTERNAL_PROGRAM_ERROR;
69 : }
70 : else {
71 2 : u_UCharsToChars(s, buffer, resLen + 1);
72 2 : result = buffer;
73 : }
74 :
75 4 : ures_close(subtags);
76 : } else {
77 0 : *err = tmpErr;
78 : }
79 : }
80 :
81 4 : return result;
82 : }
83 :
84 : /**
85 : * Append a tag to a buffer, adding the separator if necessary. The buffer
86 : * must be large enough to contain the resulting tag plus any separator
87 : * necessary. The tag must not be a zero-length string.
88 : *
89 : * @param tag The tag to add.
90 : * @param tagLength The length of the tag.
91 : * @param buffer The output buffer.
92 : * @param bufferLength The length of the output buffer. This is an input/ouput parameter.
93 : **/
94 : static void U_CALLCONV
95 12 : appendTag(
96 : const char* tag,
97 : int32_t tagLength,
98 : char* buffer,
99 : int32_t* bufferLength) {
100 :
101 12 : if (*bufferLength > 0) {
102 6 : buffer[*bufferLength] = '_';
103 6 : ++(*bufferLength);
104 : }
105 :
106 12 : uprv_memmove(
107 : &buffer[*bufferLength],
108 : tag,
109 12 : tagLength);
110 :
111 12 : *bufferLength += tagLength;
112 12 : }
113 :
/**
 * Canonical placeholder subtags: "und" for an unknown language, "Zzzz" for an
 * unknown script and "ZZ" for an unknown region.
 **/
static const char* const unknownLanguage = "und";
static const char* const unknownScript   = "Zzzz";
static const char* const unknownRegion   = "ZZ";
120 :
121 : /**
122 : * Create a tag string from the supplied parameters. The lang, script and region
123 : * parameters may be NULL pointers. If they are, their corresponding length parameters
124 : * must be less than or equal to 0.
125 : *
126 : * If any of the language, script or region parameters are empty, and the alternateTags
127 : * parameter is not NULL, it will be parsed for potential language, script and region tags
128 : * to be used when constructing the new tag. If the alternateTags parameter is NULL, or
129 : * it contains no language tag, the default tag for the unknown language is used.
130 : *
131 : * If the length of the new string exceeds the capacity of the output buffer,
132 : * the function copies as many bytes to the output buffer as it can, and returns
133 : * the error U_BUFFER_OVERFLOW_ERROR.
134 : *
135 : * If an illegal argument is provided, the function returns the error
136 : * U_ILLEGAL_ARGUMENT_ERROR.
137 : *
138 : * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
139 : * the tag string fits in the output buffer, but the null terminator doesn't.
140 : *
141 : * @param lang The language tag to use.
142 : * @param langLength The length of the language tag.
143 : * @param script The script tag to use.
144 : * @param scriptLength The length of the script tag.
145 : * @param region The region tag to use.
146 : * @param regionLength The length of the region tag.
147 : * @param trailing Any trailing data to append to the new tag.
148 : * @param trailingLength The length of the trailing data.
149 : * @param alternateTags A string containing any alternate tags.
150 : * @param tag The output buffer.
151 : * @param tagCapacity The capacity of the output buffer.
152 : * @param err A pointer to a UErrorCode for error reporting.
153 : * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error.
154 : **/
155 : static int32_t U_CALLCONV
156 6 : createTagStringWithAlternates(
157 : const char* lang,
158 : int32_t langLength,
159 : const char* script,
160 : int32_t scriptLength,
161 : const char* region,
162 : int32_t regionLength,
163 : const char* trailing,
164 : int32_t trailingLength,
165 : const char* alternateTags,
166 : char* tag,
167 : int32_t tagCapacity,
168 : UErrorCode* err) {
169 :
170 6 : if (U_FAILURE(*err)) {
171 0 : goto error;
172 : }
173 6 : else if (tag == NULL ||
174 6 : tagCapacity <= 0 ||
175 6 : langLength >= ULOC_LANG_CAPACITY ||
176 6 : scriptLength >= ULOC_SCRIPT_CAPACITY ||
177 : regionLength >= ULOC_COUNTRY_CAPACITY) {
178 : goto error;
179 : }
180 : else {
181 : /**
182 : * ULOC_FULLNAME_CAPACITY will provide enough capacity
183 : * that we can build a string that contains the language,
184 : * script and region code without worrying about overrunning
185 : * the user-supplied buffer.
186 : **/
187 : char tagBuffer[ULOC_FULLNAME_CAPACITY];
188 6 : int32_t tagLength = 0;
189 6 : int32_t capacityRemaining = tagCapacity;
190 6 : UBool regionAppended = FALSE;
191 :
192 6 : if (langLength > 0) {
193 : appendTag(
194 : lang,
195 : langLength,
196 : tagBuffer,
197 4 : &tagLength);
198 : }
199 2 : else if (alternateTags == NULL) {
200 : /*
201 : * Append the value for an unknown language, if
202 : * we found no language.
203 : */
204 : appendTag(
205 : unknownLanguage,
206 : (int32_t)uprv_strlen(unknownLanguage),
207 : tagBuffer,
208 0 : &tagLength);
209 : }
210 : else {
211 : /*
212 : * Parse the alternateTags string for the language.
213 : */
214 : char alternateLang[ULOC_LANG_CAPACITY];
215 2 : int32_t alternateLangLength = sizeof(alternateLang);
216 :
217 : alternateLangLength =
218 : uloc_getLanguage(
219 : alternateTags,
220 : alternateLang,
221 : alternateLangLength,
222 2 : err);
223 2 : if(U_FAILURE(*err) ||
224 : alternateLangLength >= ULOC_LANG_CAPACITY) {
225 0 : goto error;
226 : }
227 2 : else if (alternateLangLength == 0) {
228 : /*
229 : * Append the value for an unknown language, if
230 : * we found no language.
231 : */
232 : appendTag(
233 : unknownLanguage,
234 : (int32_t)uprv_strlen(unknownLanguage),
235 : tagBuffer,
236 0 : &tagLength);
237 : }
238 : else {
239 : appendTag(
240 : alternateLang,
241 : alternateLangLength,
242 : tagBuffer,
243 2 : &tagLength);
244 : }
245 : }
246 :
247 6 : if (scriptLength > 0) {
248 : appendTag(
249 : script,
250 : scriptLength,
251 : tagBuffer,
252 0 : &tagLength);
253 : }
254 6 : else if (alternateTags != NULL) {
255 : /*
256 : * Parse the alternateTags string for the script.
257 : */
258 : char alternateScript[ULOC_SCRIPT_CAPACITY];
259 :
260 : const int32_t alternateScriptLength =
261 : uloc_getScript(
262 : alternateTags,
263 : alternateScript,
264 : sizeof(alternateScript),
265 2 : err);
266 :
267 2 : if (U_FAILURE(*err) ||
268 : alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
269 0 : goto error;
270 : }
271 2 : else if (alternateScriptLength > 0) {
272 : appendTag(
273 : alternateScript,
274 : alternateScriptLength,
275 : tagBuffer,
276 2 : &tagLength);
277 : }
278 : }
279 :
280 6 : if (regionLength > 0) {
281 : appendTag(
282 : region,
283 : regionLength,
284 : tagBuffer,
285 4 : &tagLength);
286 :
287 4 : regionAppended = TRUE;
288 : }
289 2 : else if (alternateTags != NULL) {
290 : /*
291 : * Parse the alternateTags string for the region.
292 : */
293 : char alternateRegion[ULOC_COUNTRY_CAPACITY];
294 :
295 : const int32_t alternateRegionLength =
296 : uloc_getCountry(
297 : alternateTags,
298 : alternateRegion,
299 : sizeof(alternateRegion),
300 0 : err);
301 0 : if (U_FAILURE(*err) ||
302 : alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
303 0 : goto error;
304 : }
305 0 : else if (alternateRegionLength > 0) {
306 : appendTag(
307 : alternateRegion,
308 : alternateRegionLength,
309 : tagBuffer,
310 0 : &tagLength);
311 :
312 0 : regionAppended = TRUE;
313 : }
314 : }
315 :
316 : {
317 : const int32_t toCopy =
318 6 : tagLength >= tagCapacity ? tagCapacity : tagLength;
319 :
320 : /**
321 : * Copy the partial tag from our internal buffer to the supplied
322 : * target.
323 : **/
324 6 : uprv_memcpy(
325 : tag,
326 : tagBuffer,
327 6 : toCopy);
328 :
329 6 : capacityRemaining -= toCopy;
330 : }
331 :
332 6 : if (trailingLength > 0) {
333 0 : if (*trailing != '@' && capacityRemaining > 0) {
334 0 : tag[tagLength++] = '_';
335 0 : --capacityRemaining;
336 0 : if (capacityRemaining > 0 && !regionAppended) {
337 : /* extra separator is required */
338 0 : tag[tagLength++] = '_';
339 0 : --capacityRemaining;
340 : }
341 : }
342 :
343 0 : if (capacityRemaining > 0) {
344 : /*
345 : * Copy the trailing data into the supplied buffer. Use uprv_memmove, since we
346 : * don't know if the user-supplied buffers overlap.
347 : */
348 : const int32_t toCopy =
349 0 : trailingLength >= capacityRemaining ? capacityRemaining : trailingLength;
350 :
351 0 : uprv_memmove(
352 : &tag[tagLength],
353 : trailing,
354 0 : toCopy);
355 : }
356 : }
357 :
358 6 : tagLength += trailingLength;
359 :
360 6 : return u_terminateChars(
361 : tag,
362 : tagCapacity,
363 : tagLength,
364 6 : err);
365 : }
366 :
367 : error:
368 :
369 : /**
370 : * An overflow indicates the locale ID passed in
371 : * is ill-formed. If we got here, and there was
372 : * no previous error, it's an implicit overflow.
373 : **/
374 0 : if (*err == U_BUFFER_OVERFLOW_ERROR ||
375 0 : U_SUCCESS(*err)) {
376 0 : *err = U_ILLEGAL_ARGUMENT_ERROR;
377 : }
378 :
379 0 : return -1;
380 : }
381 :
382 : /**
383 : * Create a tag string from the supplied parameters. The lang, script and region
384 : * parameters may be NULL pointers. If they are, their corresponding length parameters
385 : * must be less than or equal to 0. If the lang parameter is an empty string, the
386 : * default value for an unknown language is written to the output buffer.
387 : *
388 : * If the length of the new string exceeds the capacity of the output buffer,
389 : * the function copies as many bytes to the output buffer as it can, and returns
390 : * the error U_BUFFER_OVERFLOW_ERROR.
391 : *
392 : * If an illegal argument is provided, the function returns the error
393 : * U_ILLEGAL_ARGUMENT_ERROR.
394 : *
395 : * @param lang The language tag to use.
396 : * @param langLength The length of the language tag.
397 : * @param script The script tag to use.
398 : * @param scriptLength The length of the script tag.
399 : * @param region The region tag to use.
400 : * @param regionLength The length of the region tag.
401 : * @param trailing Any trailing data to append to the new tag.
402 : * @param trailingLength The length of the trailing data.
403 : * @param tag The output buffer.
404 : * @param tagCapacity The capacity of the output buffer.
405 : * @param err A pointer to a UErrorCode for error reporting.
406 : * @return The length of the tag string, which may be greater than tagCapacity.
407 : **/
408 : static int32_t U_CALLCONV
409 4 : createTagString(
410 : const char* lang,
411 : int32_t langLength,
412 : const char* script,
413 : int32_t scriptLength,
414 : const char* region,
415 : int32_t regionLength,
416 : const char* trailing,
417 : int32_t trailingLength,
418 : char* tag,
419 : int32_t tagCapacity,
420 : UErrorCode* err)
421 : {
422 : return createTagStringWithAlternates(
423 : lang,
424 : langLength,
425 : script,
426 : scriptLength,
427 : region,
428 : regionLength,
429 : trailing,
430 : trailingLength,
431 : NULL,
432 : tag,
433 : tagCapacity,
434 4 : err);
435 : }
436 :
437 : /**
438 : * Parse the language, script, and region subtags from a tag string, and copy the
439 : * results into the corresponding output parameters. The buffers are null-terminated,
440 : * unless overflow occurs.
441 : *
442 : * The langLength, scriptLength, and regionLength parameters are input/output
443 : * parameters, and must contain the capacity of their corresponding buffers on
444 : * input. On output, they will contain the actual length of the buffers, not
445 : * including the null terminator.
446 : *
447 : * If the length of any of the output subtags exceeds the capacity of the corresponding
448 : * buffer, the function copies as many bytes to the output buffer as it can, and returns
449 : * the error U_BUFFER_OVERFLOW_ERROR. It will not parse any more subtags once overflow
450 : * occurs.
451 : *
452 : * If an illegal argument is provided, the function returns the error
453 : * U_ILLEGAL_ARGUMENT_ERROR.
454 : *
455 : * @param localeID The locale ID to parse.
456 : * @param lang The language tag buffer.
457 : * @param langLength The length of the language tag.
458 : * @param script The script tag buffer.
459 : * @param scriptLength The length of the script tag.
460 : * @param region The region tag buffer.
461 : * @param regionLength The length of the region tag.
462 : * @param err A pointer to a UErrorCode for error reporting.
463 : * @return The number of chars of the localeID parameter consumed.
464 : **/
465 : static int32_t U_CALLCONV
466 2 : parseTagString(
467 : const char* localeID,
468 : char* lang,
469 : int32_t* langLength,
470 : char* script,
471 : int32_t* scriptLength,
472 : char* region,
473 : int32_t* regionLength,
474 : UErrorCode* err)
475 : {
476 2 : const char* position = localeID;
477 2 : int32_t subtagLength = 0;
478 :
479 6 : if(U_FAILURE(*err) ||
480 2 : localeID == NULL ||
481 2 : lang == NULL ||
482 2 : langLength == NULL ||
483 2 : script == NULL ||
484 2 : scriptLength == NULL ||
485 4 : region == NULL ||
486 : regionLength == NULL) {
487 0 : goto error;
488 : }
489 :
490 2 : subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position);
491 2 : u_terminateChars(lang, *langLength, subtagLength, err);
492 :
493 : /*
494 : * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
495 : * to be an error, because it indicates the user-supplied tag is
496 : * not well-formed.
497 : */
498 2 : if(U_FAILURE(*err)) {
499 0 : goto error;
500 : }
501 :
502 2 : *langLength = subtagLength;
503 :
504 : /*
505 : * If no language was present, use the value of unknownLanguage
506 : * instead. Otherwise, move past any separator.
507 : */
508 2 : if (*langLength == 0) {
509 : uprv_strcpy(
510 : lang,
511 0 : unknownLanguage);
512 0 : *langLength = (int32_t)uprv_strlen(lang);
513 : }
514 2 : else if (_isIDSeparator(*position)) {
515 2 : ++position;
516 : }
517 :
518 2 : subtagLength = ulocimp_getScript(position, script, *scriptLength, &position);
519 2 : u_terminateChars(script, *scriptLength, subtagLength, err);
520 :
521 2 : if(U_FAILURE(*err)) {
522 0 : goto error;
523 : }
524 :
525 2 : *scriptLength = subtagLength;
526 :
527 2 : if (*scriptLength > 0) {
528 0 : if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
529 : /**
530 : * If the script part is the "unknown" script, then don't return it.
531 : **/
532 0 : *scriptLength = 0;
533 : }
534 :
535 : /*
536 : * Move past any separator.
537 : */
538 0 : if (_isIDSeparator(*position)) {
539 0 : ++position;
540 : }
541 : }
542 :
543 2 : subtagLength = ulocimp_getCountry(position, region, *regionLength, &position);
544 2 : u_terminateChars(region, *regionLength, subtagLength, err);
545 :
546 2 : if(U_FAILURE(*err)) {
547 0 : goto error;
548 : }
549 :
550 2 : *regionLength = subtagLength;
551 :
552 2 : if (*regionLength > 0) {
553 2 : if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
554 : /**
555 : * If the region part is the "unknown" region, then don't return it.
556 : **/
557 0 : *regionLength = 0;
558 : }
559 0 : } else if (*position != 0 && *position != '@') {
560 : /* back up over consumed trailing separator */
561 0 : --position;
562 : }
563 :
564 : exit:
565 :
566 4 : return (int32_t)(position - localeID);
567 :
568 : error:
569 :
570 : /**
571 : * If we get here, we have no explicit error, it's the result of an
572 : * illegal argument.
573 : **/
574 0 : if (!U_FAILURE(*err)) {
575 0 : *err = U_ILLEGAL_ARGUMENT_ERROR;
576 : }
577 :
578 0 : goto exit;
579 : }
580 :
581 : static int32_t U_CALLCONV
582 2 : createLikelySubtagsString(
583 : const char* lang,
584 : int32_t langLength,
585 : const char* script,
586 : int32_t scriptLength,
587 : const char* region,
588 : int32_t regionLength,
589 : const char* variants,
590 : int32_t variantsLength,
591 : char* tag,
592 : int32_t tagCapacity,
593 : UErrorCode* err)
594 : {
595 : /**
596 : * ULOC_FULLNAME_CAPACITY will provide enough capacity
597 : * that we can build a string that contains the language,
598 : * script and region code without worrying about overrunning
599 : * the user-supplied buffer.
600 : **/
601 : char tagBuffer[ULOC_FULLNAME_CAPACITY];
602 : char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
603 :
604 2 : if(U_FAILURE(*err)) {
605 0 : goto error;
606 : }
607 :
608 : /**
609 : * Try the language with the script and region first.
610 : **/
611 2 : if (scriptLength > 0 && regionLength > 0) {
612 :
613 0 : const char* likelySubtags = NULL;
614 :
615 : createTagString(
616 : lang,
617 : langLength,
618 : script,
619 : scriptLength,
620 : region,
621 : regionLength,
622 : NULL,
623 : 0,
624 : tagBuffer,
625 : sizeof(tagBuffer),
626 0 : err);
627 0 : if(U_FAILURE(*err)) {
628 0 : goto error;
629 : }
630 :
631 : likelySubtags =
632 : findLikelySubtags(
633 : tagBuffer,
634 : likelySubtagsBuffer,
635 : sizeof(likelySubtagsBuffer),
636 0 : err);
637 0 : if(U_FAILURE(*err)) {
638 0 : goto error;
639 : }
640 :
641 0 : if (likelySubtags != NULL) {
642 : /* Always use the language tag from the
643 : maximal string, since it may be more
644 : specific than the one provided. */
645 : return createTagStringWithAlternates(
646 : NULL,
647 : 0,
648 : NULL,
649 : 0,
650 : NULL,
651 : 0,
652 : variants,
653 : variantsLength,
654 : likelySubtags,
655 : tag,
656 : tagCapacity,
657 0 : err);
658 : }
659 : }
660 :
661 : /**
662 : * Try the language with just the script.
663 : **/
664 2 : if (scriptLength > 0) {
665 :
666 0 : const char* likelySubtags = NULL;
667 :
668 : createTagString(
669 : lang,
670 : langLength,
671 : script,
672 : scriptLength,
673 : NULL,
674 : 0,
675 : NULL,
676 : 0,
677 : tagBuffer,
678 : sizeof(tagBuffer),
679 0 : err);
680 0 : if(U_FAILURE(*err)) {
681 0 : goto error;
682 : }
683 :
684 : likelySubtags =
685 : findLikelySubtags(
686 : tagBuffer,
687 : likelySubtagsBuffer,
688 : sizeof(likelySubtagsBuffer),
689 0 : err);
690 0 : if(U_FAILURE(*err)) {
691 0 : goto error;
692 : }
693 :
694 0 : if (likelySubtags != NULL) {
695 : /* Always use the language tag from the
696 : maximal string, since it may be more
697 : specific than the one provided. */
698 : return createTagStringWithAlternates(
699 : NULL,
700 : 0,
701 : NULL,
702 : 0,
703 : region,
704 : regionLength,
705 : variants,
706 : variantsLength,
707 : likelySubtags,
708 : tag,
709 : tagCapacity,
710 0 : err);
711 : }
712 : }
713 :
714 : /**
715 : * Try the language with just the region.
716 : **/
717 2 : if (regionLength > 0) {
718 :
719 2 : const char* likelySubtags = NULL;
720 :
721 : createTagString(
722 : lang,
723 : langLength,
724 : NULL,
725 : 0,
726 : region,
727 : regionLength,
728 : NULL,
729 : 0,
730 : tagBuffer,
731 : sizeof(tagBuffer),
732 2 : err);
733 2 : if(U_FAILURE(*err)) {
734 0 : goto error;
735 : }
736 :
737 : likelySubtags =
738 : findLikelySubtags(
739 : tagBuffer,
740 : likelySubtagsBuffer,
741 : sizeof(likelySubtagsBuffer),
742 2 : err);
743 2 : if(U_FAILURE(*err)) {
744 0 : goto error;
745 : }
746 :
747 2 : if (likelySubtags != NULL) {
748 : /* Always use the language tag from the
749 : maximal string, since it may be more
750 : specific than the one provided. */
751 : return createTagStringWithAlternates(
752 : NULL,
753 : 0,
754 : script,
755 : scriptLength,
756 : NULL,
757 : 0,
758 : variants,
759 : variantsLength,
760 : likelySubtags,
761 : tag,
762 : tagCapacity,
763 0 : err);
764 : }
765 : }
766 :
767 : /**
768 : * Finally, try just the language.
769 : **/
770 : {
771 2 : const char* likelySubtags = NULL;
772 :
773 : createTagString(
774 : lang,
775 : langLength,
776 : NULL,
777 : 0,
778 : NULL,
779 : 0,
780 : NULL,
781 : 0,
782 : tagBuffer,
783 : sizeof(tagBuffer),
784 2 : err);
785 2 : if(U_FAILURE(*err)) {
786 0 : goto error;
787 : }
788 :
789 : likelySubtags =
790 : findLikelySubtags(
791 : tagBuffer,
792 : likelySubtagsBuffer,
793 : sizeof(likelySubtagsBuffer),
794 2 : err);
795 2 : if(U_FAILURE(*err)) {
796 0 : goto error;
797 : }
798 :
799 2 : if (likelySubtags != NULL) {
800 : /* Always use the language tag from the
801 : maximal string, since it may be more
802 : specific than the one provided. */
803 : return createTagStringWithAlternates(
804 : NULL,
805 : 0,
806 : script,
807 : scriptLength,
808 : region,
809 : regionLength,
810 : variants,
811 : variantsLength,
812 : likelySubtags,
813 : tag,
814 : tagCapacity,
815 2 : err);
816 : }
817 : }
818 :
819 : return u_terminateChars(
820 : tag,
821 : tagCapacity,
822 : 0,
823 0 : err);
824 :
825 : error:
826 :
827 0 : if (!U_FAILURE(*err)) {
828 0 : *err = U_ILLEGAL_ARGUMENT_ERROR;
829 : }
830 :
831 0 : return -1;
832 : }
833 :
/**
 * Scan the trailing (variant) part of a locale ID and jump to the enclosing
 * function's `error:` label when a subtag is too long.
 *
 * The run counter restarts at every '-' or '_', and scanning stops at the
 * first '@'. The jump is taken when a character is seen while the counter
 * already exceeds 8, i.e. on the tenth consecutive non-separator character.
 *
 * Must be expanded as a statement (followed by ';') inside a function that
 * defines an `error:` label.
 *
 * @param trailing The trailing data to check.
 * @param trailingLength The number of chars of trailing data to look at.
 */
#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \
    do { \
        int32_t count = 0; \
        int32_t i; \
        for (i = 0; i < (trailingLength); i++) { \
            if ((trailing)[i] == '-' || (trailing)[i] == '_') { \
                count = 0; \
            } else if ((trailing)[i] == '@') { \
                break; \
            } else if (count > 8) { \
                goto error; \
            } else { \
                count++; \
            } \
        } \
    } while (0)
852 :
853 : static int32_t
854 2 : _uloc_addLikelySubtags(const char* localeID,
855 : char* maximizedLocaleID,
856 : int32_t maximizedLocaleIDCapacity,
857 : UErrorCode* err)
858 : {
859 : char lang[ULOC_LANG_CAPACITY];
860 2 : int32_t langLength = sizeof(lang);
861 : char script[ULOC_SCRIPT_CAPACITY];
862 2 : int32_t scriptLength = sizeof(script);
863 : char region[ULOC_COUNTRY_CAPACITY];
864 2 : int32_t regionLength = sizeof(region);
865 2 : const char* trailing = "";
866 2 : int32_t trailingLength = 0;
867 2 : int32_t trailingIndex = 0;
868 2 : int32_t resultLength = 0;
869 :
870 2 : if(U_FAILURE(*err)) {
871 0 : goto error;
872 : }
873 2 : else if (localeID == NULL ||
874 2 : maximizedLocaleID == NULL ||
875 2 : maximizedLocaleIDCapacity <= 0) {
876 : goto error;
877 : }
878 :
879 : trailingIndex = parseTagString(
880 : localeID,
881 : lang,
882 : &langLength,
883 : script,
884 : &scriptLength,
885 : region,
886 : ®ionLength,
887 2 : err);
888 2 : if(U_FAILURE(*err)) {
889 : /* Overflow indicates an illegal argument error */
890 0 : if (*err == U_BUFFER_OVERFLOW_ERROR) {
891 0 : *err = U_ILLEGAL_ARGUMENT_ERROR;
892 : }
893 :
894 0 : goto error;
895 : }
896 :
897 : /* Find the length of the trailing portion. */
898 2 : while (_isIDSeparator(localeID[trailingIndex])) {
899 0 : trailingIndex++;
900 : }
901 2 : trailing = &localeID[trailingIndex];
902 2 : trailingLength = (int32_t)uprv_strlen(trailing);
903 :
904 2 : CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
905 :
906 : resultLength =
907 2 : createLikelySubtagsString(
908 : lang,
909 : langLength,
910 : script,
911 : scriptLength,
912 : region,
913 : regionLength,
914 : trailing,
915 : trailingLength,
916 : maximizedLocaleID,
917 : maximizedLocaleIDCapacity,
918 2 : err);
919 :
920 2 : if (resultLength == 0) {
921 0 : const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
922 :
923 : /*
924 : * If we get here, we need to return localeID.
925 : */
926 0 : uprv_memcpy(
927 : maximizedLocaleID,
928 : localeID,
929 : localIDLength <= maximizedLocaleIDCapacity ?
930 0 : localIDLength : maximizedLocaleIDCapacity);
931 :
932 : resultLength =
933 0 : u_terminateChars(
934 : maximizedLocaleID,
935 : maximizedLocaleIDCapacity,
936 : localIDLength,
937 0 : err);
938 : }
939 :
940 2 : return resultLength;
941 :
942 : error:
943 :
944 0 : if (!U_FAILURE(*err)) {
945 0 : *err = U_ILLEGAL_ARGUMENT_ERROR;
946 : }
947 :
948 0 : return -1;
949 : }
950 :
951 : static int32_t
952 0 : _uloc_minimizeSubtags(const char* localeID,
953 : char* minimizedLocaleID,
954 : int32_t minimizedLocaleIDCapacity,
955 : UErrorCode* err)
956 : {
957 : /**
958 : * ULOC_FULLNAME_CAPACITY will provide enough capacity
959 : * that we can build a string that contains the language,
960 : * script and region code without worrying about overrunning
961 : * the user-supplied buffer.
962 : **/
963 : char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY];
964 0 : int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer);
965 :
966 : char lang[ULOC_LANG_CAPACITY];
967 0 : int32_t langLength = sizeof(lang);
968 : char script[ULOC_SCRIPT_CAPACITY];
969 0 : int32_t scriptLength = sizeof(script);
970 : char region[ULOC_COUNTRY_CAPACITY];
971 0 : int32_t regionLength = sizeof(region);
972 0 : const char* trailing = "";
973 0 : int32_t trailingLength = 0;
974 0 : int32_t trailingIndex = 0;
975 :
976 0 : if(U_FAILURE(*err)) {
977 0 : goto error;
978 : }
979 0 : else if (localeID == NULL ||
980 0 : minimizedLocaleID == NULL ||
981 0 : minimizedLocaleIDCapacity <= 0) {
982 : goto error;
983 : }
984 :
985 : trailingIndex =
986 : parseTagString(
987 : localeID,
988 : lang,
989 : &langLength,
990 : script,
991 : &scriptLength,
992 : region,
993 : ®ionLength,
994 0 : err);
995 0 : if(U_FAILURE(*err)) {
996 :
997 : /* Overflow indicates an illegal argument error */
998 0 : if (*err == U_BUFFER_OVERFLOW_ERROR) {
999 0 : *err = U_ILLEGAL_ARGUMENT_ERROR;
1000 : }
1001 :
1002 0 : goto error;
1003 : }
1004 :
1005 : /* Find the spot where the variants or the keywords begin, if any. */
1006 0 : while (_isIDSeparator(localeID[trailingIndex])) {
1007 0 : trailingIndex++;
1008 : }
1009 0 : trailing = &localeID[trailingIndex];
1010 0 : trailingLength = (int32_t)uprv_strlen(trailing);
1011 :
1012 0 : CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
1013 :
1014 : createTagString(
1015 : lang,
1016 : langLength,
1017 : script,
1018 : scriptLength,
1019 : region,
1020 : regionLength,
1021 : NULL,
1022 : 0,
1023 : maximizedTagBuffer,
1024 : maximizedTagBufferLength,
1025 0 : err);
1026 0 : if(U_FAILURE(*err)) {
1027 0 : goto error;
1028 : }
1029 :
1030 : /**
1031 : * First, we need to first get the maximization
1032 : * from AddLikelySubtags.
1033 : **/
1034 : maximizedTagBufferLength =
1035 : uloc_addLikelySubtags(
1036 : maximizedTagBuffer,
1037 : maximizedTagBuffer,
1038 : maximizedTagBufferLength,
1039 0 : err);
1040 :
1041 0 : if(U_FAILURE(*err)) {
1042 0 : goto error;
1043 : }
1044 :
1045 : /**
1046 : * Start first with just the language.
1047 : **/
1048 : {
1049 : char tagBuffer[ULOC_FULLNAME_CAPACITY];
1050 :
1051 : const int32_t tagBufferLength =
1052 0 : createLikelySubtagsString(
1053 : lang,
1054 : langLength,
1055 : NULL,
1056 : 0,
1057 : NULL,
1058 : 0,
1059 : NULL,
1060 : 0,
1061 : tagBuffer,
1062 : sizeof(tagBuffer),
1063 0 : err);
1064 :
1065 0 : if(U_FAILURE(*err)) {
1066 0 : goto error;
1067 : }
1068 0 : else if (uprv_strnicmp(
1069 : maximizedTagBuffer,
1070 : tagBuffer,
1071 : tagBufferLength) == 0) {
1072 :
1073 0 : return createTagString(
1074 : lang,
1075 : langLength,
1076 : NULL,
1077 : 0,
1078 : NULL,
1079 : 0,
1080 : trailing,
1081 : trailingLength,
1082 : minimizedLocaleID,
1083 : minimizedLocaleIDCapacity,
1084 0 : err);
1085 : }
1086 : }
1087 :
1088 : /**
1089 : * Next, try the language and region.
1090 : **/
1091 0 : if (regionLength > 0) {
1092 :
1093 : char tagBuffer[ULOC_FULLNAME_CAPACITY];
1094 :
1095 : const int32_t tagBufferLength =
1096 0 : createLikelySubtagsString(
1097 : lang,
1098 : langLength,
1099 : NULL,
1100 : 0,
1101 : region,
1102 : regionLength,
1103 : NULL,
1104 : 0,
1105 : tagBuffer,
1106 : sizeof(tagBuffer),
1107 0 : err);
1108 :
1109 0 : if(U_FAILURE(*err)) {
1110 0 : goto error;
1111 : }
1112 0 : else if (uprv_strnicmp(
1113 : maximizedTagBuffer,
1114 : tagBuffer,
1115 : tagBufferLength) == 0) {
1116 :
1117 0 : return createTagString(
1118 : lang,
1119 : langLength,
1120 : NULL,
1121 : 0,
1122 : region,
1123 : regionLength,
1124 : trailing,
1125 : trailingLength,
1126 : minimizedLocaleID,
1127 : minimizedLocaleIDCapacity,
1128 0 : err);
1129 : }
1130 : }
1131 :
1132 : /**
1133 : * Finally, try the language and script. This is our last chance,
1134 : * since trying with all three subtags would only yield the
1135 : * maximal version that we already have.
1136 : **/
1137 0 : if (scriptLength > 0 && regionLength > 0) {
1138 : char tagBuffer[ULOC_FULLNAME_CAPACITY];
1139 :
1140 : const int32_t tagBufferLength =
1141 0 : createLikelySubtagsString(
1142 : lang,
1143 : langLength,
1144 : script,
1145 : scriptLength,
1146 : NULL,
1147 : 0,
1148 : NULL,
1149 : 0,
1150 : tagBuffer,
1151 : sizeof(tagBuffer),
1152 0 : err);
1153 :
1154 0 : if(U_FAILURE(*err)) {
1155 0 : goto error;
1156 : }
1157 0 : else if (uprv_strnicmp(
1158 : maximizedTagBuffer,
1159 : tagBuffer,
1160 : tagBufferLength) == 0) {
1161 :
1162 0 : return createTagString(
1163 : lang,
1164 : langLength,
1165 : script,
1166 : scriptLength,
1167 : NULL,
1168 : 0,
1169 : trailing,
1170 : trailingLength,
1171 : minimizedLocaleID,
1172 : minimizedLocaleIDCapacity,
1173 0 : err);
1174 : }
1175 : }
1176 :
1177 : {
1178 : /**
1179 : * If we got here, return the locale ID parameter.
1180 : **/
1181 0 : const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
1182 :
1183 0 : uprv_memcpy(
1184 : minimizedLocaleID,
1185 : localeID,
1186 : localeIDLength <= minimizedLocaleIDCapacity ?
1187 0 : localeIDLength : minimizedLocaleIDCapacity);
1188 :
1189 0 : return u_terminateChars(
1190 : minimizedLocaleID,
1191 : minimizedLocaleIDCapacity,
1192 : localeIDLength,
1193 0 : err);
1194 : }
1195 :
1196 : error:
1197 :
1198 0 : if (!U_FAILURE(*err)) {
1199 0 : *err = U_ILLEGAL_ARGUMENT_ERROR;
1200 : }
1201 :
1202 0 : return -1;
1203 :
1204 :
1205 : }
1206 :
1207 : static UBool
1208 2 : do_canonicalize(const char* localeID,
1209 : char* buffer,
1210 : int32_t bufferCapacity,
1211 : UErrorCode* err)
1212 : {
1213 : uloc_canonicalize(
1214 : localeID,
1215 : buffer,
1216 : bufferCapacity,
1217 2 : err);
1218 :
1219 4 : if (*err == U_STRING_NOT_TERMINATED_WARNING ||
1220 2 : *err == U_BUFFER_OVERFLOW_ERROR) {
1221 0 : *err = U_ILLEGAL_ARGUMENT_ERROR;
1222 :
1223 0 : return FALSE;
1224 : }
1225 2 : else if (U_FAILURE(*err)) {
1226 :
1227 0 : return FALSE;
1228 : }
1229 : else {
1230 2 : return TRUE;
1231 : }
1232 : }
1233 :
1234 : U_CAPI int32_t U_EXPORT2
1235 2 : uloc_addLikelySubtags(const char* localeID,
1236 : char* maximizedLocaleID,
1237 : int32_t maximizedLocaleIDCapacity,
1238 : UErrorCode* err)
1239 : {
1240 : char localeBuffer[ULOC_FULLNAME_CAPACITY];
1241 :
1242 2 : if (!do_canonicalize(
1243 : localeID,
1244 : localeBuffer,
1245 : sizeof(localeBuffer),
1246 : err)) {
1247 0 : return -1;
1248 : }
1249 : else {
1250 : return _uloc_addLikelySubtags(
1251 : localeBuffer,
1252 : maximizedLocaleID,
1253 : maximizedLocaleIDCapacity,
1254 2 : err);
1255 : }
1256 : }
1257 :
1258 : U_CAPI int32_t U_EXPORT2
1259 0 : uloc_minimizeSubtags(const char* localeID,
1260 : char* minimizedLocaleID,
1261 : int32_t minimizedLocaleIDCapacity,
1262 : UErrorCode* err)
1263 : {
1264 : char localeBuffer[ULOC_FULLNAME_CAPACITY];
1265 :
1266 0 : if (!do_canonicalize(
1267 : localeID,
1268 : localeBuffer,
1269 : sizeof(localeBuffer),
1270 : err)) {
1271 0 : return -1;
1272 : }
1273 : else {
1274 : return _uloc_minimizeSubtags(
1275 : localeBuffer,
1276 : minimizedLocaleID,
1277 : minimizedLocaleIDCapacity,
1278 0 : err);
1279 : }
1280 : }
1281 :
// Fast-path table for common languages: every language subtag is immediately
// followed by its direction marker, '-' for left-to-right or '+' for
// right-to-left.
static const char* LANG_DIR_STRING = "root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-";
1286 :
1287 : // Implemented here because this calls uloc_addLikelySubtags().
1288 : U_CAPI UBool U_EXPORT2
1289 4 : uloc_isRightToLeft(const char *locale) {
1290 4 : UErrorCode errorCode = U_ZERO_ERROR;
1291 : char script[8];
1292 4 : int32_t scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &errorCode);
1293 4 : if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
1294 : scriptLength == 0) {
1295 : // Fastpath: We know the likely scripts and their writing direction
1296 : // for some common languages.
1297 4 : errorCode = U_ZERO_ERROR;
1298 : char lang[8];
1299 4 : int32_t langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &errorCode);
1300 4 : if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
1301 : langLength == 0) {
1302 4 : return FALSE;
1303 : }
1304 4 : const char* langPtr = uprv_strstr(LANG_DIR_STRING, lang);
1305 4 : if (langPtr != NULL) {
1306 4 : switch (langPtr[langLength]) {
1307 4 : case '-': return FALSE;
1308 0 : case '+': return TRUE;
1309 0 : default: break; // partial match of a longer code
1310 : }
1311 : }
1312 : // Otherwise, find the likely script.
1313 0 : errorCode = U_ZERO_ERROR;
1314 : char likely[ULOC_FULLNAME_CAPACITY];
1315 0 : (void)uloc_addLikelySubtags(locale, likely, UPRV_LENGTHOF(likely), &errorCode);
1316 0 : if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
1317 0 : return FALSE;
1318 : }
1319 0 : scriptLength = uloc_getScript(likely, script, UPRV_LENGTHOF(script), &errorCode);
1320 0 : if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
1321 : scriptLength == 0) {
1322 0 : return FALSE;
1323 : }
1324 : }
1325 0 : UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script);
1326 0 : return uscript_isRightToLeft(scriptCode);
1327 : }
1328 :
1329 : U_NAMESPACE_BEGIN
1330 :
1331 : UBool
1332 0 : Locale::isRightToLeft() const {
1333 0 : return uloc_isRightToLeft(getBaseName());
1334 : }
1335 :
1336 : U_NAMESPACE_END
1337 :
1338 : // The following must at least allow for rg key value (6) plus terminator (1).
1339 : #define ULOC_RG_BUFLEN 8
1340 :
1341 : U_CAPI int32_t U_EXPORT2
1342 0 : ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
1343 : char *region, int32_t regionCapacity, UErrorCode* status) {
1344 0 : if (U_FAILURE(*status)) {
1345 0 : return 0;
1346 : }
1347 : char rgBuf[ULOC_RG_BUFLEN];
1348 0 : UErrorCode rgStatus = U_ZERO_ERROR;
1349 :
1350 : // First check for rg keyword value
1351 0 : int32_t rgLen = uloc_getKeywordValue(localeID, "rg", rgBuf, ULOC_RG_BUFLEN, &rgStatus);
1352 0 : if (U_FAILURE(rgStatus) || rgLen != 6) {
1353 0 : rgLen = 0;
1354 : } else {
1355 : // rgBuf guaranteed to be zero terminated here, with text len 6
1356 0 : char *rgPtr = rgBuf;
1357 0 : for (; *rgPtr!= 0; rgPtr++) {
1358 0 : *rgPtr = uprv_toupper(*rgPtr);
1359 : }
1360 0 : rgLen = (uprv_strcmp(rgBuf+2, "ZZZZ") == 0)? 2: 0;
1361 : }
1362 :
1363 0 : if (rgLen == 0) {
1364 : // No valid rg keyword value, try for unicode_region_subtag
1365 0 : rgLen = uloc_getCountry(localeID, rgBuf, ULOC_RG_BUFLEN, status);
1366 0 : if (U_FAILURE(*status)) {
1367 0 : rgLen = 0;
1368 0 : } else if (rgLen == 0 && inferRegion) {
1369 : // no unicode_region_subtag but inferRegion TRUE, try likely subtags
1370 : char locBuf[ULOC_FULLNAME_CAPACITY];
1371 0 : rgStatus = U_ZERO_ERROR;
1372 0 : (void)uloc_addLikelySubtags(localeID, locBuf, ULOC_FULLNAME_CAPACITY, &rgStatus);
1373 0 : if (U_SUCCESS(rgStatus)) {
1374 0 : rgLen = uloc_getCountry(locBuf, rgBuf, ULOC_RG_BUFLEN, status);
1375 0 : if (U_FAILURE(*status)) {
1376 0 : rgLen = 0;
1377 : }
1378 : }
1379 : }
1380 : }
1381 :
1382 0 : rgBuf[rgLen] = 0;
1383 0 : uprv_strncpy(region, rgBuf, regionCapacity);
1384 0 : return u_terminateChars(region, regionCapacity, rgLen, status);
1385 : }
1386 :
|