Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : **********************************************************************
5 : * Copyright (C) 1997-2016, International Business Machines
6 : * Corporation and others. All Rights Reserved.
7 : **********************************************************************
8 : *
9 : * File ULOC.CPP
10 : *
11 : * Modification History:
12 : *
13 : * Date Name Description
14 : * 04/01/97 aliu Creation.
15 : * 08/21/98 stephen JDK 1.2 sync
16 : * 12/08/98 rtg New Locale implementation and C API
17 : * 03/15/99 damiba overhaul.
18 : * 04/06/99 stephen changed setDefault() to realloc and copy
19 : * 06/14/99 stephen Changed calls to ures_open for new params
20 : * 07/21/99 stephen Modified setDefault() to propagate to C++
21 : * 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
22 : * brought canonicalization code into line with spec
23 : *****************************************************************************/
24 :
25 : /*
26 : POSIX's locale format, from putil.c: [no spaces]
27 :
28 : ll [ _CC ] [ . MM ] [ @ VV]
29 :
30 : l = lang, C = ctry, M = charmap, V = variant
31 : */
32 :
33 : #include "unicode/utypes.h"
34 : #include "unicode/ustring.h"
35 : #include "unicode/uloc.h"
36 :
37 : #include "putilimp.h"
38 : #include "ustr_imp.h"
39 : #include "ulocimp.h"
40 : #include "umutex.h"
41 : #include "cstring.h"
42 : #include "cmemory.h"
43 : #include "locmap.h"
44 : #include "uarrsort.h"
45 : #include "uenumimp.h"
46 : #include "uassert.h"
47 : #include "charstr.h"
48 :
49 : #include <stdio.h> /* for sprintf */
50 :
51 : U_NAMESPACE_USE
52 :
53 : /* ### Declarations **************************************************/
54 :
55 : /* Locale stuff from locid.cpp */
56 : U_CFUNC void locale_set_default(const char *id);
57 : U_CFUNC const char *locale_get_default(void);
58 : U_CFUNC int32_t
59 : locale_getKeywords(const char *localeID,
60 : char prev,
61 : char *keywords, int32_t keywordCapacity,
62 : char *values, int32_t valuesCapacity, int32_t *valLen,
63 : UBool valuesToo,
64 : UErrorCode *status);
65 :
66 : /* ### Data tables **************************************************/
67 :
/**
 * ISO 639 language codes known to this file.
 *
 * Every language appears exactly once, under its 2-letter code when it
 * has one and under its 3-letter code otherwise.
 *
 * Layout:
 *   1. a primary list, terminated by NULL.  This part is handed directly
 *      to users by some API, so it MUST stay in sorted order;
 *   2. a secondary list, terminated by a second NULL, holding codes that
 *      are supported but not exposed through user API.
 *
 * Invariant: LANGUAGES[i] and LANGUAGES_3[i] must describe the same
 * language for every index i; edit both tables together.
 *
 * Notes
 *
 * - Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
 *   include the revisions up to 2001/7/27 *CWB*
 * - The 3 character codes are the terminology codes like RFC 3066, which
 *   is compatible with prior ICU codes.
 * - "in" "iw" "ji" "jw" & "sh" have been withdrawn.  They remain in the
 *   table, but in the secondary list at the end, because their 3
 *   character codes are duplicates; this avoids bad searches going from
 *   3 to 2 character codes.
 * - The range qaa-qtz is reserved for local use.
 */
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
/* ISO639 table version is 20150505 */
static const char * const LANGUAGES[] = {
    /* ---- primary list: sorted, user visible ---- */
    "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "aeb",
    "af",  "afh", "agq", "ain", "ak",  "akk", "akz", "ale",
    "aln", "alt", "am",  "an",  "ang", "anp", "ar",  "arc",
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "as",
    "asa", "ase", "ast", "av",  "avk", "awa", "ay",  "az",
    "ba",  "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
    "be",  "bej", "bem", "bew", "bez", "bfd", "bfq", "bg",
    "bgn", "bho", "bi",  "bik", "bin", "bjn", "bkm", "bla",
    "bm",  "bn",  "bo",  "bpy", "bqi", "br",  "bra", "brh",
    "brx", "bs",  "bss", "bua", "bug", "bum", "byn", "byv",
    "ca",  "cad", "car", "cay", "cch", "ce",  "ceb", "cgg",
    "ch",  "chb", "chg", "chk", "chm", "chn", "cho", "chp",
    "chr", "chy", "ckb", "co",  "cop", "cps", "cr",  "crh",
    "cs",  "csb", "cu",  "cv",  "cy",
    "da",  "dak", "dar", "dav", "de",  "del", "den", "dgr",
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv",
    "dyo", "dyu", "dz",  "dzg",
    "ebu", "ee",  "efi", "egl", "egy", "eka", "el",  "elx",
    "en",  "enm", "eo",  "es",  "esu", "et",  "eu",  "ewo",
    "ext",
    "fa",  "fan", "fat", "ff",  "fi",  "fil", "fit", "fj",
    "fo",  "fon", "fr",  "frc", "frm", "fro", "frp", "frr",
    "frs", "fur", "fy",
    "ga",  "gaa", "gag", "gan", "gay", "gba", "gbz", "gd",
    "gez", "gil", "gl",  "glk", "gmh", "gn",  "goh", "gom",
    "gon", "gor", "got", "grb", "grc", "gsw", "gu",  "guc",
    "gur", "guz", "gv",  "gwi",
    "ha",  "hai", "hak", "haw", "he",  "hi",  "hif", "hil",
    "hit", "hmn", "ho",  "hr",  "hsb", "hsn", "ht",  "hu",
    "hup", "hy",  "hz",
    "ia",  "iba", "ibb", "id",  "ie",  "ig",  "ii",  "ik",
    "ilo", "inh", "io",  "is",  "it",  "iu",  "izh",
    "ja",  "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
    "jv",
    "ka",  "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kg",  "kgp",
    "kha", "kho", "khq", "khw", "ki",  "kiu", "kj",  "kk",
    "kkj", "kl",  "kln", "km",  "kmb", "kn",  "ko",  "koi",
    "kok", "kos", "kpe", "kr",  "krc", "kri", "krj", "krl",
    "kru", "ks",  "ksb", "ksf", "ksh", "ku",  "kum", "kut",
    "kv",  "kw",  "ky",
    "la",  "lad", "lag", "lah", "lam", "lb",  "lez", "lfn",
    "lg",  "li",  "lij", "liv", "lkt", "lmo", "ln",  "lo",
    "lol", "loz", "lrc", "lt",  "ltg", "lu",  "lua", "lui",
    "lun", "luo", "lus", "luy", "lv",  "lzh", "lzz",
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg",  "mga",
    "mgh", "mgo", "mh",  "mi",  "mic", "min", "mis", "mk",
    "ml",  "mn",  "mnc", "mni", "moh", "mos", "mr",  "mrj",
    "ms",  "mt",  "mua", "mul", "mus", "mwl", "mwr", "mwv",
    "my",  "mye", "myv", "mzn",
    "na",  "nan", "nap", "naq", "nb",  "nd",  "nds", "ne",
    "new", "ng",  "nia", "niu", "njo", "nl",  "nmg", "nn",
    "nnh", "no",  "nog", "non", "nov", "nqo", "nr",  "nso",
    "nus", "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi",
    "oc",  "oj",  "om",  "or",  "os",  "osa", "ota",
    "pa",  "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
    "pdt", "peo", "pfl", "phn", "pi",  "pl",  "pms", "pnt",
    "pon", "prg", "pro", "ps",  "pt",
    "qu",  "quc", "qug",
    "raj", "rap", "rar", "rgn", "rif", "rm",  "rn",  "ro",
    "rof", "rom", "rtm", "ru",  "rue", "rug", "rup",
    "rw",  "rwk",
    "sa",  "sad", "sah", "sam", "saq", "sas", "sat", "saz",
    "sba", "sbp", "sc",  "scn", "sco", "sd",  "sdc", "sdh",
    "se",  "see", "seh", "sei", "sel", "ses", "sg",  "sga",
    "sgs", "shi", "shn", "shu", "si",  "sid", "sk",
    "sl",  "sli", "sly", "sm",  "sma", "smj", "smn", "sms",
    "sn",  "snk", "so",  "sog", "sq",  "sr",  "srn", "srr",
    "ss",  "ssy", "st",  "stq", "su",  "suk", "sus", "sux",
    "sv",  "sw",  "swb", "swc", "syc", "syr", "szl",
    "ta",  "tcy", "te",  "tem", "teo", "ter", "tet", "tg",
    "th",  "ti",  "tig", "tiv", "tk",  "tkl", "tkr", "tl",
    "tlh", "tli", "tly", "tmh", "tn",  "to",  "tog", "tpi",
    "tr",  "tru", "trv", "ts",  "tsd", "tsi", "tt",  "ttt",
    "tum", "tvl", "tw",  "twq", "ty",  "tyv", "tzm",
    "udm", "ug",  "uga", "uk",  "umb", "und", "ur",  "uz",
    "vai", "ve",  "vec", "vep", "vi",  "vls", "vmf", "vo",
    "vot", "vro", "vun",
    "wa",  "wae", "wal", "war", "was", "wbp", "wo",  "wuu",
    "xal", "xh",  "xmf", "xog",
    "yao", "yap", "yav", "ybb", "yi",  "yo",  "yrl", "yue",
    "za",  "zap", "zbl", "zea", "zen", "zgh", "zh",  "zu",
    "zun", "zxx", "zza",
NULL,
    /* ---- secondary list: obsolete language codes, not user visible ---- */
    "in",  "iw",  "ji",  "jw",  "sh",
NULL
};
190 :
/*
 * Parallel arrays: DEPRECATED_LANGUAGES[i] is a withdrawn language code and
 * REPLACEMENT_LANGUAGES[i] is the code listed in its place.  Both arrays
 * end with two NULL entries and must keep the same length and order.
 */
static const char* const DEPRECATED_LANGUAGES[]={
    "in", "iw", "ji", "jw",
    NULL, NULL
};
static const char* const REPLACEMENT_LANGUAGES[]={
 /* "in", "iw", "ji", "jw" */
    "id", "he", "yi", "jv",
    NULL, NULL
};
197 :
/**
 * 3-letter language codes, index-aligned with LANGUAGES.
 *
 * This lookup table is used to convert a 3-letter language code to its
 * 2-letter equivalent where one exists.  For every valid index i,
 * LANGUAGES_3[i] must name the same language as LANGUAGES[i]; when a
 * language has no 2-letter code, the same 3-letter code occupies both
 * tables at that index.
 *
 * Layout mirrors LANGUAGES: a primary list terminated by NULL, then a
 * secondary list terminated by another NULL.  The two lists correspond
 * to the two lists in LANGUAGES.  The commented-out line in the
 * secondary list is copied from LANGUAGES to make eyeballing easier.
 */
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
/* ISO639 table version is 20150505 */
static const char * const LANGUAGES_3[] = {
    /* ---- primary list: same order as LANGUAGES ---- */
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
    "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
    "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc",
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "asm",
    "asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze",
    "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
    "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul",
    "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
    "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
    "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
    "cat", "cad", "car", "cay", "cch", "che", "ceb", "cgg",
    "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
    "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
    "ces", "csb", "chu", "chv", "cym",
    "dan", "dak", "dar", "dav", "deu", "del", "den", "dgr",
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div",
    "dyo", "dyu", "dzo", "dzg",
    "ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx",
    "eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo",
    "ext",
    "fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij",
    "fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr",
    "frs", "fur", "fry",
    "gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla",
    "gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom",
    "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc",
    "gur", "guz", "glv", "gwi",
    "hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil",
    "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun",
    "hup", "hye", "her",
    "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk",
    "ilo", "inh", "ido", "isl", "ita", "iku", "izh",
    "jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
    "jav",
    "kat", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp",
    "kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz",
    "kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi",
    "kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl",
    "kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut",
    "kom", "cor", "kir",
    "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn",
    "lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao",
    "lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui",
    "lun", "luo", "lus", "luy", "lav", "lzh", "lzz",
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
    "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
    "mal", "mon", "mnc", "mni", "moh", "mos", "mar", "mrj",
    "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
    "mya", "mye", "myv", "mzn",
    "nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep",
    "new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno",
    "nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso",
    "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi",
    "oci", "oji", "orm", "ori", "oss", "osa", "ota",
    "pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
    "pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt",
    "pon", "prg", "pro", "pus", "por",
    "que", "quc", "qug",
    "raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron",
    "rof", "rom", "rtm", "rus", "rue", "rug", "rup",
    "kin", "rwk",
    "san", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
    "sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh",
    "sme", "see", "seh", "sei", "sel", "ses", "sag", "sga",
    "sgs", "shi", "shn", "shu", "sin", "sid", "slk",
    "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
    "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
    "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
    "swe", "swa", "swb", "swc", "syc", "syr", "szl",
    "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
    "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", "tgl",
    "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",
    "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
    "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
    "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
    "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol",
    "vot", "vro", "vun",
    "wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu",
    "xal", "xho", "xmf", "xog",
    "yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue",
    "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
    "zun", "zxx", "zza",
NULL,
    /* ---- secondary list: same order as the obsolete codes in LANGUAGES ---- */
/*  "in",  "iw",  "ji",  "jw",  "sh",                          */
    "ind", "heb", "yid", "jaw", "srp",
NULL
};
306 :
/**
 * 2-letter country codes.
 *
 * Layout:
 *   1. a primary list, terminated by NULL.  This part is handed directly
 *      to users by some API, so it MUST stay in sorted order;
 *   2. a secondary list, terminated by a second NULL, holding codes that
 *      are supported but not exposed through user API.
 *
 * Invariant: COUNTRIES[i] and COUNTRIES_3[i] must describe the same
 * country for every index i; edit both tables together.
 *
 * Notes:
 *
 * - ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
 *   http://www.evertype.com/standards/iso3166/iso3166-1-en.html
 *   New codes were added while the old ones were kept for compatibility;
 *   updated to include the 1999/12/03 revisions *CWB*
 * - RO(ROM) is now RO(ROU) according to
 *   http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
 */
static const char * const COUNTRIES[] = {
    /* ---- primary list: sorted, user visible ---- */
    "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",
    "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",
    "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",
    "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",
    "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",
    "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",
    "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",
    "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",
    "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",
    "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",
    "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",
    "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",
    "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",
    "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",
    "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",
    "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",
    "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",
    "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",
    "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",
    "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",
    "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",
    "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",
    "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",
    "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",
    "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",
    "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",
    "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",
    "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",
    "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",
    "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",
NULL,
    /* ---- secondary list: obsolete country codes, not user visible ---- */
    "AN",  "BU",  "CS",  "FX",  "RO",  "SU",  "TP",  "YD",  "YU",  "ZR",
NULL
};
366 :
/*
 * Parallel arrays: DEPRECATED_COUNTRIES[i] is a deprecated country code and
 * REPLACEMENT_COUNTRIES[i] is the code listed in its place.  Both arrays
 * end with two NULL entries and must keep the same length and order.
 */
static const char* const DEPRECATED_COUNTRIES[] = {
    /* deprecated country list */
    "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH",
    "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR",
    NULL, NULL
};
static const char* const REPLACEMENT_COUNTRIES[] = {
    /* replacement country codes */
 /* "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", */
    "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU",
 /* "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR"  */
    "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD",
    NULL, NULL
};
374 :
/**
 * 3-letter country codes, index-aligned with COUNTRIES.
 *
 * This lookup table is used to convert a 3-letter country code to its
 * 2-letter equivalent.  For every valid index i, COUNTRIES_3[i] must
 * name the same country as COUNTRIES[i].  Each row is preceded by a
 * commented-out copy of the matching COUNTRIES row to make eyeballing
 * the correspondence easier.
 *
 * Layout mirrors COUNTRIES: a primary list terminated by NULL, then a
 * secondary list terminated by another NULL.  The two lists correspond
 * to the two lists in COUNTRIES.
 */
static const char * const COUNTRIES_3[] = {
/*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",      */
    "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
/*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",     */
    "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
/*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",     */
    "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
/*  "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",     */
    "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
/*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",     */
    "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
/*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",     */
    "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
/*  "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",     */
    "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
/*  "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",     */
    "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
/*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",     */
    "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
/*  "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",     */
    "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
/*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",     */
    "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
/*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",     */
    "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
/*  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",     */
    "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
/*  "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",     */
    "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
/*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",     */
    "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
/*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",     */
    "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
/*  "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",     */
    "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
/*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",     */
    "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
/*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",     */
    "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
/*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",     */
    "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
/*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",     */
    "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
/*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",     */
    "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
/*  "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",     */
    "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
/*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",     */
    "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
/*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",     */
    "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
/*  "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",     */
    "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
/*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",     */
    "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
/*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",     */
    "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
/*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",     */
    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
/*  "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",          */
    "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
NULL,
/*  "AN",  "BU",  "CS",  "FX",  "RO",  "SU",  "TP",  "YD",  "YU",  "ZR" */
    "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
NULL
};
454 :
/** One row of the locale-ID canonicalization table. */
typedef struct CanonicalizationMap {
    const char *id;          /* input ID */
    const char *canonicalID; /* canonicalized output ID */
    const char *keyword;     /* keyword, or NULL if none */
    const char *value;       /* keyword value, or NULL if kw==NULL */
} CanonicalizationMap;

/**
 * A map to canonicalize locale IDs.  This handles a variety of
 * different semantic kinds of transformations.
 *
 * Columns: input ID, canonical ID, then an optional keyword/value pair
 * (both NULL when the row carries no keyword).
 */
static const CanonicalizationMap CANONICALIZE_MAP[] = {
    /* id                   canonicalID     keyword      value          */
    { "",                   "en_US_POSIX",  NULL,        NULL          }, /* .NET name */
    { "c",                  "en_US_POSIX",  NULL,        NULL          }, /* POSIX name */
    { "posix",              "en_US_POSIX",  NULL,        NULL          }, /* POSIX name (alias of C) */
    { "art_LOJBAN",         "jbo",          NULL,        NULL          }, /* registered name */
    { "az_AZ_CYRL",         "az_Cyrl_AZ",   NULL,        NULL          }, /* .NET name */
    { "az_AZ_LATN",         "az_Latn_AZ",   NULL,        NULL          }, /* .NET name */
    { "ca_ES_PREEURO",      "ca_ES",        "currency",  "ESP"         },
    { "de__PHONEBOOK",      "de",           "collation", "phonebook"   }, /* Old ICU name */
    { "de_AT_PREEURO",      "de_AT",        "currency",  "ATS"         },
    { "de_DE_PREEURO",      "de_DE",        "currency",  "DEM"         },
    { "de_LU_PREEURO",      "de_LU",        "currency",  "LUF"         },
    { "el_GR_PREEURO",      "el_GR",        "currency",  "GRD"         },
    { "en_BE_PREEURO",      "en_BE",        "currency",  "BEF"         },
    { "en_IE_PREEURO",      "en_IE",        "currency",  "IEP"         },
    { "es__TRADITIONAL",    "es",           "collation", "traditional" }, /* Old ICU name */
    { "es_ES_PREEURO",      "es_ES",        "currency",  "ESP"         },
    { "eu_ES_PREEURO",      "eu_ES",        "currency",  "ESP"         },
    { "fi_FI_PREEURO",      "fi_FI",        "currency",  "FIM"         },
    { "fr_BE_PREEURO",      "fr_BE",        "currency",  "BEF"         },
    { "fr_FR_PREEURO",      "fr_FR",        "currency",  "FRF"         },
    { "fr_LU_PREEURO",      "fr_LU",        "currency",  "LUF"         },
    { "ga_IE_PREEURO",      "ga_IE",        "currency",  "IEP"         },
    { "gl_ES_PREEURO",      "gl_ES",        "currency",  "ESP"         },
    { "hi__DIRECT",         "hi",           "collation", "direct"      }, /* Old ICU name */
    { "it_IT_PREEURO",      "it_IT",        "currency",  "ITL"         },
    { "ja_JP_TRADITIONAL",  "ja_JP",        "calendar",  "japanese"    }, /* Old ICU name */
    { "nb_NO_NY",           "nn_NO",        NULL,        NULL          }, /* "markus said this was ok" :-) */
    { "nl_BE_PREEURO",      "nl_BE",        "currency",  "BEF"         },
    { "nl_NL_PREEURO",      "nl_NL",        "currency",  "NLG"         },
    { "pt_PT_PREEURO",      "pt_PT",        "currency",  "PTE"         },
    { "sr_SP_CYRL",         "sr_Cyrl_RS",   NULL,        NULL          }, /* .NET name */
    { "sr_SP_LATN",         "sr_Latn_RS",   NULL,        NULL          }, /* .NET name */
    { "sr_YU_CYRILLIC",     "sr_Cyrl_RS",   NULL,        NULL          }, /* Linux name */
    { "th_TH_TRADITIONAL",  "th_TH",        "calendar",  "buddhist"    }, /* Old ICU name */
    { "uz_UZ_CYRILLIC",     "uz_Cyrl_UZ",   NULL,        NULL          }, /* Linux name */
    { "uz_UZ_CYRL",         "uz_Cyrl_UZ",   NULL,        NULL          }, /* .NET name */
    { "uz_UZ_LATN",         "uz_Latn_UZ",   NULL,        NULL          }, /* .NET name */
    { "zh_CHS",             "zh_Hans",      NULL,        NULL          }, /* .NET name */
    { "zh_CHT",             "zh_Hant",      NULL,        NULL          }, /* .NET name */
    { "zh_GAN",             "gan",          NULL,        NULL          }, /* registered name */
    { "zh_GUOYU",           "zh",           NULL,        NULL          }, /* registered name */
    { "zh_HAKKA",           "hak",          NULL,        NULL          }, /* registered name */
    { "zh_MIN_NAN",         "nan",          NULL,        NULL          }, /* registered name */
    { "zh_WUU",             "wuu",          NULL,        NULL          }, /* registered name */
    { "zh_XIANG",           "hsn",          NULL,        NULL          }, /* registered name */
    { "zh_YUE",             "yue",          NULL,        NULL          }, /* registered name */
};
514 :
/** One row of the variant-to-keyword table. */
typedef struct VariantMap {
    const char *variant; /* input ID */
    const char *keyword; /* keyword, or NULL if none */
    const char *value;   /* keyword value, or NULL if kw==NULL */
} VariantMap;

/** Variant names paired with the keyword/value each one corresponds to. */
static const VariantMap VARIANT_MAP[] = {
    /* variant   keyword      value    */
    { "EURO",    "currency",  "EUR"    },
    { "PINYIN",  "collation", "pinyin" }, /* Solaris variant */
    { "STROKE",  "collation", "stroke" }  /* Solaris variant */
};
526 :
527 : /* ### BCP47 Conversion *******************************************/
/*
 * Test if the locale id has BCP47 u extension and does not have '@'.
 *
 * Evaluates to true when `id` is non-NULL, contains no '@', and
 * getShortestSubtagLength(id) == 1.
 *
 * The macro now tests the argument it is given.  It previously passed a
 * variable literally named `localeID` to getShortestSubtagLength(), so it
 * only worked where the call site happened to have such a variable holding
 * the same string.
 *
 * `id` is evaluated more than once; pass an expression without side effects.
 */
#define _hasBCP47Extension(id) \
    ((id) != NULL && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)

/*
 * Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails.
 *
 * Calls uloc_forLanguageTag(id, buffer, length, NULL, err).  When that
 * returns a value <= 0 or leaves *err in a failure state, finalID is set to
 * the original id; otherwise finalID is set to buffer.
 *
 * NOTE: this expands to a bare if/else statement (kept that way so existing
 * call sites are unaffected).  Use it only as a complete statement inside
 * braces, never as the unbraced body of another if/else.
 */
#define _ConvertBCP47(finalID, id, buffer, length, err) \
    if (uloc_forLanguageTag((id), (buffer), (length), NULL, (err)) <= 0 || U_FAILURE(*(err))) { \
        (finalID) = (id); \
    } else { \
        (finalID) = (buffer); \
    }
/*
 * Gets the size of the shortest subtag in the given localeID.
 *
 * Subtags are separated by '_' or '-'.  Only subtags that are followed by a
 * separator are measured; the final subtag (the one that runs to the end of
 * the string) is not.  Empty subtags produced by leading or consecutive
 * separators are ignored.  When no subtag is measured at all, the length of
 * the whole string is returned.
 *
 * @param localeID NUL-terminated locale ID; must not be NULL.
 * @return length of the shortest separator-terminated subtag, or the full
 *         string length if there is none.
 */
static int32_t getShortestSubtagLength(const char *localeID) {
    int32_t shortest = -1;  /* -1 until the first subtag has been measured */
    int32_t current = 0;    /* characters seen in the subtag being scanned */
    int32_t total = 0;      /* characters seen in the whole string */
    const char *p;

    for (p = localeID; *p != 0; ++p, ++total) {
        if (*p == '_' || *p == '-') {
            /* A separator closes the current subtag (if it is non-empty). */
            if (current != 0 && (shortest < 0 || current < shortest)) {
                shortest = current;
            }
            current = 0;
        } else {
            ++current;
        }
    }

    return (shortest < 0) ? total : shortest;
}
562 :
563 : /* ### Keywords **************************************************/
/*
 * Character-class helpers for keyword parsing.  These are macros, so the
 * argument may be evaluated more than once.
 */
/* True for the characters '0' through '9'. */
#define UPRV_ISDIGIT(c) ((c) >= '0' && (c) <= '9')
/* True when uprv_isASCIILetter(c) holds or c is a digit. */
#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c))
/* Punctuation/symbols allowed in legacy key values: '_', '-', '+' and '/'. */
#define UPRV_OK_VALUE_PUNCTUATION(c) \
    ((c) == '_' || (c) == '-' || (c) == '+' || (c) == '/')

/* Size in bytes of the keyword-name buffers, terminating NUL included. */
#define ULOC_KEYWORD_BUFFER_LEN 25
/* Capacity of the keyword list built while parsing a locale ID. */
#define ULOC_MAX_NO_KEYWORDS 25
571 :
572 : U_CAPI const char * U_EXPORT2
573 41 : locale_getKeywordsStart(const char *localeID) {
574 41 : const char *result = NULL;
575 41 : if((result = uprv_strchr(localeID, '@')) != NULL) {
576 0 : return result;
577 : }
578 : #if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
579 : else {
580 : /* We do this because the @ sign is variant, and the @ sign used on one
581 : EBCDIC machine won't be compiled the same way on other EBCDIC based
582 : machines. */
583 : static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
584 : const uint8_t *charToFind = ebcdicSigns;
585 : while(*charToFind) {
586 : if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
587 : return result;
588 : }
589 : charToFind++;
590 : }
591 : }
592 : #endif
593 41 : return NULL;
594 : }
595 :
596 : /**
597 : * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
598 : * @param keywordName incoming name to be canonicalized
599 : * @param status return status (keyword too long)
600 : * @return length of the keyword name
601 : */
602 0 : static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
603 : {
604 0 : int32_t keywordNameLen = 0;
605 :
606 0 : for (; *keywordName != 0; keywordName++) {
607 0 : if (!UPRV_ISALPHANUM(*keywordName)) {
608 0 : *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
609 0 : return 0;
610 : }
611 0 : if (keywordNameLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
612 0 : buf[keywordNameLen++] = uprv_tolower(*keywordName);
613 : } else {
614 : /* keyword name too long for internal buffer */
615 0 : *status = U_INTERNAL_PROGRAM_ERROR;
616 0 : return 0;
617 : }
618 : }
619 0 : if (keywordNameLen == 0) {
620 0 : *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name */
621 0 : return 0;
622 : }
623 0 : buf[keywordNameLen] = 0; /* terminate */
624 :
625 0 : return keywordNameLen;
626 : }
627 :
628 : typedef struct {
629 : char keyword[ULOC_KEYWORD_BUFFER_LEN];
630 : int32_t keywordLen;
631 : const char *valueStart;
632 : int32_t valueLen;
633 : } KeywordStruct;
634 :
635 : static int32_t U_CALLCONV
636 0 : compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
637 0 : const char* leftString = ((const KeywordStruct *)left)->keyword;
638 0 : const char* rightString = ((const KeywordStruct *)right)->keyword;
639 0 : return uprv_strcmp(leftString, rightString);
640 : }
641 :
642 : /**
643 : * Both addKeyword and addValue must already be in canonical form.
644 : * Either both addKeyword and addValue are NULL, or neither is NULL.
645 : * If they are not NULL they must be zero terminated.
646 : * If addKeyword is not NULL it must have length small enough to fit in KeywordStruct.keyword.
647 : */
648 : static int32_t
649 0 : _getKeywords(const char *localeID,
650 : char prev,
651 : char *keywords, int32_t keywordCapacity,
652 : char *values, int32_t valuesCapacity, int32_t *valLen,
653 : UBool valuesToo,
654 : const char* addKeyword,
655 : const char* addValue,
656 : UErrorCode *status)
657 : {
658 : KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
659 :
660 0 : int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
661 0 : int32_t numKeywords = 0;
662 0 : const char* pos = localeID;
663 0 : const char* equalSign = NULL;
664 0 : const char* semicolon = NULL;
665 0 : int32_t i = 0, j, n;
666 0 : int32_t keywordsLen = 0;
667 0 : int32_t valuesLen = 0;
668 :
669 0 : if(prev == '@') { /* start of keyword definition */
670 : /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
671 0 : do {
672 0 : UBool duplicate = FALSE;
673 : /* skip leading spaces */
674 0 : while(*pos == ' ') {
675 0 : pos++;
676 : }
677 0 : if (!*pos) { /* handle trailing "; " */
678 0 : break;
679 : }
680 0 : if(numKeywords == maxKeywords) {
681 0 : *status = U_INTERNAL_PROGRAM_ERROR;
682 0 : return 0;
683 : }
684 0 : equalSign = uprv_strchr(pos, '=');
685 0 : semicolon = uprv_strchr(pos, ';');
686 : /* lack of '=' [foo@currency] is illegal */
687 : /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
688 0 : if(!equalSign || (semicolon && semicolon<equalSign)) {
689 0 : *status = U_INVALID_FORMAT_ERROR;
690 0 : return 0;
691 : }
692 : /* need to normalize both keyword and keyword name */
693 0 : if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
694 : /* keyword name too long for internal buffer */
695 0 : *status = U_INTERNAL_PROGRAM_ERROR;
696 0 : return 0;
697 : }
698 0 : for(i = 0, n = 0; i < equalSign - pos; ++i) {
699 0 : if (pos[i] != ' ') {
700 0 : keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
701 : }
702 : }
703 :
704 : /* zero-length keyword is an error. */
705 0 : if (n == 0) {
706 0 : *status = U_INVALID_FORMAT_ERROR;
707 0 : return 0;
708 : }
709 :
710 0 : keywordList[numKeywords].keyword[n] = 0;
711 0 : keywordList[numKeywords].keywordLen = n;
712 : /* now grab the value part. First we skip the '=' */
713 0 : equalSign++;
714 : /* then we leading spaces */
715 0 : while(*equalSign == ' ') {
716 0 : equalSign++;
717 : }
718 :
719 : /* Premature end or zero-length value */
720 0 : if (!*equalSign || equalSign == semicolon) {
721 0 : *status = U_INVALID_FORMAT_ERROR;
722 0 : return 0;
723 : }
724 :
725 0 : keywordList[numKeywords].valueStart = equalSign;
726 :
727 0 : pos = semicolon;
728 0 : i = 0;
729 0 : if(pos) {
730 0 : while(*(pos - i - 1) == ' ') {
731 0 : i++;
732 : }
733 0 : keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
734 0 : pos++;
735 : } else {
736 0 : i = (int32_t)uprv_strlen(equalSign);
737 0 : while(i && equalSign[i-1] == ' ') {
738 0 : i--;
739 : }
740 0 : keywordList[numKeywords].valueLen = i;
741 : }
742 : /* If this is a duplicate keyword, then ignore it */
743 0 : for (j=0; j<numKeywords; ++j) {
744 0 : if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
745 0 : duplicate = TRUE;
746 0 : break;
747 : }
748 : }
749 0 : if (!duplicate) {
750 0 : ++numKeywords;
751 : }
752 0 : } while(pos);
753 :
754 : /* Handle addKeyword/addValue. */
755 0 : if (addKeyword != NULL) {
756 0 : UBool duplicate = FALSE;
757 0 : U_ASSERT(addValue != NULL);
758 : /* Search for duplicate; if found, do nothing. Explicit keyword
759 : overrides addKeyword. */
760 0 : for (j=0; j<numKeywords; ++j) {
761 0 : if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) {
762 0 : duplicate = TRUE;
763 0 : break;
764 : }
765 : }
766 0 : if (!duplicate) {
767 0 : if (numKeywords == maxKeywords) {
768 0 : *status = U_INTERNAL_PROGRAM_ERROR;
769 0 : return 0;
770 : }
771 0 : uprv_strcpy(keywordList[numKeywords].keyword, addKeyword);
772 0 : keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword);
773 0 : keywordList[numKeywords].valueStart = addValue;
774 0 : keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue);
775 0 : ++numKeywords;
776 : }
777 : } else {
778 0 : U_ASSERT(addValue == NULL);
779 : }
780 :
781 : /* now we have a list of keywords */
782 : /* we need to sort it */
783 0 : uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
784 :
785 : /* Now construct the keyword part */
786 0 : for(i = 0; i < numKeywords; i++) {
787 0 : if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
788 0 : uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
789 0 : if(valuesToo) {
790 0 : keywords[keywordsLen + keywordList[i].keywordLen] = '=';
791 : } else {
792 0 : keywords[keywordsLen + keywordList[i].keywordLen] = 0;
793 : }
794 : }
795 0 : keywordsLen += keywordList[i].keywordLen + 1;
796 0 : if(valuesToo) {
797 0 : if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
798 0 : uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
799 : }
800 0 : keywordsLen += keywordList[i].valueLen;
801 :
802 0 : if(i < numKeywords - 1) {
803 0 : if(keywordsLen < keywordCapacity) {
804 0 : keywords[keywordsLen] = ';';
805 : }
806 0 : keywordsLen++;
807 : }
808 : }
809 0 : if(values) {
810 0 : if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
811 0 : uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
812 0 : values[valuesLen + keywordList[i].valueLen] = 0;
813 : }
814 0 : valuesLen += keywordList[i].valueLen + 1;
815 : }
816 : }
817 0 : if(values) {
818 0 : values[valuesLen] = 0;
819 0 : if(valLen) {
820 0 : *valLen = valuesLen;
821 : }
822 : }
823 0 : return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
824 : } else {
825 0 : return 0;
826 : }
827 : }
828 :
829 : U_CFUNC int32_t
830 0 : locale_getKeywords(const char *localeID,
831 : char prev,
832 : char *keywords, int32_t keywordCapacity,
833 : char *values, int32_t valuesCapacity, int32_t *valLen,
834 : UBool valuesToo,
835 : UErrorCode *status) {
836 0 : return _getKeywords(localeID, prev, keywords, keywordCapacity,
837 : values, valuesCapacity, valLen, valuesToo,
838 0 : NULL, NULL, status);
839 : }
840 :
841 : U_CAPI int32_t U_EXPORT2
842 0 : uloc_getKeywordValue(const char* localeID,
843 : const char* keywordName,
844 : char* buffer, int32_t bufferCapacity,
845 : UErrorCode* status)
846 : {
847 0 : const char* startSearchHere = NULL;
848 0 : const char* nextSeparator = NULL;
849 : char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
850 : char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
851 0 : int32_t result = 0;
852 :
853 0 : if(status && U_SUCCESS(*status) && localeID) {
854 : char tempBuffer[ULOC_FULLNAME_CAPACITY];
855 : const char* tmpLocaleID;
856 :
857 0 : if (keywordName == NULL || keywordName[0] == 0) {
858 0 : *status = U_ILLEGAL_ARGUMENT_ERROR;
859 0 : return 0;
860 : }
861 :
862 0 : locale_canonKeywordName(keywordNameBuffer, keywordName, status);
863 0 : if(U_FAILURE(*status)) {
864 0 : return 0;
865 : }
866 :
867 0 : if (_hasBCP47Extension(localeID)) {
868 0 : _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
869 : } else {
870 0 : tmpLocaleID=localeID;
871 : }
872 :
873 0 : startSearchHere = locale_getKeywordsStart(tmpLocaleID);
874 0 : if(startSearchHere == NULL) {
875 : /* no keywords, return at once */
876 0 : return 0;
877 : }
878 :
879 : /* find the first keyword */
880 0 : while(startSearchHere) {
881 : const char* keyValueTail;
882 : int32_t keyValueLen;
883 :
884 0 : startSearchHere++; /* skip @ or ; */
885 0 : nextSeparator = uprv_strchr(startSearchHere, '=');
886 0 : if(!nextSeparator) {
887 0 : *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
888 0 : return 0;
889 : }
890 : /* strip leading & trailing spaces (TC decided to tolerate these) */
891 0 : while(*startSearchHere == ' ') {
892 0 : startSearchHere++;
893 : }
894 0 : keyValueTail = nextSeparator;
895 0 : while (keyValueTail > startSearchHere && *(keyValueTail-1) == ' ') {
896 0 : keyValueTail--;
897 : }
898 : /* now keyValueTail points to first char after the keyName */
899 : /* copy & normalize keyName from locale */
900 0 : if (startSearchHere == keyValueTail) {
901 0 : *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
902 0 : return 0;
903 : }
904 0 : keyValueLen = 0;
905 0 : while (startSearchHere < keyValueTail) {
906 0 : if (!UPRV_ISALPHANUM(*startSearchHere)) {
907 0 : *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
908 0 : return 0;
909 : }
910 0 : if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
911 0 : localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*startSearchHere++);
912 : } else {
913 : /* keyword name too long for internal buffer */
914 0 : *status = U_INTERNAL_PROGRAM_ERROR;
915 0 : return 0;
916 : }
917 : }
918 0 : localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
919 :
920 0 : startSearchHere = uprv_strchr(nextSeparator, ';');
921 :
922 0 : if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
923 : /* current entry matches the keyword. */
924 0 : nextSeparator++; /* skip '=' */
925 : /* First strip leading & trailing spaces (TC decided to tolerate these) */
926 0 : while(*nextSeparator == ' ') {
927 0 : nextSeparator++;
928 : }
929 0 : keyValueTail = (startSearchHere)? startSearchHere: nextSeparator + uprv_strlen(nextSeparator);
930 0 : while(keyValueTail > nextSeparator && *(keyValueTail-1) == ' ') {
931 0 : keyValueTail--;
932 : }
933 : /* Now copy the value, but check well-formedness */
934 0 : if (nextSeparator == keyValueTail) {
935 0 : *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value name in passed-in locale */
936 0 : return 0;
937 : }
938 0 : keyValueLen = 0;
939 0 : while (nextSeparator < keyValueTail) {
940 0 : if (!UPRV_ISALPHANUM(*nextSeparator) && !UPRV_OK_VALUE_PUNCTUATION(*nextSeparator)) {
941 0 : *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
942 0 : return 0;
943 : }
944 0 : if (keyValueLen < bufferCapacity) {
945 : /* Should we lowercase value to return here? Tests expect as-is. */
946 0 : buffer[keyValueLen++] = *nextSeparator++;
947 : } else { /* keep advancing so we return correct length in case of overflow */
948 0 : keyValueLen++;
949 0 : nextSeparator++;
950 : }
951 : }
952 0 : result = u_terminateChars(buffer, bufferCapacity, keyValueLen, status);
953 0 : return result;
954 : }
955 : }
956 : }
957 0 : return 0;
958 : }
959 :
960 : U_CAPI int32_t U_EXPORT2
961 0 : uloc_setKeywordValue(const char* keywordName,
962 : const char* keywordValue,
963 : char* buffer, int32_t bufferCapacity,
964 : UErrorCode* status)
965 : {
966 : /* TODO: sorting. removal. */
967 : int32_t keywordNameLen;
968 : int32_t keywordValueLen;
969 : int32_t bufLen;
970 0 : int32_t needLen = 0;
971 : char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
972 : char keywordValueBuffer[ULOC_KEYWORDS_CAPACITY+1];
973 : char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
974 : int32_t rc;
975 0 : char* nextSeparator = NULL;
976 0 : char* nextEqualsign = NULL;
977 0 : char* startSearchHere = NULL;
978 0 : char* keywordStart = NULL;
979 0 : CharString updatedKeysAndValues;
980 : int32_t updatedKeysAndValuesLen;
981 0 : UBool handledInputKeyAndValue = FALSE;
982 0 : char keyValuePrefix = '@';
983 :
984 0 : if(U_FAILURE(*status)) {
985 0 : return -1;
986 : }
987 0 : if (keywordName == NULL || keywordName[0] == 0 || bufferCapacity <= 1) {
988 0 : *status = U_ILLEGAL_ARGUMENT_ERROR;
989 0 : return 0;
990 : }
991 0 : bufLen = (int32_t)uprv_strlen(buffer);
992 0 : if(bufferCapacity<bufLen) {
993 : /* The capacity is less than the length?! Is this NULL terminated? */
994 0 : *status = U_ILLEGAL_ARGUMENT_ERROR;
995 0 : return 0;
996 : }
997 0 : keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
998 0 : if(U_FAILURE(*status)) {
999 0 : return 0;
1000 : }
1001 :
1002 0 : keywordValueLen = 0;
1003 0 : if(keywordValue) {
1004 0 : while (*keywordValue != 0) {
1005 0 : if (!UPRV_ISALPHANUM(*keywordValue) && !UPRV_OK_VALUE_PUNCTUATION(*keywordValue)) {
1006 0 : *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
1007 0 : return 0;
1008 : }
1009 0 : if (keywordValueLen < ULOC_KEYWORDS_CAPACITY) {
1010 : /* Should we force lowercase in value to set? */
1011 0 : keywordValueBuffer[keywordValueLen++] = *keywordValue++;
1012 : } else {
1013 : /* keywordValue too long for internal buffer */
1014 0 : *status = U_INTERNAL_PROGRAM_ERROR;
1015 0 : return 0;
1016 : }
1017 : }
1018 : }
1019 0 : keywordValueBuffer[keywordValueLen] = 0; /* terminate */
1020 :
1021 0 : startSearchHere = (char*)locale_getKeywordsStart(buffer);
1022 0 : if(startSearchHere == NULL || (startSearchHere[1]==0)) {
1023 0 : if(keywordValueLen == 0) { /* no keywords = nothing to remove */
1024 0 : return bufLen;
1025 : }
1026 :
1027 0 : needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
1028 0 : if(startSearchHere) { /* had a single @ */
1029 0 : needLen--; /* already had the @ */
1030 : /* startSearchHere points at the @ */
1031 : } else {
1032 0 : startSearchHere=buffer+bufLen;
1033 : }
1034 0 : if(needLen >= bufferCapacity) {
1035 0 : *status = U_BUFFER_OVERFLOW_ERROR;
1036 0 : return needLen; /* no change */
1037 : }
1038 0 : *startSearchHere++ = '@';
1039 0 : uprv_strcpy(startSearchHere, keywordNameBuffer);
1040 0 : startSearchHere += keywordNameLen;
1041 0 : *startSearchHere++ = '=';
1042 0 : uprv_strcpy(startSearchHere, keywordValueBuffer);
1043 0 : return needLen;
1044 : } /* end shortcut - no @ */
1045 :
1046 0 : keywordStart = startSearchHere;
1047 : /* search for keyword */
1048 0 : while(keywordStart) {
1049 : const char* keyValueTail;
1050 : int32_t keyValueLen;
1051 :
1052 0 : keywordStart++; /* skip @ or ; */
1053 0 : nextEqualsign = uprv_strchr(keywordStart, '=');
1054 0 : if (!nextEqualsign) {
1055 0 : *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
1056 0 : return 0;
1057 : }
1058 : /* strip leading & trailing spaces (TC decided to tolerate these) */
1059 0 : while(*keywordStart == ' ') {
1060 0 : keywordStart++;
1061 : }
1062 0 : keyValueTail = nextEqualsign;
1063 0 : while (keyValueTail > keywordStart && *(keyValueTail-1) == ' ') {
1064 0 : keyValueTail--;
1065 : }
1066 : /* now keyValueTail points to first char after the keyName */
1067 : /* copy & normalize keyName from locale */
1068 0 : if (keywordStart == keyValueTail) {
1069 0 : *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
1070 0 : return 0;
1071 : }
1072 0 : keyValueLen = 0;
1073 0 : while (keywordStart < keyValueTail) {
1074 0 : if (!UPRV_ISALPHANUM(*keywordStart)) {
1075 0 : *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
1076 0 : return 0;
1077 : }
1078 0 : if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
1079 0 : localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*keywordStart++);
1080 : } else {
1081 : /* keyword name too long for internal buffer */
1082 0 : *status = U_INTERNAL_PROGRAM_ERROR;
1083 0 : return 0;
1084 : }
1085 : }
1086 0 : localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
1087 :
1088 0 : nextSeparator = uprv_strchr(nextEqualsign, ';');
1089 :
1090 : /* start processing the value part */
1091 0 : nextEqualsign++; /* skip '=' */
1092 : /* First strip leading & trailing spaces (TC decided to tolerate these) */
1093 0 : while(*nextEqualsign == ' ') {
1094 0 : nextEqualsign++;
1095 : }
1096 0 : keyValueTail = (nextSeparator)? nextSeparator: nextEqualsign + uprv_strlen(nextEqualsign);
1097 0 : while(keyValueTail > nextEqualsign && *(keyValueTail-1) == ' ') {
1098 0 : keyValueTail--;
1099 : }
1100 0 : if (nextEqualsign == keyValueTail) {
1101 0 : *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value in passed-in locale */
1102 0 : return 0;
1103 : }
1104 :
1105 0 : rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
1106 0 : if(rc == 0) {
1107 : /* Current entry matches the input keyword. Update the entry */
1108 0 : if(keywordValueLen > 0) { /* updating a value */
1109 0 : updatedKeysAndValues.append(keyValuePrefix, *status);
1110 0 : keyValuePrefix = ';'; /* for any subsequent key-value pair */
1111 0 : updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1112 0 : updatedKeysAndValues.append('=', *status);
1113 0 : updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1114 : } /* else removing this entry, don't emit anything */
1115 0 : handledInputKeyAndValue = TRUE;
1116 : } else {
1117 : /* input keyword sorts earlier than current entry, add before current entry */
1118 0 : if (rc < 0 && keywordValueLen > 0 && !handledInputKeyAndValue) {
1119 : /* insert new entry at this location */
1120 0 : updatedKeysAndValues.append(keyValuePrefix, *status);
1121 0 : keyValuePrefix = ';'; /* for any subsequent key-value pair */
1122 0 : updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1123 0 : updatedKeysAndValues.append('=', *status);
1124 0 : updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1125 0 : handledInputKeyAndValue = TRUE;
1126 : }
1127 : /* copy the current entry */
1128 0 : updatedKeysAndValues.append(keyValuePrefix, *status);
1129 0 : keyValuePrefix = ';'; /* for any subsequent key-value pair */
1130 0 : updatedKeysAndValues.append(localeKeywordNameBuffer, keyValueLen, *status);
1131 0 : updatedKeysAndValues.append('=', *status);
1132 0 : updatedKeysAndValues.append(nextEqualsign, keyValueTail-nextEqualsign, *status);
1133 : }
1134 0 : if (!nextSeparator && keywordValueLen > 0 && !handledInputKeyAndValue) {
1135 : /* append new entry at the end, it sorts later than existing entries */
1136 0 : updatedKeysAndValues.append(keyValuePrefix, *status);
1137 : /* skip keyValuePrefix update, no subsequent key-value pair */
1138 0 : updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1139 0 : updatedKeysAndValues.append('=', *status);
1140 0 : updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1141 0 : handledInputKeyAndValue = TRUE;
1142 : }
1143 0 : keywordStart = nextSeparator;
1144 : } /* end loop searching */
1145 :
1146 : /* Any error from updatedKeysAndValues.append above would be internal and not due to
1147 : * problems with the passed-in locale. So if we did encounter problems with the
1148 : * passed-in locale above, those errors took precedence and overrode any error
1149 : * status from updatedKeysAndValues.append, and also caused a return of 0. If there
1150 : * are errors here they are from updatedKeysAndValues.append; they do cause an
1151 : * error return but the passed-in locale is unmodified and the original bufLen is
1152 : * returned.
1153 : */
1154 0 : if (!handledInputKeyAndValue || U_FAILURE(*status)) {
1155 : /* if input key/value specified removal of a keyword not present in locale, or
1156 : * there was an error in CharString.append, leave original locale alone. */
1157 0 : return bufLen;
1158 : }
1159 :
1160 0 : updatedKeysAndValuesLen = updatedKeysAndValues.length();
1161 : /* needLen = length of the part before '@' + length of updated key-value part including '@' */
1162 0 : needLen = (int32_t)(startSearchHere - buffer) + updatedKeysAndValuesLen;
1163 0 : if(needLen >= bufferCapacity) {
1164 0 : *status = U_BUFFER_OVERFLOW_ERROR;
1165 0 : return needLen; /* no change */
1166 : }
1167 0 : if (updatedKeysAndValuesLen > 0) {
1168 0 : uprv_strncpy(startSearchHere, updatedKeysAndValues.data(), updatedKeysAndValuesLen);
1169 : }
1170 0 : buffer[needLen]=0;
1171 0 : return needLen;
1172 : }
1173 :
1174 : /* ### ID parsing implementation **************************************************/
1175 :
/* TRUE if 'a' is one of the letters that can start a special prefix.
 * Note: the argument is evaluated more than once. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/*returns TRUE if one of the special prefixes is here (s=string)
  'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant
 * Note: the argument is evaluated more than once.
 */
#define _isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))
1186 :
1187 0 : static char* _strnchr(const char* str, int32_t len, char c) {
1188 0 : U_ASSERT(str != 0 && len >= 0);
1189 0 : while (len-- != 0) {
1190 0 : char d = *str;
1191 0 : if (d == c) {
1192 0 : return (char*) str;
1193 0 : } else if (d == 0) {
1194 0 : break;
1195 : }
1196 0 : ++str;
1197 : }
1198 0 : return NULL;
1199 : }
1200 :
1201 : /**
1202 : * Lookup 'key' in the array 'list'. The array 'list' should contain
1203 : * a NULL entry, followed by more entries, and a second NULL entry.
1204 : *
1205 : * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1206 : * COUNTRIES_3.
1207 : */
1208 0 : static int16_t _findIndex(const char* const* list, const char* key)
1209 : {
1210 0 : const char* const* anchor = list;
1211 0 : int32_t pass = 0;
1212 :
1213 : /* Make two passes through two NULL-terminated arrays at 'list' */
1214 0 : while (pass++ < 2) {
1215 0 : while (*list) {
1216 0 : if (uprv_strcmp(key, *list) == 0) {
1217 0 : return (int16_t)(list - anchor);
1218 : }
1219 0 : list++;
1220 : }
1221 0 : ++list; /* skip final NULL *CWB*/
1222 : }
1223 0 : return -1;
1224 : }
1225 :
/*
 * Copy as much of src as fits into dest (at most destCapacity chars, no NUL
 * terminator is written) and return the full length of src, whether or not
 * all of it was copied.
 */
static inline int32_t
_copyCount(char *dest, int32_t destCapacity, const char *src) {
    const char *cursor = src;

    /* phase 1: copy while there is both input and room */
    while (*cursor != 0 && destCapacity > 0) {
        *dest++ = *cursor++;
        --destCapacity;
    }
    /* phase 2: keep counting whatever did not fit */
    while (*cursor != 0) {
        ++cursor;
    }
    return (int32_t)(cursor - src);
}
1245 :
1246 : U_CFUNC const char*
1247 0 : uloc_getCurrentCountryID(const char* oldID){
1248 0 : int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
1249 0 : if (offset >= 0) {
1250 0 : return REPLACEMENT_COUNTRIES[offset];
1251 : }
1252 0 : return oldID;
1253 : }
1254 : U_CFUNC const char*
1255 0 : uloc_getCurrentLanguageID(const char* oldID){
1256 0 : int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
1257 0 : if (offset >= 0) {
1258 0 : return REPLACEMENT_LANGUAGES[offset];
1259 : }
1260 0 : return oldID;
1261 : }
1262 : /*
1263 : * the internal functions _getLanguage(), _getCountry(), _getVariant()
1264 : * avoid duplicating code to handle the earlier locale ID pieces
1265 : * in the functions for the later ones by
1266 : * setting the *pEnd pointer to where they stopped parsing
1267 : *
1268 : * TODO try to use this in Locale
1269 : */
1270 : U_CFUNC int32_t
1271 99 : ulocimp_getLanguage(const char *localeID,
1272 : char *language, int32_t languageCapacity,
1273 : const char **pEnd) {
1274 99 : int32_t i=0;
1275 : int32_t offset;
1276 99 : char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1277 :
1278 : /* if it starts with i- or x- then copy that prefix */
1279 99 : if(_isIDPrefix(localeID)) {
1280 0 : if(i<languageCapacity) {
1281 0 : language[i]=(char)uprv_tolower(*localeID);
1282 : }
1283 0 : if(i<languageCapacity) {
1284 0 : language[i+1]='-';
1285 : }
1286 0 : i+=2;
1287 0 : localeID+=2;
1288 : }
1289 :
1290 : /* copy the language as far as possible and count its length */
1291 491 : while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1292 196 : if(i<languageCapacity) {
1293 74 : language[i]=(char)uprv_tolower(*localeID);
1294 : }
1295 196 : if(i<3) {
1296 196 : U_ASSERT(i>=0);
1297 196 : lang[i]=(char)uprv_tolower(*localeID);
1298 : }
1299 196 : i++;
1300 196 : localeID++;
1301 : }
1302 :
1303 99 : if(i==3) {
1304 : /* convert 3 character code to 2 character code if possible *CWB*/
1305 0 : offset=_findIndex(LANGUAGES_3, lang);
1306 0 : if(offset>=0) {
1307 0 : i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
1308 : }
1309 : }
1310 :
1311 99 : if(pEnd!=NULL) {
1312 82 : *pEnd=localeID;
1313 : }
1314 99 : return i;
1315 : }
1316 :
1317 : U_CFUNC int32_t
1318 81 : ulocimp_getScript(const char *localeID,
1319 : char *script, int32_t scriptCapacity,
1320 : const char **pEnd)
1321 : {
1322 81 : int32_t idLen = 0;
1323 :
1324 81 : if (pEnd != NULL) {
1325 64 : *pEnd = localeID;
1326 : }
1327 :
1328 : /* copy the second item as far as possible and count its length */
1329 828 : while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])
1330 413 : && uprv_isASCIILetter(localeID[idLen])) {
1331 166 : idLen++;
1332 : }
1333 :
1334 : /* If it's exactly 4 characters long, then it's a script and not a country. */
1335 81 : if (idLen == 4) {
1336 : int32_t i;
1337 2 : if (pEnd != NULL) {
1338 0 : *pEnd = localeID+idLen;
1339 : }
1340 2 : if(idLen > scriptCapacity) {
1341 0 : idLen = scriptCapacity;
1342 : }
1343 2 : if (idLen >= 1) {
1344 2 : script[0]=(char)uprv_toupper(*(localeID++));
1345 : }
1346 8 : for (i = 1; i < idLen; i++) {
1347 6 : script[i]=(char)uprv_tolower(*(localeID++));
1348 : }
1349 : }
1350 : else {
1351 79 : idLen = 0;
1352 : }
1353 81 : return idLen;
1354 : }
1355 :
1356 : U_CFUNC int32_t
1357 64 : ulocimp_getCountry(const char *localeID,
1358 : char *country, int32_t countryCapacity,
1359 : const char **pEnd)
1360 : {
1361 64 : int32_t idLen=0;
1362 64 : char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
1363 : int32_t offset;
1364 :
1365 : /* copy the country as far as possible and count its length */
1366 320 : while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
1367 128 : if(idLen<(ULOC_COUNTRY_CAPACITY-1)) { /*CWB*/
1368 128 : cnty[idLen]=(char)uprv_toupper(localeID[idLen]);
1369 : }
1370 128 : idLen++;
1371 : }
1372 :
1373 : /* the country should be either length 2 or 3 */
1374 64 : if (idLen == 2 || idLen == 3) {
1375 64 : UBool gotCountry = FALSE;
1376 : /* convert 3 character code to 2 character code if possible *CWB*/
1377 64 : if(idLen==3) {
1378 0 : offset=_findIndex(COUNTRIES_3, cnty);
1379 0 : if(offset>=0) {
1380 0 : idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);
1381 0 : gotCountry = TRUE;
1382 : }
1383 : }
1384 64 : if (!gotCountry) {
1385 64 : int32_t i = 0;
1386 192 : for (i = 0; i < idLen; i++) {
1387 128 : if (i < countryCapacity) {
1388 62 : country[i]=(char)uprv_toupper(localeID[i]);
1389 : }
1390 : }
1391 : }
1392 64 : localeID+=idLen;
1393 : } else {
1394 0 : idLen = 0;
1395 : }
1396 :
1397 64 : if(pEnd!=NULL) {
1398 53 : *pEnd=localeID;
1399 : }
1400 :
1401 64 : return idLen;
1402 : }
1403 :
1404 : /**
1405 : * @param needSeparator if true, then add leading '_' if any variants
1406 : * are added to 'variant'
1407 : */
1408 : static int32_t
1409 0 : _getVariantEx(const char *localeID,
1410 : char prev,
1411 : char *variant, int32_t variantCapacity,
1412 : UBool needSeparator) {
1413 0 : int32_t i=0;
1414 :
1415 : /* get one or more variant tags and separate them with '_' */
1416 0 : if(_isIDSeparator(prev)) {
1417 : /* get a variant string after a '-' or '_' */
1418 0 : while(!_isTerminator(*localeID)) {
1419 0 : if (needSeparator) {
1420 0 : if (i<variantCapacity) {
1421 0 : variant[i] = '_';
1422 : }
1423 0 : ++i;
1424 0 : needSeparator = FALSE;
1425 : }
1426 0 : if(i<variantCapacity) {
1427 0 : variant[i]=(char)uprv_toupper(*localeID);
1428 0 : if(variant[i]=='-') {
1429 0 : variant[i]='_';
1430 : }
1431 : }
1432 0 : i++;
1433 0 : localeID++;
1434 : }
1435 : }
1436 :
1437 : /* if there is no variant tag after a '-' or '_' then look for '@' */
1438 0 : if(i==0) {
1439 0 : if(prev=='@') {
1440 : /* keep localeID */
1441 0 : } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1442 0 : ++localeID; /* point after the '@' */
1443 : } else {
1444 0 : return 0;
1445 : }
1446 0 : while(!_isTerminator(*localeID)) {
1447 0 : if (needSeparator) {
1448 0 : if (i<variantCapacity) {
1449 0 : variant[i] = '_';
1450 : }
1451 0 : ++i;
1452 0 : needSeparator = FALSE;
1453 : }
1454 0 : if(i<variantCapacity) {
1455 0 : variant[i]=(char)uprv_toupper(*localeID);
1456 0 : if(variant[i]=='-' || variant[i]==',') {
1457 0 : variant[i]='_';
1458 : }
1459 : }
1460 0 : i++;
1461 0 : localeID++;
1462 : }
1463 : }
1464 :
1465 0 : return i;
1466 : }
1467 :
1468 : static int32_t
1469 0 : _getVariant(const char *localeID,
1470 : char prev,
1471 : char *variant, int32_t variantCapacity) {
1472 0 : return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
1473 : }
1474 :
1475 : /**
1476 : * Delete ALL instances of a variant from the given list of one or
1477 : * more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
1478 : * @param variants the source string of one or more variants,
1479 : * separated by '_'. This will be MODIFIED IN PLACE. Not zero
1480 : * terminated; if it is, trailing zero will NOT be maintained.
1481 : * @param variantsLen length of variants
1482 : * @param toDelete variant to delete, without separators, e.g. "EURO"
1483 : * or "PREEURO"; not zero terminated
1484 : * @param toDeleteLen length of toDelete
1485 : * @return number of characters deleted from variants
1486 : */
1487 : static int32_t
1488 0 : _deleteVariant(char* variants, int32_t variantsLen,
1489 : const char* toDelete, int32_t toDeleteLen)
1490 : {
1491 0 : int32_t delta = 0; /* number of chars deleted */
1492 : for (;;) {
1493 0 : UBool flag = FALSE;
1494 0 : if (variantsLen < toDeleteLen) {
1495 0 : return delta;
1496 : }
1497 0 : if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&
1498 0 : (variantsLen == toDeleteLen ||
1499 0 : (flag=(variants[toDeleteLen] == '_'))))
1500 : {
1501 0 : int32_t d = toDeleteLen + (flag?1:0);
1502 0 : variantsLen -= d;
1503 0 : delta += d;
1504 0 : if (variantsLen > 0) {
1505 0 : uprv_memmove(variants, variants+d, variantsLen);
1506 : }
1507 : } else {
1508 0 : char* p = _strnchr(variants, variantsLen, '_');
1509 0 : if (p == NULL) {
1510 0 : return delta;
1511 : }
1512 0 : ++p;
1513 0 : variantsLen -= (int32_t)(p - variants);
1514 0 : variants = p;
1515 : }
1516 0 : }
1517 : }
1518 :
/* Keyword enumeration */

/*
 * State behind a keyword UEnumeration.
 */
typedef struct UKeywordsContext {
    /* Owned copy of the keyword list: a sequence of zero-terminated strings
     * ended by an empty string; released when the enumeration is closed. */
    char *keywords;
    /* Read position inside 'keywords': the next keyword to hand out. */
    char *current;
} UKeywordsContext;
1525 :
1526 : U_CDECL_BEGIN
1527 :
1528 : static void U_CALLCONV
1529 0 : uloc_kw_closeKeywords(UEnumeration *enumerator) {
1530 0 : uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1531 0 : uprv_free(enumerator->context);
1532 0 : uprv_free(enumerator);
1533 0 : }
1534 :
1535 : static int32_t U_CALLCONV
1536 0 : uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) {
1537 0 : char *kw = ((UKeywordsContext *)en->context)->keywords;
1538 0 : int32_t result = 0;
1539 0 : while(*kw) {
1540 0 : result++;
1541 0 : kw += uprv_strlen(kw)+1;
1542 : }
1543 0 : return result;
1544 : }
1545 :
1546 : static const char * U_CALLCONV
1547 0 : uloc_kw_nextKeyword(UEnumeration* en,
1548 : int32_t* resultLength,
1549 : UErrorCode* /*status*/) {
1550 0 : const char* result = ((UKeywordsContext *)en->context)->current;
1551 0 : int32_t len = 0;
1552 0 : if(*result) {
1553 0 : len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
1554 0 : ((UKeywordsContext *)en->context)->current += len+1;
1555 : } else {
1556 0 : result = NULL;
1557 : }
1558 0 : if (resultLength) {
1559 0 : *resultLength = len;
1560 : }
1561 0 : return result;
1562 : }
1563 :
1564 : static void U_CALLCONV
1565 0 : uloc_kw_resetKeywords(UEnumeration* en,
1566 : UErrorCode* /*status*/) {
1567 0 : ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1568 0 : }
1569 :
1570 : U_CDECL_END
1571 :
1572 :
1573 : static const UEnumeration gKeywordsEnum = {
1574 : NULL,
1575 : NULL,
1576 : uloc_kw_closeKeywords,
1577 : uloc_kw_countKeywords,
1578 : uenum_unextDefault,
1579 : uloc_kw_nextKeyword,
1580 : uloc_kw_resetKeywords
1581 : };
1582 :
1583 : U_CAPI UEnumeration* U_EXPORT2
1584 0 : uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
1585 : {
1586 0 : UKeywordsContext *myContext = NULL;
1587 0 : UEnumeration *result = NULL;
1588 :
1589 0 : if(U_FAILURE(*status)) {
1590 0 : return NULL;
1591 : }
1592 0 : result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
1593 : /* Null pointer test */
1594 0 : if (result == NULL) {
1595 0 : *status = U_MEMORY_ALLOCATION_ERROR;
1596 0 : return NULL;
1597 : }
1598 0 : uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
1599 0 : myContext = static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext)));
1600 0 : if (myContext == NULL) {
1601 0 : *status = U_MEMORY_ALLOCATION_ERROR;
1602 0 : uprv_free(result);
1603 0 : return NULL;
1604 : }
1605 0 : myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
1606 0 : uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
1607 0 : myContext->keywords[keywordListSize] = 0;
1608 0 : myContext->current = myContext->keywords;
1609 0 : result->context = myContext;
1610 0 : return result;
1611 : }
1612 :
1613 : U_CAPI UEnumeration* U_EXPORT2
1614 11 : uloc_openKeywords(const char* localeID,
1615 : UErrorCode* status)
1616 : {
1617 11 : int32_t i=0;
1618 : char keywords[256];
1619 11 : int32_t keywordsCapacity = 256;
1620 : char tempBuffer[ULOC_FULLNAME_CAPACITY];
1621 : const char* tmpLocaleID;
1622 :
1623 11 : if(status==NULL || U_FAILURE(*status)) {
1624 0 : return 0;
1625 : }
1626 :
1627 11 : if (_hasBCP47Extension(localeID)) {
1628 0 : _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
1629 : } else {
1630 11 : if (localeID==NULL) {
1631 0 : localeID=uloc_getDefault();
1632 : }
1633 11 : tmpLocaleID=localeID;
1634 : }
1635 :
1636 : /* Skip the language */
1637 11 : ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
1638 11 : if(_isIDSeparator(*tmpLocaleID)) {
1639 : const char *scriptID;
1640 : /* Skip the script if available */
1641 11 : ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
1642 11 : if(scriptID != tmpLocaleID+1) {
1643 : /* Found optional script */
1644 0 : tmpLocaleID = scriptID;
1645 : }
1646 : /* Skip the Country */
1647 11 : if (_isIDSeparator(*tmpLocaleID)) {
1648 11 : ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID);
1649 11 : if(_isIDSeparator(*tmpLocaleID)) {
1650 0 : _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0);
1651 : }
1652 : }
1653 : }
1654 :
1655 : /* keywords are located after '@' */
1656 11 : if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
1657 0 : i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
1658 : }
1659 :
1660 11 : if(i) {
1661 0 : return uloc_openKeywordList(keywords, i, status);
1662 : } else {
1663 11 : return NULL;
1664 : }
1665 : }
1666 :
1667 :
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/*
 * True when at least one bit of 'mask' is set in 'options'.  Both arguments
 * are parenthesized so that compound expressions (for example a mask built
 * with '|') are evaluated as a unit before the '&' is applied.
 */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* The characters of "i-default", stored without a terminating NUL. */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
#define I_DEFAULT_LENGTH UPRV_LENGTHOF(i_default)
1676 :
1677 : /**
1678 : * Canonicalize the given localeID, to level 1 or to level 2,
1679 : * depending on the options. To specify level 1, pass in options=0.
1680 : * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1681 : *
1682 : * This is the code underlying uloc_getName and uloc_canonicalize.
1683 : */
1684 : static int32_t
1685 19 : _canonicalize(const char* localeID,
1686 : char* result,
1687 : int32_t resultCapacity,
1688 : uint32_t options,
1689 : UErrorCode* err) {
1690 19 : int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
1691 : char localeBuffer[ULOC_FULLNAME_CAPACITY];
1692 : char tempBuffer[ULOC_FULLNAME_CAPACITY];
1693 : const char* origLocaleID;
1694 : const char* tmpLocaleID;
1695 19 : const char* keywordAssign = NULL;
1696 19 : const char* separatorIndicator = NULL;
1697 19 : const char* addKeyword = NULL;
1698 19 : const char* addValue = NULL;
1699 : char* name;
1700 19 : char* variant = NULL; /* pointer into name, or NULL */
1701 :
1702 19 : if (U_FAILURE(*err)) {
1703 0 : return 0;
1704 : }
1705 :
1706 19 : if (_hasBCP47Extension(localeID)) {
1707 0 : _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1708 : } else {
1709 19 : if (localeID==NULL) {
1710 0 : localeID=uloc_getDefault();
1711 : }
1712 19 : tmpLocaleID=localeID;
1713 : }
1714 :
1715 19 : origLocaleID=tmpLocaleID;
1716 :
1717 : /* if we are doing a full canonicalization, then put results in
1718 : localeBuffer, if necessary; otherwise send them to result. */
1719 19 : if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/
1720 19 : (result == NULL || resultCapacity < (int32_t)sizeof(localeBuffer))) {
1721 0 : name = localeBuffer;
1722 0 : nameCapacity = (int32_t)sizeof(localeBuffer);
1723 : } else {
1724 19 : name = result;
1725 19 : nameCapacity = resultCapacity;
1726 : }
1727 :
1728 : /* get all pieces, one after another, and separate with '_' */
1729 19 : len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);
1730 :
1731 19 : if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
1732 0 : const char *d = uloc_getDefault();
1733 :
1734 0 : len = (int32_t)uprv_strlen(d);
1735 :
1736 0 : if (name != NULL) {
1737 0 : uprv_strncpy(name, d, len);
1738 : }
1739 19 : } else if(_isIDSeparator(*tmpLocaleID)) {
1740 : const char *scriptID;
1741 :
1742 18 : ++fieldCount;
1743 18 : if(len<nameCapacity) {
1744 18 : name[len]='_';
1745 : }
1746 18 : ++len;
1747 :
1748 36 : scriptSize=ulocimp_getScript(tmpLocaleID+1,
1749 36 : (len<nameCapacity ? name+len : NULL), nameCapacity-len, &scriptID);
1750 18 : if(scriptSize > 0) {
1751 : /* Found optional script */
1752 0 : tmpLocaleID = scriptID;
1753 0 : ++fieldCount;
1754 0 : len+=scriptSize;
1755 0 : if (_isIDSeparator(*tmpLocaleID)) {
1756 : /* If there is something else, then we add the _ */
1757 0 : if(len<nameCapacity) {
1758 0 : name[len]='_';
1759 : }
1760 0 : ++len;
1761 : }
1762 : }
1763 :
1764 18 : if (_isIDSeparator(*tmpLocaleID)) {
1765 : const char *cntryID;
1766 36 : int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1,
1767 36 : (len<nameCapacity ? name+len : NULL), nameCapacity-len, &cntryID);
1768 18 : if (cntrySize > 0) {
1769 : /* Found optional country */
1770 18 : tmpLocaleID = cntryID;
1771 18 : len+=cntrySize;
1772 : }
1773 18 : if(_isIDSeparator(*tmpLocaleID)) {
1774 : /* If there is something else, then we add the _ if we found country before. */
1775 0 : if (cntrySize >= 0 && ! _isIDSeparator(*(tmpLocaleID+1)) ) {
1776 0 : ++fieldCount;
1777 0 : if(len<nameCapacity) {
1778 0 : name[len]='_';
1779 : }
1780 0 : ++len;
1781 : }
1782 :
1783 0 : variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID,
1784 0 : (len<nameCapacity ? name+len : NULL), nameCapacity-len);
1785 0 : if (variantSize > 0) {
1786 0 : variant = len<nameCapacity ? name+len : NULL;
1787 0 : len += variantSize;
1788 0 : tmpLocaleID += variantSize + 1; /* skip '_' and variant */
1789 : }
1790 : }
1791 : }
1792 : }
1793 :
1794 : /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1795 19 : if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
1796 0 : UBool done = FALSE;
1797 0 : do {
1798 0 : char c = *tmpLocaleID;
1799 0 : switch (c) {
1800 : case 0:
1801 : case '@':
1802 0 : done = TRUE;
1803 0 : break;
1804 : default:
1805 0 : if (len<nameCapacity) {
1806 0 : name[len] = c;
1807 : }
1808 0 : ++len;
1809 0 : ++tmpLocaleID;
1810 0 : break;
1811 : }
1812 0 : } while (!done);
1813 : }
1814 :
1815 : /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1816 : After this, tmpLocaleID either points to '@' or is NULL */
1817 19 : if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
1818 0 : keywordAssign = uprv_strchr(tmpLocaleID, '=');
1819 0 : separatorIndicator = uprv_strchr(tmpLocaleID, ';');
1820 : }
1821 :
1822 : /* Copy POSIX-style variant, if any [mr@FOO] */
1823 22 : if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
1824 3 : tmpLocaleID != NULL && keywordAssign == NULL) {
1825 : for (;;) {
1826 0 : char c = *tmpLocaleID;
1827 0 : if (c == 0) {
1828 0 : break;
1829 : }
1830 0 : if (len<nameCapacity) {
1831 0 : name[len] = c;
1832 : }
1833 0 : ++len;
1834 0 : ++tmpLocaleID;
1835 0 : }
1836 : }
1837 :
1838 19 : if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1839 : /* Handle @FOO variant if @ is present and not followed by = */
1840 16 : if (tmpLocaleID!=NULL && keywordAssign==NULL) {
1841 : int32_t posixVariantSize;
1842 : /* Add missing '_' if needed */
1843 0 : if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1844 0 : do {
1845 0 : if(len<nameCapacity) {
1846 0 : name[len]='_';
1847 : }
1848 0 : ++len;
1849 0 : ++fieldCount;
1850 0 : } while(fieldCount<2);
1851 : }
1852 0 : posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len,
1853 0 : (UBool)(variantSize > 0));
1854 0 : if (posixVariantSize > 0) {
1855 0 : if (variant == NULL) {
1856 0 : variant = name+len;
1857 : }
1858 0 : len += posixVariantSize;
1859 0 : variantSize += posixVariantSize;
1860 : }
1861 : }
1862 :
1863 : /* Handle generic variants first */
1864 16 : if (variant) {
1865 0 : for (j=0; j<UPRV_LENGTHOF(VARIANT_MAP); j++) {
1866 0 : const char* variantToCompare = VARIANT_MAP[j].variant;
1867 0 : int32_t n = (int32_t)uprv_strlen(variantToCompare);
1868 0 : int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n);
1869 0 : len -= variantLen;
1870 0 : if (variantLen > 0) {
1871 0 : if (len > 0 && name[len-1] == '_') { /* delete trailing '_' */
1872 0 : --len;
1873 : }
1874 0 : addKeyword = VARIANT_MAP[j].keyword;
1875 0 : addValue = VARIANT_MAP[j].value;
1876 0 : break;
1877 : }
1878 : }
1879 0 : if (len > 0 && len <= nameCapacity && name[len-1] == '_') { /* delete trailing '_' */
1880 0 : --len;
1881 : }
1882 : }
1883 :
1884 : /* Look up the ID in the canonicalization map */
1885 752 : for (j=0; j<UPRV_LENGTHOF(CANONICALIZE_MAP); j++) {
1886 736 : const char* id = CANONICALIZE_MAP[j].id;
1887 736 : int32_t n = (int32_t)uprv_strlen(id);
1888 736 : if (len == n && uprv_strncmp(name, id, n) == 0) {
1889 0 : if (n == 0 && tmpLocaleID != NULL) {
1890 0 : break; /* Don't remap "" if keywords present */
1891 : }
1892 0 : len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
1893 0 : if (CANONICALIZE_MAP[j].keyword) {
1894 0 : addKeyword = CANONICALIZE_MAP[j].keyword;
1895 0 : addValue = CANONICALIZE_MAP[j].value;
1896 : }
1897 0 : break;
1898 : }
1899 : }
1900 : }
1901 :
1902 19 : if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
1903 19 : if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
1904 0 : (!separatorIndicator || separatorIndicator > keywordAssign)) {
1905 0 : if(len<nameCapacity) {
1906 0 : name[len]='@';
1907 : }
1908 0 : ++len;
1909 0 : ++fieldCount;
1910 0 : len += _getKeywords(tmpLocaleID+1, '@', (len<nameCapacity ? name+len : NULL), nameCapacity-len,
1911 0 : NULL, 0, NULL, TRUE, addKeyword, addValue, err);
1912 19 : } else if (addKeyword != NULL) {
1913 0 : U_ASSERT(addValue != NULL && len < nameCapacity);
1914 : /* inelegant but works -- later make _getKeywords do this? */
1915 0 : len += _copyCount(name+len, nameCapacity-len, "@");
1916 0 : len += _copyCount(name+len, nameCapacity-len, addKeyword);
1917 0 : len += _copyCount(name+len, nameCapacity-len, "=");
1918 0 : len += _copyCount(name+len, nameCapacity-len, addValue);
1919 : }
1920 : }
1921 :
1922 19 : if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
1923 0 : uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
1924 : }
1925 :
1926 19 : return u_terminateChars(result, resultCapacity, len, err);
1927 : }
1928 :
1929 : /* ### ID parsing API **************************************************/
1930 :
1931 : U_CAPI int32_t U_EXPORT2
1932 0 : uloc_getParent(const char* localeID,
1933 : char* parent,
1934 : int32_t parentCapacity,
1935 : UErrorCode* err)
1936 : {
1937 : const char *lastUnderscore;
1938 : int32_t i;
1939 :
1940 0 : if (U_FAILURE(*err))
1941 0 : return 0;
1942 :
1943 0 : if (localeID == NULL)
1944 0 : localeID = uloc_getDefault();
1945 :
1946 0 : lastUnderscore=uprv_strrchr(localeID, '_');
1947 0 : if(lastUnderscore!=NULL) {
1948 0 : i=(int32_t)(lastUnderscore-localeID);
1949 : } else {
1950 0 : i=0;
1951 : }
1952 :
1953 0 : if(i>0 && parent != localeID) {
1954 0 : uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
1955 : }
1956 0 : return u_terminateChars(parent, parentCapacity, i, err);
1957 : }
1958 :
1959 : U_CAPI int32_t U_EXPORT2
1960 17 : uloc_getLanguage(const char* localeID,
1961 : char* language,
1962 : int32_t languageCapacity,
1963 : UErrorCode* err)
1964 : {
1965 : /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1966 17 : int32_t i=0;
1967 :
1968 17 : if (err==NULL || U_FAILURE(*err)) {
1969 0 : return 0;
1970 : }
1971 :
1972 17 : if(localeID==NULL) {
1973 0 : localeID=uloc_getDefault();
1974 : }
1975 :
1976 17 : i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL);
1977 17 : return u_terminateChars(language, languageCapacity, i, err);
1978 : }
1979 :
1980 : U_CAPI int32_t U_EXPORT2
1981 17 : uloc_getScript(const char* localeID,
1982 : char* script,
1983 : int32_t scriptCapacity,
1984 : UErrorCode* err)
1985 : {
1986 17 : int32_t i=0;
1987 :
1988 17 : if(err==NULL || U_FAILURE(*err)) {
1989 0 : return 0;
1990 : }
1991 :
1992 17 : if(localeID==NULL) {
1993 0 : localeID=uloc_getDefault();
1994 : }
1995 :
1996 : /* skip the language */
1997 17 : ulocimp_getLanguage(localeID, NULL, 0, &localeID);
1998 17 : if(_isIDSeparator(*localeID)) {
1999 17 : i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL);
2000 : }
2001 17 : return u_terminateChars(script, scriptCapacity, i, err);
2002 : }
2003 :
2004 : U_CAPI int32_t U_EXPORT2
2005 11 : uloc_getCountry(const char* localeID,
2006 : char* country,
2007 : int32_t countryCapacity,
2008 : UErrorCode* err)
2009 : {
2010 11 : int32_t i=0;
2011 :
2012 11 : if(err==NULL || U_FAILURE(*err)) {
2013 0 : return 0;
2014 : }
2015 :
2016 11 : if(localeID==NULL) {
2017 0 : localeID=uloc_getDefault();
2018 : }
2019 :
2020 : /* Skip the language */
2021 11 : ulocimp_getLanguage(localeID, NULL, 0, &localeID);
2022 11 : if(_isIDSeparator(*localeID)) {
2023 : const char *scriptID;
2024 : /* Skip the script if available */
2025 11 : ulocimp_getScript(localeID+1, NULL, 0, &scriptID);
2026 11 : if(scriptID != localeID+1) {
2027 : /* Found optional script */
2028 0 : localeID = scriptID;
2029 : }
2030 11 : if(_isIDSeparator(*localeID)) {
2031 11 : i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL);
2032 : }
2033 : }
2034 11 : return u_terminateChars(country, countryCapacity, i, err);
2035 : }
2036 :
2037 : U_CAPI int32_t U_EXPORT2
2038 22 : uloc_getVariant(const char* localeID,
2039 : char* variant,
2040 : int32_t variantCapacity,
2041 : UErrorCode* err)
2042 : {
2043 : char tempBuffer[ULOC_FULLNAME_CAPACITY];
2044 : const char* tmpLocaleID;
2045 22 : int32_t i=0;
2046 :
2047 22 : if(err==NULL || U_FAILURE(*err)) {
2048 0 : return 0;
2049 : }
2050 :
2051 22 : if (_hasBCP47Extension(localeID)) {
2052 0 : _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
2053 : } else {
2054 22 : if (localeID==NULL) {
2055 0 : localeID=uloc_getDefault();
2056 : }
2057 22 : tmpLocaleID=localeID;
2058 : }
2059 :
2060 : /* Skip the language */
2061 22 : ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
2062 22 : if(_isIDSeparator(*tmpLocaleID)) {
2063 : const char *scriptID;
2064 : /* Skip the script if available */
2065 22 : ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
2066 22 : if(scriptID != tmpLocaleID+1) {
2067 : /* Found optional script */
2068 0 : tmpLocaleID = scriptID;
2069 : }
2070 : /* Skip the Country */
2071 22 : if (_isIDSeparator(*tmpLocaleID)) {
2072 : const char *cntryID;
2073 22 : ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID);
2074 22 : if (cntryID != tmpLocaleID+1) {
2075 : /* Found optional country */
2076 22 : tmpLocaleID = cntryID;
2077 : }
2078 22 : if(_isIDSeparator(*tmpLocaleID)) {
2079 : /* If there was no country ID, skip a possible extra IDSeparator */
2080 0 : if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
2081 0 : tmpLocaleID++;
2082 : }
2083 0 : i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity);
2084 : }
2085 : }
2086 : }
2087 :
2088 : /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
2089 : /* if we do not have a variant tag yet then try a POSIX variant after '@' */
2090 : /*
2091 : if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
2092 : i=_getVariant(localeID+1, '@', variant, variantCapacity);
2093 : }
2094 : */
2095 22 : return u_terminateChars(variant, variantCapacity, i, err);
2096 : }
2097 :
2098 : U_CAPI int32_t U_EXPORT2
2099 3 : uloc_getName(const char* localeID,
2100 : char* name,
2101 : int32_t nameCapacity,
2102 : UErrorCode* err)
2103 : {
2104 3 : return _canonicalize(localeID, name, nameCapacity, 0, err);
2105 : }
2106 :
2107 : U_CAPI int32_t U_EXPORT2
2108 0 : uloc_getBaseName(const char* localeID,
2109 : char* name,
2110 : int32_t nameCapacity,
2111 : UErrorCode* err)
2112 : {
2113 0 : return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
2114 : }
2115 :
2116 : U_CAPI int32_t U_EXPORT2
2117 16 : uloc_canonicalize(const char* localeID,
2118 : char* name,
2119 : int32_t nameCapacity,
2120 : UErrorCode* err)
2121 : {
2122 16 : return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
2123 : }
2124 :
2125 : U_CAPI const char* U_EXPORT2
2126 0 : uloc_getISO3Language(const char* localeID)
2127 : {
2128 : int16_t offset;
2129 : char lang[ULOC_LANG_CAPACITY];
2130 0 : UErrorCode err = U_ZERO_ERROR;
2131 :
2132 0 : if (localeID == NULL)
2133 : {
2134 0 : localeID = uloc_getDefault();
2135 : }
2136 0 : uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
2137 0 : if (U_FAILURE(err))
2138 0 : return "";
2139 0 : offset = _findIndex(LANGUAGES, lang);
2140 0 : if (offset < 0)
2141 0 : return "";
2142 0 : return LANGUAGES_3[offset];
2143 : }
2144 :
2145 : U_CAPI const char* U_EXPORT2
2146 0 : uloc_getISO3Country(const char* localeID)
2147 : {
2148 : int16_t offset;
2149 : char cntry[ULOC_LANG_CAPACITY];
2150 0 : UErrorCode err = U_ZERO_ERROR;
2151 :
2152 0 : if (localeID == NULL)
2153 : {
2154 0 : localeID = uloc_getDefault();
2155 : }
2156 0 : uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
2157 0 : if (U_FAILURE(err))
2158 0 : return "";
2159 0 : offset = _findIndex(COUNTRIES, cntry);
2160 0 : if (offset < 0)
2161 0 : return "";
2162 :
2163 0 : return COUNTRIES_3[offset];
2164 : }
2165 :
2166 : U_CAPI uint32_t U_EXPORT2
2167 0 : uloc_getLCID(const char* localeID)
2168 : {
2169 0 : UErrorCode status = U_ZERO_ERROR;
2170 : char langID[ULOC_FULLNAME_CAPACITY];
2171 0 : uint32_t lcid = 0;
2172 :
2173 : /* Check for incomplete id. */
2174 0 : if (!localeID || uprv_strlen(localeID) < 2) {
2175 0 : return 0;
2176 : }
2177 :
2178 : // Attempt platform lookup if available
2179 0 : lcid = uprv_convertToLCIDPlatform(localeID);
2180 0 : if (lcid > 0)
2181 : {
2182 : // Windows found an LCID, return that
2183 0 : return lcid;
2184 : }
2185 :
2186 0 : uloc_getLanguage(localeID, langID, sizeof(langID), &status);
2187 0 : if (U_FAILURE(status)) {
2188 0 : return 0;
2189 : }
2190 :
2191 0 : if (uprv_strchr(localeID, '@')) {
2192 : // uprv_convertToLCID does not support keywords other than collation.
2193 : // Remove all keywords except collation.
2194 : int32_t len;
2195 : char collVal[ULOC_KEYWORDS_CAPACITY];
2196 : char tmpLocaleID[ULOC_FULLNAME_CAPACITY];
2197 :
2198 : len = uloc_getKeywordValue(localeID, "collation", collVal,
2199 0 : UPRV_LENGTHOF(collVal) - 1, &status);
2200 :
2201 0 : if (U_SUCCESS(status) && len > 0) {
2202 0 : collVal[len] = 0;
2203 :
2204 : len = uloc_getBaseName(localeID, tmpLocaleID,
2205 0 : UPRV_LENGTHOF(tmpLocaleID) - 1, &status);
2206 :
2207 0 : if (U_SUCCESS(status) && len > 0) {
2208 0 : tmpLocaleID[len] = 0;
2209 :
2210 0 : len = uloc_setKeywordValue("collation", collVal, tmpLocaleID,
2211 0 : UPRV_LENGTHOF(tmpLocaleID) - len - 1, &status);
2212 :
2213 0 : if (U_SUCCESS(status) && len > 0) {
2214 0 : tmpLocaleID[len] = 0;
2215 0 : return uprv_convertToLCID(langID, tmpLocaleID, &status);
2216 : }
2217 : }
2218 : }
2219 :
2220 : // fall through - all keywords are simply ignored
2221 0 : status = U_ZERO_ERROR;
2222 : }
2223 :
2224 0 : return uprv_convertToLCID(langID, localeID, &status);
2225 : }
2226 :
2227 : U_CAPI int32_t U_EXPORT2
2228 0 : uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
2229 : UErrorCode *status)
2230 : {
2231 0 : return uprv_convertToPosix(hostid, locale, localeCapacity, status);
2232 : }
2233 :
2234 : /* ### Default locale **************************************************/
2235 :
2236 : U_CAPI const char* U_EXPORT2
2237 2 : uloc_getDefault()
2238 : {
2239 2 : return locale_get_default();
2240 : }
2241 :
2242 : U_CAPI void U_EXPORT2
2243 0 : uloc_setDefault(const char* newDefaultLocale,
2244 : UErrorCode* err)
2245 : {
2246 0 : if (U_FAILURE(*err))
2247 0 : return;
2248 : /* the error code isn't currently used for anything by this function*/
2249 :
2250 : /* propagate change to C++ */
2251 0 : locale_set_default(newDefaultLocale);
2252 : }
2253 :
2254 : /**
2255 : * Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer
2256 : * to an array of pointers to arrays of char. All of these pointers are owned
2257 : * by ICU-- do not delete them, and do not write through them. The array is
2258 : * terminated with a null pointer.
2259 : */
2260 : U_CAPI const char* const* U_EXPORT2
2261 0 : uloc_getISOLanguages()
2262 : {
2263 0 : return LANGUAGES;
2264 : }
2265 :
2266 : /**
2267 : * Returns a list of all 2-letter country codes defined in ISO 639. This is a
2268 : * pointer to an array of pointers to arrays of char. All of these pointers are
2269 : * owned by ICU-- do not delete them, and do not write through them. The array is
2270 : * terminated with a null pointer.
2271 : */
2272 : U_CAPI const char* const* U_EXPORT2
2273 0 : uloc_getISOCountries()
2274 : {
2275 0 : return COUNTRIES;
2276 : }
2277 :
2278 :
2279 : /* this function to be moved into cstring.c later */
2280 : static char gDecimal = 0;
2281 :
2282 : static /* U_CAPI */
2283 : double
2284 : /* U_EXPORT2 */
2285 0 : _uloc_strtod(const char *start, char **end) {
2286 : char *decimal;
2287 : char *myEnd;
2288 : char buf[30];
2289 : double rv;
2290 0 : if (!gDecimal) {
2291 : char rep[5];
2292 : /* For machines that decide to change the decimal on you,
2293 : and try to be too smart with localization.
2294 : This normally should be just a '.'. */
2295 0 : sprintf(rep, "%+1.1f", 1.0);
2296 0 : gDecimal = rep[2];
2297 : }
2298 :
2299 0 : if(gDecimal == '.') {
2300 0 : return uprv_strtod(start, end); /* fall through to OS */
2301 : } else {
2302 0 : uprv_strncpy(buf, start, 29);
2303 0 : buf[29]=0;
2304 0 : decimal = uprv_strchr(buf, '.');
2305 0 : if(decimal) {
2306 0 : *decimal = gDecimal;
2307 : } else {
2308 0 : return uprv_strtod(start, end); /* no decimal point */
2309 : }
2310 0 : rv = uprv_strtod(buf, &myEnd);
2311 0 : if(end) {
2312 0 : *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */
2313 : }
2314 0 : return rv;
2315 : }
2316 : }
2317 :
2318 : typedef struct {
2319 : float q;
2320 : int32_t dummy; /* to avoid uninitialized memory copy from qsort */
2321 : char locale[ULOC_FULLNAME_CAPACITY+1];
2322 : } _acceptLangItem;
2323 :
2324 : static int32_t U_CALLCONV
2325 0 : uloc_acceptLanguageCompare(const void * /*context*/, const void *a, const void *b)
2326 : {
2327 0 : const _acceptLangItem *aa = (const _acceptLangItem*)a;
2328 0 : const _acceptLangItem *bb = (const _acceptLangItem*)b;
2329 :
2330 0 : int32_t rc = 0;
2331 0 : if(bb->q < aa->q) {
2332 0 : rc = -1; /* A > B */
2333 0 : } else if(bb->q > aa->q) {
2334 0 : rc = 1; /* A < B */
2335 : } else {
2336 0 : rc = 0; /* A = B */
2337 : }
2338 :
2339 0 : if(rc==0) {
2340 0 : rc = uprv_stricmp(aa->locale, bb->locale);
2341 : }
2342 :
2343 : #if defined(ULOC_DEBUG)
2344 : /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2345 : aa->locale, aa->q,
2346 : bb->locale, bb->q,
2347 : rc);*/
2348 : #endif
2349 :
2350 0 : return rc;
2351 : }
2352 :
2353 : /*
2354 : mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
2355 : */
2356 :
2357 : U_CAPI int32_t U_EXPORT2
2358 0 : uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult,
2359 : const char *httpAcceptLanguage,
2360 : UEnumeration* availableLocales,
2361 : UErrorCode *status)
2362 : {
2363 0 : MaybeStackArray<_acceptLangItem, 4> items; // Struct for collecting items.
2364 : char tmp[ULOC_FULLNAME_CAPACITY +1];
2365 0 : int32_t n = 0;
2366 : const char *itemEnd;
2367 : const char *paramEnd;
2368 : const char *s;
2369 : const char *t;
2370 : int32_t res;
2371 : int32_t i;
2372 0 : int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
2373 :
2374 0 : if(U_FAILURE(*status)) {
2375 0 : return -1;
2376 : }
2377 :
2378 0 : for(s=httpAcceptLanguage;s&&*s;) {
2379 0 : while(isspace(*s)) /* eat space at the beginning */
2380 0 : s++;
2381 0 : itemEnd=uprv_strchr(s,',');
2382 0 : paramEnd=uprv_strchr(s,';');
2383 0 : if(!itemEnd) {
2384 0 : itemEnd = httpAcceptLanguage+l; /* end of string */
2385 : }
2386 0 : if(paramEnd && paramEnd<itemEnd) {
2387 : /* semicolon (;) is closer than end (,) */
2388 0 : t = paramEnd+1;
2389 0 : if(*t=='q') {
2390 0 : t++;
2391 : }
2392 0 : while(isspace(*t)) {
2393 0 : t++;
2394 : }
2395 0 : if(*t=='=') {
2396 0 : t++;
2397 : }
2398 0 : while(isspace(*t)) {
2399 0 : t++;
2400 : }
2401 0 : items[n].q = (float)_uloc_strtod(t,NULL);
2402 : } else {
2403 : /* no semicolon - it's 1.0 */
2404 0 : items[n].q = 1.0f;
2405 0 : paramEnd = itemEnd;
2406 : }
2407 0 : items[n].dummy=0;
2408 : /* eat spaces prior to semi */
2409 0 : for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
2410 : ;
2411 0 : int32_t slen = ((t+1)-s);
2412 0 : if(slen > ULOC_FULLNAME_CAPACITY) {
2413 0 : *status = U_BUFFER_OVERFLOW_ERROR;
2414 0 : return -1; // too big
2415 : }
2416 0 : uprv_strncpy(items[n].locale, s, slen);
2417 0 : items[n].locale[slen]=0; // terminate
2418 0 : int32_t clen = uloc_canonicalize(items[n].locale, tmp, UPRV_LENGTHOF(tmp)-1, status);
2419 0 : if(U_FAILURE(*status)) return -1;
2420 0 : if((clen!=slen) || (uprv_strncmp(items[n].locale, tmp, slen))) {
2421 : // canonicalization had an effect- copy back
2422 0 : uprv_strncpy(items[n].locale, tmp, clen);
2423 0 : items[n].locale[clen] = 0; // terminate
2424 : }
2425 : #if defined(ULOC_DEBUG)
2426 : /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2427 : #endif
2428 0 : n++;
2429 0 : s = itemEnd;
2430 0 : while(*s==',') { /* eat duplicate commas */
2431 0 : s++;
2432 : }
2433 0 : if(n>=items.getCapacity()) { // If we need more items
2434 0 : if(NULL == items.resize(items.getCapacity()*2, items.getCapacity())) {
2435 0 : *status = U_MEMORY_ALLOCATION_ERROR;
2436 0 : return -1;
2437 : }
2438 : #if defined(ULOC_DEBUG)
2439 : fprintf(stderr,"malloced at size %d\n", items.getCapacity());
2440 : #endif
2441 : }
2442 : }
2443 0 : uprv_sortArray(items.getAlias(), n, sizeof(items[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
2444 0 : if (U_FAILURE(*status)) {
2445 0 : return -1;
2446 : }
2447 0 : LocalMemory<const char*> strs(NULL);
2448 0 : if (strs.allocateInsteadAndReset(n) == NULL) {
2449 0 : *status = U_MEMORY_ALLOCATION_ERROR;
2450 0 : return -1;
2451 : }
2452 0 : for(i=0;i<n;i++) {
2453 : #if defined(ULOC_DEBUG)
2454 : /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
2455 : #endif
2456 0 : strs[i]=items[i].locale;
2457 : }
2458 0 : res = uloc_acceptLanguage(result, resultAvailable, outResult,
2459 0 : strs.getAlias(), n, availableLocales, status);
2460 0 : return res;
2461 : }
2462 :
2463 :
2464 : U_CAPI int32_t U_EXPORT2
2465 0 : uloc_acceptLanguage(char *result, int32_t resultAvailable,
2466 : UAcceptResult *outResult, const char **acceptList,
2467 : int32_t acceptListCount,
2468 : UEnumeration* availableLocales,
2469 : UErrorCode *status)
2470 : {
2471 : int32_t i,j;
2472 : int32_t len;
2473 0 : int32_t maxLen=0;
2474 : char tmp[ULOC_FULLNAME_CAPACITY+1];
2475 : const char *l;
2476 : char **fallbackList;
2477 0 : if(U_FAILURE(*status)) {
2478 0 : return -1;
2479 : }
2480 0 : fallbackList = static_cast<char **>(uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount)));
2481 0 : if(fallbackList==NULL) {
2482 0 : *status = U_MEMORY_ALLOCATION_ERROR;
2483 0 : return -1;
2484 : }
2485 0 : for(i=0;i<acceptListCount;i++) {
2486 : #if defined(ULOC_DEBUG)
2487 : fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
2488 : #endif
2489 0 : while((l=uenum_next(availableLocales, NULL, status))) {
2490 : #if defined(ULOC_DEBUG)
2491 : fprintf(stderr," %s\n", l);
2492 : #endif
2493 0 : len = (int32_t)uprv_strlen(l);
2494 0 : if(!uprv_strcmp(acceptList[i], l)) {
2495 0 : if(outResult) {
2496 0 : *outResult = ULOC_ACCEPT_VALID;
2497 : }
2498 : #if defined(ULOC_DEBUG)
2499 : fprintf(stderr, "MATCH! %s\n", l);
2500 : #endif
2501 0 : if(len>0) {
2502 0 : uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2503 : }
2504 0 : for(j=0;j<i;j++) {
2505 0 : uprv_free(fallbackList[j]);
2506 : }
2507 0 : uprv_free(fallbackList);
2508 0 : return u_terminateChars(result, resultAvailable, len, status);
2509 : }
2510 0 : if(len>maxLen) {
2511 0 : maxLen = len;
2512 : }
2513 : }
2514 0 : uenum_reset(availableLocales, status);
2515 : /* save off parent info */
2516 0 : if(uloc_getParent(acceptList[i], tmp, UPRV_LENGTHOF(tmp), status)!=0) {
2517 0 : fallbackList[i] = uprv_strdup(tmp);
2518 : } else {
2519 0 : fallbackList[i]=0;
2520 : }
2521 : }
2522 :
2523 0 : for(maxLen--;maxLen>0;maxLen--) {
2524 0 : for(i=0;i<acceptListCount;i++) {
2525 0 : if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
2526 : #if defined(ULOC_DEBUG)
2527 : fprintf(stderr,"Try: [%s]", fallbackList[i]);
2528 : #endif
2529 0 : while((l=uenum_next(availableLocales, NULL, status))) {
2530 : #if defined(ULOC_DEBUG)
2531 : fprintf(stderr," %s\n", l);
2532 : #endif
2533 0 : len = (int32_t)uprv_strlen(l);
2534 0 : if(!uprv_strcmp(fallbackList[i], l)) {
2535 0 : if(outResult) {
2536 0 : *outResult = ULOC_ACCEPT_FALLBACK;
2537 : }
2538 : #if defined(ULOC_DEBUG)
2539 : fprintf(stderr, "fallback MATCH! %s\n", l);
2540 : #endif
2541 0 : if(len>0) {
2542 0 : uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2543 : }
2544 0 : for(j=0;j<acceptListCount;j++) {
2545 0 : uprv_free(fallbackList[j]);
2546 : }
2547 0 : uprv_free(fallbackList);
2548 0 : return u_terminateChars(result, resultAvailable, len, status);
2549 : }
2550 : }
2551 0 : uenum_reset(availableLocales, status);
2552 :
2553 0 : if(uloc_getParent(fallbackList[i], tmp, UPRV_LENGTHOF(tmp), status)!=0) {
2554 0 : uprv_free(fallbackList[i]);
2555 0 : fallbackList[i] = uprv_strdup(tmp);
2556 : } else {
2557 0 : uprv_free(fallbackList[i]);
2558 0 : fallbackList[i]=0;
2559 : }
2560 : }
2561 : }
2562 0 : if(outResult) {
2563 0 : *outResult = ULOC_ACCEPT_FAILED;
2564 : }
2565 : }
2566 0 : for(i=0;i<acceptListCount;i++) {
2567 0 : uprv_free(fallbackList[i]);
2568 : }
2569 0 : uprv_free(fallbackList);
2570 0 : return -1;
2571 : }
2572 :
2573 : U_CAPI const char* U_EXPORT2
2574 0 : uloc_toUnicodeLocaleKey(const char* keyword)
2575 : {
2576 0 : const char* bcpKey = ulocimp_toBcpKey(keyword);
2577 0 : if (bcpKey == NULL && ultag_isUnicodeLocaleKey(keyword, -1)) {
2578 : // unknown keyword, but syntax is fine..
2579 0 : return keyword;
2580 : }
2581 0 : return bcpKey;
2582 : }
2583 :
2584 : U_CAPI const char* U_EXPORT2
2585 0 : uloc_toUnicodeLocaleType(const char* keyword, const char* value)
2586 : {
2587 0 : const char* bcpType = ulocimp_toBcpType(keyword, value, NULL, NULL);
2588 0 : if (bcpType == NULL && ultag_isUnicodeLocaleType(value, -1)) {
2589 : // unknown keyword, but syntax is fine..
2590 0 : return value;
2591 : }
2592 0 : return bcpType;
2593 : }
2594 :
2595 : static UBool
2596 0 : isWellFormedLegacyKey(const char* legacyKey)
2597 : {
2598 0 : const char* p = legacyKey;
2599 0 : while (*p) {
2600 0 : if (!UPRV_ISALPHANUM(*p)) {
2601 0 : return FALSE;
2602 : }
2603 0 : p++;
2604 : }
2605 0 : return TRUE;
2606 : }
2607 :
2608 : static UBool
2609 0 : isWellFormedLegacyType(const char* legacyType)
2610 : {
2611 0 : const char* p = legacyType;
2612 0 : int32_t alphaNumLen = 0;
2613 0 : while (*p) {
2614 0 : if (*p == '_' || *p == '/' || *p == '-') {
2615 0 : if (alphaNumLen == 0) {
2616 0 : return FALSE;
2617 : }
2618 0 : alphaNumLen = 0;
2619 0 : } else if (UPRV_ISALPHANUM(*p)) {
2620 0 : alphaNumLen++;
2621 : } else {
2622 0 : return FALSE;
2623 : }
2624 0 : p++;
2625 : }
2626 0 : return (alphaNumLen != 0);
2627 : }
2628 :
2629 : U_CAPI const char* U_EXPORT2
2630 0 : uloc_toLegacyKey(const char* keyword)
2631 : {
2632 0 : const char* legacyKey = ulocimp_toLegacyKey(keyword);
2633 0 : if (legacyKey == NULL) {
2634 : // Checks if the specified locale key is well-formed with the legacy locale syntax.
2635 : //
2636 : // Note:
2637 : // LDML/CLDR provides some definition of keyword syntax in
2638 : // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
2639 : // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
2640 : // Keys can only consist of [0-9a-zA-Z].
2641 0 : if (isWellFormedLegacyKey(keyword)) {
2642 0 : return keyword;
2643 : }
2644 : }
2645 0 : return legacyKey;
2646 : }
2647 :
2648 : U_CAPI const char* U_EXPORT2
2649 0 : uloc_toLegacyType(const char* keyword, const char* value)
2650 : {
2651 0 : const char* legacyType = ulocimp_toLegacyType(keyword, value, NULL, NULL);
2652 0 : if (legacyType == NULL) {
2653 : // Checks if the specified locale type is well-formed with the legacy locale syntax.
2654 : //
2655 : // Note:
2656 : // LDML/CLDR provides some definition of keyword syntax in
2657 : // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
2658 : // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
2659 : // Values (types) can only consist of [0-9a-zA-Z], plus for legacy values
2660 : // we allow [/_-+] in the middle (e.g. "Etc/GMT+1", "Asia/Tel_Aviv")
2661 0 : if (isWellFormedLegacyType(value)) {
2662 0 : return value;
2663 : }
2664 : }
2665 0 : return legacyType;
2666 : }
2667 :
2668 : /*eof*/
|