Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : *******************************************************************************
5 : * Copyright (C) 1997-2015, International Business Machines Corporation and *
6 : * others. All Rights Reserved. *
7 : *******************************************************************************
8 : *
9 : * File COMPACTDECIMALFORMAT.CPP
10 : *
11 : ********************************************************************************
12 : */
13 : #include "unicode/utypes.h"
14 :
15 : #if !UCONFIG_NO_FORMATTING
16 :
17 : #include "charstr.h"
18 : #include "cstring.h"
19 : #include "digitlst.h"
20 : #include "mutex.h"
21 : #include "unicode/compactdecimalformat.h"
22 : #include "unicode/numsys.h"
23 : #include "unicode/plurrule.h"
24 : #include "unicode/ures.h"
25 : #include "ucln_in.h"
26 : #include "uhash.h"
27 : #include "umutex.h"
28 : #include "unicode/ures.h"
29 : #include "uresimp.h"
30 :
31 : // Maps locale name to CDFLocaleData struct.
32 : static UHashtable* gCompactDecimalData = NULL;
33 : static UMutex gCompactDecimalMetaLock = U_MUTEX_INITIALIZER;
34 :
35 : U_NAMESPACE_BEGIN
36 :
37 : static const int32_t MAX_DIGITS = 15;
38 : static const char gOther[] = "other";
39 : static const char gLatnTag[] = "latn";
40 : static const char gNumberElementsTag[] = "NumberElements";
41 : static const char gDecimalFormatTag[] = "decimalFormat";
42 : static const char gPatternsShort[] = "patternsShort";
43 : static const char gPatternsLong[] = "patternsLong";
44 : static const char gLatnPath[] = "NumberElements/latn";
45 :
46 : static const UChar u_0 = 0x30;
47 : static const UChar u_apos = 0x27;
48 :
49 : static const UChar kZero[] = {u_0};
50 :
// State machine used by fixQuotes while it unescapes single quotes:
//   OUTSIDE      - not inside a quoted run
//   INSIDE_EMPTY - just consumed an opening quote, nothing quoted yet
//   INSIDE_FULL  - inside a quoted run that already has content
enum QuoteState {
  OUTSIDE,
  INSIDE_EMPTY,
  INSIDE_FULL
};
57 :
// NOTE(review): the values look like bit flags (0, 1, 2), so a further flag
// would presumably be 4, then 8, and so on. Nothing in the visible part of
// this file references the enum.
enum FallbackFlags {
  ANY = 0,
  MUST = 1,
  NOT_ROOT = 2
};
64 :
65 :
66 : // CDFUnit represents a prefix-suffix pair for a particular variant
67 : // and log10 value.
68 0 : struct CDFUnit : public UMemory {
69 : UnicodeString prefix;
70 : UnicodeString suffix;
71 0 : inline CDFUnit() : prefix(), suffix() {
72 0 : prefix.setToBogus();
73 0 : }
74 0 : inline ~CDFUnit() {}
75 0 : inline UBool isSet() const {
76 0 : return !prefix.isBogus();
77 : }
78 0 : inline void markAsSet() {
79 0 : prefix.remove();
80 0 : }
81 : };
82 :
83 : // CDFLocaleStyleData contains formatting data for a particular locale
84 : // and style.
85 : class CDFLocaleStyleData : public UMemory {
86 : public:
87 : // What to divide by for each log10 value when formatting. These values
88 : // will be powers of 10. For English, would be:
89 : // 1, 1, 1, 1000, 1000, 1000, 1000000, 1000000, 1000000, 1000000000 ...
90 : double divisors[MAX_DIGITS];
91 : // Maps plural variants to CDFUnit[MAX_DIGITS] arrays.
92 : // To format a number x,
93 : // first compute log10(x). Compute displayNum = (x / divisors[log10(x)]).
94 : // Compute the plural variant for displayNum
95 : // (e.g zero, one, two, few, many, other).
96 : // Compute cdfUnits = unitsByVariant[pluralVariant].
97 : // Prefix and suffix to use at cdfUnits[log10(x)]
98 : UHashtable* unitsByVariant;
99 : // A flag for whether or not this CDFLocaleStyleData was loaded from the
100 : // Latin numbering system as a fallback from the locale numbering system.
101 : // This value is meaningless if the object is bogus or empty.
102 : UBool fromFallback;
103 0 : inline CDFLocaleStyleData() : unitsByVariant(NULL), fromFallback(FALSE) {
104 0 : uprv_memset(divisors, 0, sizeof(divisors));
105 0 : }
106 : ~CDFLocaleStyleData();
107 : // Init initializes this object.
108 : void Init(UErrorCode& status);
109 0 : inline UBool isBogus() const {
110 0 : return unitsByVariant == NULL;
111 : }
112 : void setToBogus();
113 0 : UBool isEmpty() {
114 0 : return unitsByVariant == NULL || unitsByVariant->count == 0;
115 : }
116 : private:
117 : CDFLocaleStyleData(const CDFLocaleStyleData&);
118 : CDFLocaleStyleData& operator=(const CDFLocaleStyleData&);
119 : };
120 :
121 : // CDFLocaleData contains formatting data for a particular locale.
122 : struct CDFLocaleData : public UMemory {
123 : CDFLocaleStyleData shortData;
124 : CDFLocaleStyleData longData;
125 0 : inline CDFLocaleData() : shortData(), longData() { }
126 0 : inline ~CDFLocaleData() { }
127 : // Init initializes this object.
128 : void Init(UErrorCode& status);
129 : };
130 :
131 : U_NAMESPACE_END
132 :
133 : U_CDECL_BEGIN
134 :
135 0 : static UBool U_CALLCONV cdf_cleanup(void) {
136 0 : if (gCompactDecimalData != NULL) {
137 0 : uhash_close(gCompactDecimalData);
138 0 : gCompactDecimalData = NULL;
139 : }
140 0 : return TRUE;
141 : }
142 :
143 0 : static void U_CALLCONV deleteCDFUnits(void* ptr) {
144 0 : delete [] (icu::CDFUnit*) ptr;
145 0 : }
146 :
147 0 : static void U_CALLCONV deleteCDFLocaleData(void* ptr) {
148 0 : delete (icu::CDFLocaleData*) ptr;
149 0 : }
150 :
151 : U_CDECL_END
152 :
153 : U_NAMESPACE_BEGIN
154 :
155 : static UBool divisors_equal(const double* lhs, const double* rhs);
156 : static const CDFLocaleStyleData* getCDFLocaleStyleData(const Locale& inLocale, UNumberCompactStyle style, UErrorCode& status);
157 :
158 : static const CDFLocaleStyleData* extractDataByStyleEnum(const CDFLocaleData& data, UNumberCompactStyle style, UErrorCode& status);
159 : static CDFLocaleData* loadCDFLocaleData(const Locale& inLocale, UErrorCode& status);
160 : static void load(const Locale& inLocale, CDFLocaleData* result, UErrorCode& status);
161 : static int32_t populatePrefixSuffix(const char* variant, int32_t log10Value, const UnicodeString& formatStr, UHashtable* result, UBool overwrite, UErrorCode& status);
162 : static double calculateDivisor(double power10, int32_t numZeros);
163 : static UBool onlySpaces(UnicodeString u);
164 : static void fixQuotes(UnicodeString& s);
165 : static void checkForOtherVariants(CDFLocaleStyleData* result, UErrorCode& status);
166 : static void fillInMissing(CDFLocaleStyleData* result);
167 : static int32_t computeLog10(double x, UBool inRange);
168 : static CDFUnit* createCDFUnit(const char* variant, int32_t log10Value, UHashtable* table, UErrorCode& status);
169 : static const CDFUnit* getCDFUnitFallback(const UHashtable* table, const UnicodeString& variant, int32_t log10Value);
170 :
171 0 : UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CompactDecimalFormat)
172 :
173 0 : CompactDecimalFormat::CompactDecimalFormat(
174 : const DecimalFormat& decimalFormat,
175 : const UHashtable* unitsByVariant,
176 : const double* divisors,
177 0 : PluralRules* pluralRules)
178 0 : : DecimalFormat(decimalFormat), _unitsByVariant(unitsByVariant), _divisors(divisors), _pluralRules(pluralRules) {
179 0 : }
180 :
181 0 : CompactDecimalFormat::CompactDecimalFormat(const CompactDecimalFormat& source)
182 0 : : DecimalFormat(source), _unitsByVariant(source._unitsByVariant), _divisors(source._divisors), _pluralRules(source._pluralRules->clone()) {
183 0 : }
184 :
185 : CompactDecimalFormat* U_EXPORT2
186 0 : CompactDecimalFormat::createInstance(
187 : const Locale& inLocale, UNumberCompactStyle style, UErrorCode& status) {
188 0 : LocalPointer<DecimalFormat> decfmt((DecimalFormat*) NumberFormat::makeInstance(inLocale, UNUM_DECIMAL, TRUE, status));
189 0 : if (U_FAILURE(status)) {
190 0 : return NULL;
191 : }
192 0 : LocalPointer<PluralRules> pluralRules(PluralRules::forLocale(inLocale, status));
193 0 : if (U_FAILURE(status)) {
194 0 : return NULL;
195 : }
196 0 : const CDFLocaleStyleData* data = getCDFLocaleStyleData(inLocale, style, status);
197 0 : if (U_FAILURE(status)) {
198 0 : return NULL;
199 : }
200 : CompactDecimalFormat* result =
201 0 : new CompactDecimalFormat(*decfmt, data->unitsByVariant, data->divisors, pluralRules.getAlias());
202 0 : if (result == NULL) {
203 0 : status = U_MEMORY_ALLOCATION_ERROR;
204 0 : return NULL;
205 : }
206 0 : pluralRules.orphan();
207 0 : result->setMaximumSignificantDigits(3);
208 0 : result->setSignificantDigitsUsed(TRUE);
209 0 : result->setGroupingUsed(FALSE);
210 0 : return result;
211 : }
212 :
213 : CompactDecimalFormat&
214 0 : CompactDecimalFormat::operator=(const CompactDecimalFormat& rhs) {
215 0 : if (this != &rhs) {
216 0 : DecimalFormat::operator=(rhs);
217 0 : _unitsByVariant = rhs._unitsByVariant;
218 0 : _divisors = rhs._divisors;
219 0 : delete _pluralRules;
220 0 : _pluralRules = rhs._pluralRules->clone();
221 : }
222 0 : return *this;
223 : }
224 :
225 0 : CompactDecimalFormat::~CompactDecimalFormat() {
226 0 : delete _pluralRules;
227 0 : }
228 :
229 :
230 : Format*
231 0 : CompactDecimalFormat::clone(void) const {
232 0 : return new CompactDecimalFormat(*this);
233 : }
234 :
235 : UBool
236 0 : CompactDecimalFormat::operator==(const Format& that) const {
237 0 : if (this == &that) {
238 0 : return TRUE;
239 : }
240 0 : return (DecimalFormat::operator==(that) && eqHelper((const CompactDecimalFormat&) that));
241 : }
242 :
243 : UBool
244 0 : CompactDecimalFormat::eqHelper(const CompactDecimalFormat& that) const {
245 0 : return uhash_equals(_unitsByVariant, that._unitsByVariant) && divisors_equal(_divisors, that._divisors) && (*_pluralRules == *that._pluralRules);
246 : }
247 :
248 : UnicodeString&
249 0 : CompactDecimalFormat::format(
250 : double number,
251 : UnicodeString& appendTo,
252 : FieldPosition& pos) const {
253 0 : UErrorCode status = U_ZERO_ERROR;
254 0 : return format(number, appendTo, pos, status);
255 : }
256 :
257 : UnicodeString&
258 0 : CompactDecimalFormat::format(
259 : double number,
260 : UnicodeString& appendTo,
261 : FieldPosition& pos,
262 : UErrorCode &status) const {
263 0 : if (U_FAILURE(status)) {
264 0 : return appendTo;
265 : }
266 0 : DigitList orig, rounded;
267 0 : orig.set(number);
268 : UBool isNegative;
269 0 : _round(orig, rounded, isNegative, status);
270 0 : if (U_FAILURE(status)) {
271 0 : return appendTo;
272 : }
273 0 : double roundedDouble = rounded.getDouble();
274 0 : if (isNegative) {
275 0 : roundedDouble = -roundedDouble;
276 : }
277 0 : int32_t baseIdx = computeLog10(roundedDouble, TRUE);
278 0 : double numberToFormat = roundedDouble / _divisors[baseIdx];
279 0 : UnicodeString variant = _pluralRules->select(numberToFormat);
280 0 : if (isNegative) {
281 0 : numberToFormat = -numberToFormat;
282 : }
283 0 : const CDFUnit* unit = getCDFUnitFallback(_unitsByVariant, variant, baseIdx);
284 0 : appendTo += unit->prefix;
285 0 : DecimalFormat::format(numberToFormat, appendTo, pos);
286 0 : appendTo += unit->suffix;
287 0 : return appendTo;
288 : }
289 :
290 : UnicodeString&
291 0 : CompactDecimalFormat::format(
292 : double /* number */,
293 : UnicodeString& appendTo,
294 : FieldPositionIterator* /* posIter */,
295 : UErrorCode& status) const {
296 0 : status = U_UNSUPPORTED_ERROR;
297 0 : return appendTo;
298 : }
299 :
300 : UnicodeString&
301 0 : CompactDecimalFormat::format(
302 : int32_t number,
303 : UnicodeString& appendTo,
304 : FieldPosition& pos) const {
305 0 : return format((double) number, appendTo, pos);
306 : }
307 :
308 : UnicodeString&
309 0 : CompactDecimalFormat::format(
310 : int32_t number,
311 : UnicodeString& appendTo,
312 : FieldPosition& pos,
313 : UErrorCode &status) const {
314 0 : return format((double) number, appendTo, pos, status);
315 : }
316 :
317 : UnicodeString&
318 0 : CompactDecimalFormat::format(
319 : int32_t /* number */,
320 : UnicodeString& appendTo,
321 : FieldPositionIterator* /* posIter */,
322 : UErrorCode& status) const {
323 0 : status = U_UNSUPPORTED_ERROR;
324 0 : return appendTo;
325 : }
326 :
327 : UnicodeString&
328 0 : CompactDecimalFormat::format(
329 : int64_t number,
330 : UnicodeString& appendTo,
331 : FieldPosition& pos) const {
332 0 : return format((double) number, appendTo, pos);
333 : }
334 :
335 : UnicodeString&
336 0 : CompactDecimalFormat::format(
337 : int64_t number,
338 : UnicodeString& appendTo,
339 : FieldPosition& pos,
340 : UErrorCode &status) const {
341 0 : return format((double) number, appendTo, pos, status);
342 : }
343 :
344 : UnicodeString&
345 0 : CompactDecimalFormat::format(
346 : int64_t /* number */,
347 : UnicodeString& appendTo,
348 : FieldPositionIterator* /* posIter */,
349 : UErrorCode& status) const {
350 0 : status = U_UNSUPPORTED_ERROR;
351 0 : return appendTo;
352 : }
353 :
354 : UnicodeString&
355 0 : CompactDecimalFormat::format(
356 : StringPiece /* number */,
357 : UnicodeString& appendTo,
358 : FieldPositionIterator* /* posIter */,
359 : UErrorCode& status) const {
360 0 : status = U_UNSUPPORTED_ERROR;
361 0 : return appendTo;
362 : }
363 :
364 : UnicodeString&
365 0 : CompactDecimalFormat::format(
366 : const DigitList& /* number */,
367 : UnicodeString& appendTo,
368 : FieldPositionIterator* /* posIter */,
369 : UErrorCode& status) const {
370 0 : status = U_UNSUPPORTED_ERROR;
371 0 : return appendTo;
372 : }
373 :
374 : UnicodeString&
375 0 : CompactDecimalFormat::format(const DigitList& /* number */,
376 : UnicodeString& appendTo,
377 : FieldPosition& /* pos */,
378 : UErrorCode& status) const {
379 0 : status = U_UNSUPPORTED_ERROR;
380 0 : return appendTo;
381 : }
382 :
383 : void
384 0 : CompactDecimalFormat::parse(
385 : const UnicodeString& /* text */,
386 : Formattable& /* result */,
387 : ParsePosition& /* parsePosition */) const {
388 0 : }
389 :
390 : void
391 0 : CompactDecimalFormat::parse(
392 : const UnicodeString& /* text */,
393 : Formattable& /* result */,
394 : UErrorCode& status) const {
395 0 : status = U_UNSUPPORTED_ERROR;
396 0 : }
397 :
398 : CurrencyAmount*
399 0 : CompactDecimalFormat::parseCurrency(
400 : const UnicodeString& /* text */,
401 : ParsePosition& /* pos */) const {
402 0 : return NULL;
403 : }
404 :
405 0 : void CDFLocaleStyleData::Init(UErrorCode& status) {
406 0 : if (unitsByVariant != NULL) {
407 0 : return;
408 : }
409 0 : unitsByVariant = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status);
410 0 : if (U_FAILURE(status)) {
411 0 : return;
412 : }
413 0 : uhash_setKeyDeleter(unitsByVariant, uprv_free);
414 0 : uhash_setValueDeleter(unitsByVariant, deleteCDFUnits);
415 : }
416 :
417 0 : CDFLocaleStyleData::~CDFLocaleStyleData() {
418 0 : setToBogus();
419 0 : }
420 :
421 0 : void CDFLocaleStyleData::setToBogus() {
422 0 : if (unitsByVariant != NULL) {
423 0 : uhash_close(unitsByVariant);
424 0 : unitsByVariant = NULL;
425 : }
426 0 : }
427 :
428 0 : void CDFLocaleData::Init(UErrorCode& status) {
429 0 : shortData.Init(status);
430 0 : if (U_FAILURE(status)) {
431 0 : return;
432 : }
433 0 : longData.Init(status);
434 : }
435 :
436 : // Helper method for operator=
437 0 : static UBool divisors_equal(const double* lhs, const double* rhs) {
438 0 : for (int32_t i = 0; i < MAX_DIGITS; ++i) {
439 0 : if (lhs[i] != rhs[i]) {
440 0 : return FALSE;
441 : }
442 : }
443 0 : return TRUE;
444 : }
445 :
446 : // getCDFLocaleStyleData returns pointer to formatting data for given locale and
447 : // style within the global cache. On cache miss, getCDFLocaleStyleData loads
448 : // the data from CLDR into the global cache before returning the pointer. If a
449 : // UNUM_LONG data is requested for a locale, and that locale does not have
450 : // UNUM_LONG data, getCDFLocaleStyleData will fall back to UNUM_SHORT data for
451 : // that locale.
452 0 : static const CDFLocaleStyleData* getCDFLocaleStyleData(const Locale& inLocale, UNumberCompactStyle style, UErrorCode& status) {
453 0 : if (U_FAILURE(status)) {
454 0 : return NULL;
455 : }
456 0 : CDFLocaleData* result = NULL;
457 0 : const char* key = inLocale.getName();
458 : {
459 0 : Mutex lock(&gCompactDecimalMetaLock);
460 0 : if (gCompactDecimalData == NULL) {
461 0 : gCompactDecimalData = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status);
462 0 : if (U_FAILURE(status)) {
463 0 : return NULL;
464 : }
465 0 : uhash_setKeyDeleter(gCompactDecimalData, uprv_free);
466 0 : uhash_setValueDeleter(gCompactDecimalData, deleteCDFLocaleData);
467 0 : ucln_i18n_registerCleanup(UCLN_I18N_CDFINFO, cdf_cleanup);
468 : } else {
469 0 : result = (CDFLocaleData*) uhash_get(gCompactDecimalData, key);
470 : }
471 : }
472 0 : if (result != NULL) {
473 0 : return extractDataByStyleEnum(*result, style, status);
474 : }
475 :
476 0 : result = loadCDFLocaleData(inLocale, status);
477 0 : if (U_FAILURE(status)) {
478 0 : return NULL;
479 : }
480 :
481 : {
482 0 : Mutex lock(&gCompactDecimalMetaLock);
483 0 : CDFLocaleData* temp = (CDFLocaleData*) uhash_get(gCompactDecimalData, key);
484 0 : if (temp != NULL) {
485 0 : delete result;
486 0 : result = temp;
487 : } else {
488 0 : uhash_put(gCompactDecimalData, uprv_strdup(key), (void*) result, &status);
489 0 : if (U_FAILURE(status)) {
490 0 : return NULL;
491 : }
492 : }
493 : }
494 0 : return extractDataByStyleEnum(*result, style, status);
495 : }
496 :
497 0 : static const CDFLocaleStyleData* extractDataByStyleEnum(const CDFLocaleData& data, UNumberCompactStyle style, UErrorCode& status) {
498 0 : switch (style) {
499 : case UNUM_SHORT:
500 0 : return &data.shortData;
501 : case UNUM_LONG:
502 0 : if (!data.longData.isBogus()) {
503 0 : return &data.longData;
504 : }
505 0 : return &data.shortData;
506 : default:
507 0 : status = U_ILLEGAL_ARGUMENT_ERROR;
508 0 : return NULL;
509 : }
510 : }
511 :
512 : // loadCDFLocaleData loads formatting data from CLDR for a given locale. The
513 : // caller owns the returned pointer.
514 0 : static CDFLocaleData* loadCDFLocaleData(const Locale& inLocale, UErrorCode& status) {
515 0 : if (U_FAILURE(status)) {
516 0 : return NULL;
517 : }
518 0 : CDFLocaleData* result = new CDFLocaleData;
519 0 : if (result == NULL) {
520 0 : status = U_MEMORY_ALLOCATION_ERROR;
521 0 : return NULL;
522 : }
523 0 : result->Init(status);
524 0 : if (U_FAILURE(status)) {
525 0 : delete result;
526 0 : return NULL;
527 : }
528 :
529 0 : load(inLocale, result, status);
530 :
531 0 : if (U_FAILURE(status)) {
532 0 : delete result;
533 0 : return NULL;
534 : }
535 0 : return result;
536 : }
537 :
538 : namespace {
539 :
540 : struct CmptDecDataSink : public ResourceSink {
541 :
542 : CDFLocaleData& dataBundle; // Where to save values when they are read
543 : UBool isLatin; // Whether or not we are traversing the Latin tree
544 : UBool isFallback; // Whether or not we are traversing the Latin tree as fallback
545 :
546 : enum EPatternsTableKey { PATTERNS_SHORT, PATTERNS_LONG };
547 : enum EFormatsTableKey { DECIMAL_FORMAT, CURRENCY_FORMAT };
548 :
549 : /*
550 : * NumberElements{ <-- top (numbering system table)
551 : * latn{ <-- patternsTable (one per numbering system)
552 : * patternsLong{ <-- formatsTable (one per pattern)
553 : * decimalFormat{ <-- powersOfTenTable (one per format)
554 : * 1000{ <-- pluralVariantsTable (one per power of ten)
555 : * one{"0 thousand"} <-- plural variant and template
556 : */
557 :
558 0 : CmptDecDataSink(CDFLocaleData& _dataBundle)
559 0 : : dataBundle(_dataBundle), isLatin(FALSE), isFallback(FALSE) {}
560 : virtual ~CmptDecDataSink();
561 :
562 0 : virtual void put(const char *key, ResourceValue &value, UBool isRoot, UErrorCode &errorCode) {
563 : // SPECIAL CASE: Don't consume root in the non-Latin numbering system
564 0 : if (isRoot && !isLatin) { return; }
565 :
566 0 : ResourceTable patternsTable = value.getTable(errorCode);
567 0 : if (U_FAILURE(errorCode)) { return; }
568 0 : for (int i1 = 0; patternsTable.getKeyAndValue(i1, key, value); ++i1) {
569 :
570 : // Check for patternsShort or patternsLong
571 : EPatternsTableKey patternsTableKey;
572 0 : if (uprv_strcmp(key, gPatternsShort) == 0) {
573 0 : patternsTableKey = PATTERNS_SHORT;
574 0 : } else if (uprv_strcmp(key, gPatternsLong) == 0) {
575 0 : patternsTableKey = PATTERNS_LONG;
576 : } else {
577 0 : continue;
578 : }
579 :
580 : // Traverse into the formats table
581 0 : ResourceTable formatsTable = value.getTable(errorCode);
582 0 : if (U_FAILURE(errorCode)) { return; }
583 0 : for (int i2 = 0; formatsTable.getKeyAndValue(i2, key, value); ++i2) {
584 :
585 : // Check for decimalFormat or currencyFormat
586 : EFormatsTableKey formatsTableKey;
587 0 : if (uprv_strcmp(key, gDecimalFormatTag) == 0) {
588 0 : formatsTableKey = DECIMAL_FORMAT;
589 : // TODO: Enable this statement when currency support is added
590 : // } else if (uprv_strcmp(key, gCurrencyFormat) == 0) {
591 : // formatsTableKey = CURRENCY_FORMAT;
592 : } else {
593 0 : continue;
594 : }
595 :
596 : // Set the current style and destination based on the two keys
597 : UNumberCompactStyle style;
598 0 : CDFLocaleStyleData* destination = NULL;
599 0 : if (patternsTableKey == PATTERNS_LONG
600 0 : && formatsTableKey == DECIMAL_FORMAT) {
601 0 : style = UNUM_LONG;
602 0 : destination = &dataBundle.longData;
603 0 : } else if (patternsTableKey == PATTERNS_SHORT
604 0 : && formatsTableKey == DECIMAL_FORMAT) {
605 0 : style = UNUM_SHORT;
606 0 : destination = &dataBundle.shortData;
607 : // TODO: Enable the following statements when currency support is added
608 : // } else if (patternsTableKey == PATTERNS_SHORT
609 : // && formatsTableKey == CURRENCY_FORMAT) {
610 : // style = UNUM_SHORT_CURRENCY; // or whatever the enum gets named
611 : // destination = &dataBundle.shortCurrencyData;
612 : // } else {
613 : // // Silently ignore this case
614 : // continue;
615 : }
616 :
617 : // SPECIAL CASE: RULES FOR WHETHER OR NOT TO CONSUME THIS TABLE:
618 : // 1) Don't consume longData if shortData was consumed from the non-Latin
619 : // locale numbering system
620 : // 2) Don't consume longData for the first time if this is the root bundle and
621 : // shortData is already populated from a more specific locale. Note that if
622 : // both longData and shortData are both only in root, longData will be
623 : // consumed since it is alphabetically before shortData in the bundle.
624 0 : if (isFallback
625 0 : && style == UNUM_LONG
626 0 : && !dataBundle.shortData.isEmpty()
627 0 : && !dataBundle.shortData.fromFallback) {
628 0 : continue;
629 : }
630 0 : if (isRoot
631 0 : && style == UNUM_LONG
632 0 : && dataBundle.longData.isEmpty()
633 0 : && !dataBundle.shortData.isEmpty()) {
634 0 : continue;
635 : }
636 :
637 : // Set the "fromFallback" flag on the data object
638 0 : destination->fromFallback = isFallback;
639 :
640 : // Traverse into the powers of ten table
641 0 : ResourceTable powersOfTenTable = value.getTable(errorCode);
642 0 : if (U_FAILURE(errorCode)) { return; }
643 0 : for (int i3 = 0; powersOfTenTable.getKeyAndValue(i3, key, value); ++i3) {
644 :
645 : // The key will always be some even power of 10. e.g 10000.
646 0 : char* endPtr = NULL;
647 0 : double power10 = uprv_strtod(key, &endPtr);
648 0 : if (*endPtr != 0) {
649 0 : errorCode = U_INTERNAL_PROGRAM_ERROR;
650 0 : return;
651 : }
652 0 : int32_t log10Value = computeLog10(power10, FALSE);
653 :
654 : // Silently ignore divisors that are too big.
655 0 : if (log10Value >= MAX_DIGITS) continue;
656 :
657 : // Iterate over the plural variants ("one", "other", etc)
658 0 : ResourceTable pluralVariantsTable = value.getTable(errorCode);
659 0 : if (U_FAILURE(errorCode)) { return; }
660 0 : for (int i4 = 0; pluralVariantsTable.getKeyAndValue(i4, key, value); ++i4) {
661 0 : const char* pluralVariant = key;
662 0 : const UnicodeString formatStr = value.getUnicodeString(errorCode);
663 :
664 : // Copy the data into the in-memory data bundle (do not overwrite
665 : // existing values)
666 0 : int32_t numZeros = populatePrefixSuffix(
667 : pluralVariant, log10Value, formatStr,
668 0 : destination->unitsByVariant, FALSE, errorCode);
669 :
670 : // If populatePrefixSuffix returns -1, it means that this key has been
671 : // encountered already.
672 0 : if (numZeros < 0) {
673 0 : continue;
674 : }
675 :
676 : // Set the divisor, which is based on the number of zeros in the template
677 : // string. If the divisor from here is different from the one previously
678 : // stored, it means that the number of zeros in different plural variants
679 : // differs; throw an exception.
680 : // TODO: How should I check for floating-point errors here?
681 : // Is there a good reason why "divisor" is double and not long like Java?
682 0 : double divisor = calculateDivisor(power10, numZeros);
683 0 : if (destination->divisors[log10Value] != 0.0
684 0 : && destination->divisors[log10Value] != divisor) {
685 0 : errorCode = U_INTERNAL_PROGRAM_ERROR;
686 0 : return;
687 : }
688 0 : destination->divisors[log10Value] = divisor;
689 : }
690 : }
691 : }
692 : }
693 : }
694 : };
695 :
696 : // Virtual destructors must be defined out of line.
697 0 : CmptDecDataSink::~CmptDecDataSink() {}
698 :
699 : } // namespace
700 :
701 0 : static void load(const Locale& inLocale, CDFLocaleData* result, UErrorCode& status) {
702 0 : LocalPointer<NumberingSystem> ns(NumberingSystem::createInstance(inLocale, status));
703 0 : if (U_FAILURE(status)) {
704 0 : return;
705 : }
706 0 : const char* nsName = ns->getName();
707 :
708 0 : LocalUResourceBundlePointer resource(ures_open(NULL, inLocale.getName(), &status));
709 0 : if (U_FAILURE(status)) {
710 0 : return;
711 : }
712 0 : CmptDecDataSink sink(*result);
713 0 : sink.isFallback = FALSE;
714 :
715 : // First load the number elements data if nsName is not Latin.
716 0 : if (uprv_strcmp(nsName, gLatnTag) != 0) {
717 0 : sink.isLatin = FALSE;
718 0 : CharString path;
719 0 : path.append(gNumberElementsTag, status)
720 0 : .append('/', status)
721 0 : .append(nsName, status);
722 0 : ures_getAllItemsWithFallback(resource.getAlias(), path.data(), sink, status);
723 0 : if (status == U_MISSING_RESOURCE_ERROR) {
724 : // Silently ignore and use Latin
725 0 : status = U_ZERO_ERROR;
726 0 : } else if (U_FAILURE(status)) {
727 0 : return;
728 : }
729 0 : sink.isFallback = TRUE;
730 : }
731 :
732 : // Now load Latin.
733 0 : sink.isLatin = TRUE;
734 0 : ures_getAllItemsWithFallback(resource.getAlias(), gLatnPath, sink, status);
735 0 : if (U_FAILURE(status)) return;
736 :
737 : // If longData is empty, default it to be equal to shortData
738 0 : if (result->longData.isEmpty()) {
739 0 : result->longData.setToBogus();
740 : }
741 :
742 : // Check for "other" variants in each of the three data classes, and resolve missing elements.
743 :
744 0 : if (!result->longData.isBogus()) {
745 0 : checkForOtherVariants(&result->longData, status);
746 0 : if (U_FAILURE(status)) return;
747 0 : fillInMissing(&result->longData);
748 : }
749 :
750 0 : checkForOtherVariants(&result->shortData, status);
751 0 : if (U_FAILURE(status)) return;
752 0 : fillInMissing(&result->shortData);
753 :
754 : // TODO: Enable this statement when currency support is added
755 : // checkForOtherVariants(&result->shortCurrencyData, status);
756 : // if (U_FAILURE(status)) return;
757 : // fillInMissing(&result->shortCurrencyData);
758 : }
759 :
760 : // populatePrefixSuffix Adds a specific prefix-suffix pair to result for a
761 : // given variant and log10 value.
762 : // variant is 'zero', 'one', 'two', 'few', 'many', or 'other'.
763 : // formatStr is the format string from which the prefix and suffix are
764 : // extracted. It is usually of form 'Pefix 000 suffix'.
765 : // populatePrefixSuffix returns the number of 0's found in formatStr
766 : // before the decimal point.
767 : // In the special case that formatStr contains only spaces for prefix
768 : // and suffix, populatePrefixSuffix returns log10Value + 1.
769 0 : static int32_t populatePrefixSuffix(
770 : const char* variant, int32_t log10Value, const UnicodeString& formatStr, UHashtable* result, UBool overwrite, UErrorCode& status) {
771 0 : if (U_FAILURE(status)) {
772 0 : return 0;
773 : }
774 :
775 : // ICU 59 HACK: Ignore negative part of format string, mimicking ICU 58 behavior.
776 : // TODO(sffc): Make sure this is fixed during the overhaul port in ICU 60.
777 0 : int32_t semiPos = formatStr.indexOf(';', 0);
778 0 : if (semiPos == -1) {
779 0 : semiPos = formatStr.length();
780 : }
781 0 : UnicodeString positivePart = formatStr.tempSubString(0, semiPos);
782 :
783 0 : int32_t firstIdx = positivePart.indexOf(kZero, UPRV_LENGTHOF(kZero), 0);
784 : // We must have 0's in format string.
785 0 : if (firstIdx == -1) {
786 0 : status = U_INTERNAL_PROGRAM_ERROR;
787 0 : return 0;
788 : }
789 0 : int32_t lastIdx = positivePart.lastIndexOf(kZero, UPRV_LENGTHOF(kZero), firstIdx);
790 0 : CDFUnit* unit = createCDFUnit(variant, log10Value, result, status);
791 0 : if (U_FAILURE(status)) {
792 0 : return 0;
793 : }
794 :
795 : // Return -1 if we are not overwriting an existing value
796 0 : if (unit->isSet() && !overwrite) {
797 0 : return -1;
798 : }
799 0 : unit->markAsSet();
800 :
801 : // Everything up to first 0 is the prefix
802 0 : unit->prefix = positivePart.tempSubString(0, firstIdx);
803 0 : fixQuotes(unit->prefix);
804 : // Everything beyond the last 0 is the suffix
805 0 : unit->suffix = positivePart.tempSubString(lastIdx + 1);
806 0 : fixQuotes(unit->suffix);
807 :
808 : // If there is effectively no prefix or suffix, ignore the actual number of
809 : // 0's and act as if the number of 0's matches the size of the number.
810 0 : if (onlySpaces(unit->prefix) && onlySpaces(unit->suffix)) {
811 0 : return log10Value + 1;
812 : }
813 :
814 : // Calculate number of zeros before decimal point
815 0 : int32_t idx = firstIdx + 1;
816 0 : while (idx <= lastIdx && positivePart.charAt(idx) == u_0) {
817 0 : ++idx;
818 : }
819 0 : return (idx - firstIdx);
820 : }
821 :
// Derives the divisor for a template from its magnitude (power10) and the
// number of zeros it contains: power10 is divided by 10 once for every zero
// beyond the first. A numZeros of 1 or less returns power10 unchanged.
static double calculateDivisor(double power10, int32_t numZeros) {
  double quotient = power10;
  for (int32_t zerosLeft = numZeros; zerosLeft > 1; --zerosLeft) {
    quotient /= 10.0;
  }
  return quotient;
}
831 :
832 0 : static UBool onlySpaces(UnicodeString u) {
833 0 : return u.trim().length() == 0;
834 : }
835 :
836 : // fixQuotes unescapes single quotes. Don''t -> Don't. Letter 'j' -> Letter j.
837 : // Modifies s in place.
838 0 : static void fixQuotes(UnicodeString& s) {
839 0 : QuoteState state = OUTSIDE;
840 0 : int32_t len = s.length();
841 0 : int32_t dest = 0;
842 0 : for (int32_t i = 0; i < len; ++i) {
843 0 : UChar ch = s.charAt(i);
844 0 : if (ch == u_apos) {
845 0 : if (state == INSIDE_EMPTY) {
846 0 : s.setCharAt(dest, ch);
847 0 : ++dest;
848 : }
849 : } else {
850 0 : s.setCharAt(dest, ch);
851 0 : ++dest;
852 : }
853 :
854 : // Update state
855 0 : switch (state) {
856 : case OUTSIDE:
857 0 : state = ch == u_apos ? INSIDE_EMPTY : OUTSIDE;
858 0 : break;
859 : case INSIDE_EMPTY:
860 : case INSIDE_FULL:
861 0 : state = ch == u_apos ? OUTSIDE : INSIDE_FULL;
862 0 : break;
863 : default:
864 0 : break;
865 : }
866 : }
867 0 : s.truncate(dest);
868 0 : }
869 :
870 : // Checks to make sure that an "other" variant is present in all
871 : // powers of 10.
872 0 : static void checkForOtherVariants(CDFLocaleStyleData* result,
873 : UErrorCode& status) {
874 0 : if (result == NULL || result->unitsByVariant == NULL) {
875 0 : return;
876 : }
877 :
878 : const CDFUnit* otherByBase =
879 0 : (const CDFUnit*) uhash_get(result->unitsByVariant, gOther);
880 0 : if (otherByBase == NULL) {
881 0 : status = U_INTERNAL_PROGRAM_ERROR;
882 0 : return;
883 : }
884 :
885 : // Check all other plural variants, and make sure that if
886 : // any of them are populated, then other is also populated
887 0 : int32_t pos = UHASH_FIRST;
888 : const UHashElement* element;
889 0 : while ((element = uhash_nextElement(result->unitsByVariant, &pos)) != NULL) {
890 0 : CDFUnit* variantsByBase = (CDFUnit*) element->value.pointer;
891 0 : if (variantsByBase == otherByBase) continue;
892 0 : for (int32_t log10Value = 0; log10Value < MAX_DIGITS; ++log10Value) {
893 0 : if (variantsByBase[log10Value].isSet()
894 0 : && !otherByBase[log10Value].isSet()) {
895 0 : status = U_INTERNAL_PROGRAM_ERROR;
896 0 : return;
897 : }
898 : }
899 : }
900 : }
901 :
902 : // fillInMissing ensures that the data in result is complete.
903 : // result data is complete if for each variant in result, there exists
904 : // a prefix-suffix pair for each log10 value and there also exists
905 : // a divisor for each log10 value.
906 : //
907 : // First this function figures out for which log10 values, the other
908 : // variant already had data. These are the same log10 values defined
909 : // in CLDR.
910 : //
911 : // For each log10 value not defined in CLDR, it uses the divisor for
912 : // the last defined log10 value or 1.
913 : //
914 : // Then for each variant, it does the following. For each log10
915 : // value not defined in CLDR, copy the prefix-suffix pair from the
916 : // previous log10 value. If log10 value is defined in CLDR but is
917 : // missing from given variant, copy the prefix-suffix pair for that
918 : // log10 value from the 'other' variant.
919 0 : static void fillInMissing(CDFLocaleStyleData* result) {
920 : const CDFUnit* otherUnits =
921 0 : (const CDFUnit*) uhash_get(result->unitsByVariant, gOther);
922 : UBool definedInCLDR[MAX_DIGITS];
923 0 : double lastDivisor = 1.0;
924 0 : for (int32_t i = 0; i < MAX_DIGITS; ++i) {
925 0 : if (!otherUnits[i].isSet()) {
926 0 : result->divisors[i] = lastDivisor;
927 0 : definedInCLDR[i] = FALSE;
928 : } else {
929 0 : lastDivisor = result->divisors[i];
930 0 : definedInCLDR[i] = TRUE;
931 : }
932 : }
933 : // Iterate over each variant.
934 0 : int32_t pos = UHASH_FIRST;
935 0 : const UHashElement* element = uhash_nextElement(result->unitsByVariant, &pos);
936 0 : for (;element != NULL; element = uhash_nextElement(result->unitsByVariant, &pos)) {
937 0 : CDFUnit* units = (CDFUnit*) element->value.pointer;
938 0 : for (int32_t i = 0; i < MAX_DIGITS; ++i) {
939 0 : if (definedInCLDR[i]) {
940 0 : if (!units[i].isSet()) {
941 0 : units[i] = otherUnits[i];
942 : }
943 : } else {
944 0 : if (i == 0) {
945 0 : units[0].markAsSet();
946 : } else {
947 0 : units[i] = units[i - 1];
948 : }
949 : }
950 : }
951 : }
952 0 : }
953 :
954 : // computeLog10 computes floor(log10(x)). If inRange is TRUE, the biggest
955 : // value computeLog10 will return MAX_DIGITS -1 even for
956 : // numbers > 10^MAX_DIGITS. If inRange is FALSE, computeLog10 will return
957 : // up to MAX_DIGITS.
958 0 : static int32_t computeLog10(double x, UBool inRange) {
959 0 : int32_t result = 0;
960 0 : int32_t max = inRange ? MAX_DIGITS - 1 : MAX_DIGITS;
961 0 : while (x >= 10.0) {
962 0 : x /= 10.0;
963 0 : ++result;
964 0 : if (result == max) {
965 0 : break;
966 : }
967 : }
968 0 : return result;
969 : }
970 :
971 : // createCDFUnit returns a pointer to the prefix-suffix pair for a given
972 : // variant and log10 value within table. If no such prefix-suffix pair is
973 : // stored in table, one is created within table before returning pointer.
974 0 : static CDFUnit* createCDFUnit(const char* variant, int32_t log10Value, UHashtable* table, UErrorCode& status) {
975 0 : if (U_FAILURE(status)) {
976 0 : return NULL;
977 : }
978 0 : CDFUnit *cdfUnit = (CDFUnit*) uhash_get(table, variant);
979 0 : if (cdfUnit == NULL) {
980 0 : cdfUnit = new CDFUnit[MAX_DIGITS];
981 0 : if (cdfUnit == NULL) {
982 0 : status = U_MEMORY_ALLOCATION_ERROR;
983 0 : return NULL;
984 : }
985 0 : uhash_put(table, uprv_strdup(variant), cdfUnit, &status);
986 0 : if (U_FAILURE(status)) {
987 0 : return NULL;
988 : }
989 : }
990 0 : CDFUnit* result = &cdfUnit[log10Value];
991 0 : return result;
992 : }
993 :
994 : // getCDFUnitFallback returns a pointer to the prefix-suffix pair for a given
995 : // variant and log10 value within table. If the given variant doesn't exist, it
996 : // falls back to the OTHER variant. Therefore, this method will always return
997 : // some non-NULL value.
998 0 : static const CDFUnit* getCDFUnitFallback(const UHashtable* table, const UnicodeString& variant, int32_t log10Value) {
999 0 : CharString cvariant;
1000 0 : UErrorCode status = U_ZERO_ERROR;
1001 0 : const CDFUnit *cdfUnit = NULL;
1002 0 : cvariant.appendInvariantChars(variant, status);
1003 0 : if (!U_FAILURE(status)) {
1004 0 : cdfUnit = (const CDFUnit*) uhash_get(table, cvariant.data());
1005 : }
1006 0 : if (cdfUnit == NULL) {
1007 0 : cdfUnit = (const CDFUnit*) uhash_get(table, gOther);
1008 : }
1009 0 : return &cdfUnit[log10Value];
1010 : }
1011 :
1012 : U_NAMESPACE_END
1013 : #endif
|