Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : *******************************************************************************
5 : * Copyright (C) 2007-2016, International Business Machines Corporation and
6 : * others. All Rights Reserved.
7 : *******************************************************************************
8 : *
9 : * File plurrule.cpp
10 : */
11 :
12 : #include <math.h>
13 : #include <stdio.h>
14 :
15 : #include "unicode/utypes.h"
16 : #include "unicode/localpointer.h"
17 : #include "unicode/plurrule.h"
18 : #include "unicode/upluralrules.h"
19 : #include "unicode/ures.h"
20 : #include "unicode/numfmt.h"
21 : #include "unicode/decimfmt.h"
22 : #include "charstr.h"
23 : #include "cmemory.h"
24 : #include "cstring.h"
25 : #include "digitlst.h"
26 : #include "hash.h"
27 : #include "locutil.h"
28 : #include "mutex.h"
29 : #include "patternprops.h"
30 : #include "plurrule_impl.h"
31 : #include "putilimp.h"
32 : #include "ucln_in.h"
33 : #include "ustrfmt.h"
34 : #include "uassert.h"
35 : #include "uvectr32.h"
36 : #include "sharedpluralrules.h"
37 : #include "unifiedcache.h"
38 : #include "digitinterval.h"
39 : #include "visibledigits.h"
40 :
41 : #if !UCONFIG_NO_FORMATTING
42 :
43 : U_NAMESPACE_BEGIN
44 :
// Keyword and rule text used throughout this file, spelled out as zero-terminated
// UChar arrays assembled from single-character constants (LOW_O, COLON, SPACE, ...).
// NOTE(review): the character constants are presumably declared in plurrule_impl.h — confirm.

// The keyword "other". Callers in this file pass its length (5) explicitly.
static const UChar PLURAL_KEYWORD_OTHER[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,0};
// The rule text "other: n"; parsed when no locale-specific rule is available.
static const UChar PLURAL_DEFAULT_RULE[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,COLON,SPACE,LOW_N,0};
// Rule-syntax words: "in", "not", "is", "mod", "and", "or".
static const UChar PK_IN[]={LOW_I,LOW_N,0};
static const UChar PK_NOT[]={LOW_N,LOW_O,LOW_T,0};
static const UChar PK_IS[]={LOW_I,LOW_S,0};
static const UChar PK_MOD[]={LOW_M,LOW_O,LOW_D,0};
static const UChar PK_AND[]={LOW_A,LOW_N,LOW_D,0};
static const UChar PK_OR[]={LOW_O,LOW_R,0};
// Single-letter operand names: "n", "i", "f", "t", "v".
static const UChar PK_VAR_N[]={LOW_N,0};
static const UChar PK_VAR_I[]={LOW_I,0};
static const UChar PK_VAR_F[]={LOW_F,0};
static const UChar PK_VAR_T[]={LOW_T,0};
static const UChar PK_VAR_V[]={LOW_V,0};
// Remaining syntax words: "within", "decimal", "integer".
static const UChar PK_WITHIN[]={LOW_W,LOW_I,LOW_T,LOW_H,LOW_I,LOW_N,0};
static const UChar PK_DECIMAL[]={LOW_D,LOW_E,LOW_C,LOW_I,LOW_M,LOW_A,LOW_L,0};
static const UChar PK_INTEGER[]={LOW_I,LOW_N,LOW_T,LOW_E,LOW_G,LOW_E,LOW_R,0};
61 :
// Expand ICU's RTTI boilerplate for the two UObject subclasses implemented in this file.
// NOTE(review): the macro presumably supplies getStaticClassID()/getDynamicClassID() — confirm in uobject.h.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralRules)
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration)
64 :
65 0 : PluralRules::PluralRules(UErrorCode& /*status*/)
66 : : UObject(),
67 0 : mRules(NULL)
68 : {
69 0 : }
70 :
71 0 : PluralRules::PluralRules(const PluralRules& other)
72 : : UObject(other),
73 0 : mRules(NULL)
74 : {
75 0 : *this=other;
76 0 : }
77 :
78 0 : PluralRules::~PluralRules() {
79 0 : delete mRules;
80 0 : }
81 :
82 0 : SharedPluralRules::~SharedPluralRules() {
83 0 : delete ptr;
84 0 : }
85 :
86 : PluralRules*
87 0 : PluralRules::clone() const {
88 0 : return new PluralRules(*this);
89 : }
90 :
91 : PluralRules&
92 0 : PluralRules::operator=(const PluralRules& other) {
93 0 : if (this != &other) {
94 0 : delete mRules;
95 0 : if (other.mRules==NULL) {
96 0 : mRules = NULL;
97 : }
98 : else {
99 0 : mRules = new RuleChain(*other.mRules);
100 : }
101 : }
102 :
103 0 : return *this;
104 : }
105 :
106 0 : StringEnumeration* PluralRules::getAvailableLocales(UErrorCode &status) {
107 0 : StringEnumeration *result = new PluralAvailableLocalesEnumeration(status);
108 0 : if (result == NULL && U_SUCCESS(status)) {
109 0 : status = U_MEMORY_ALLOCATION_ERROR;
110 : }
111 0 : if (U_FAILURE(status)) {
112 0 : delete result;
113 0 : result = NULL;
114 : }
115 0 : return result;
116 : }
117 :
118 :
119 : PluralRules* U_EXPORT2
120 0 : PluralRules::createRules(const UnicodeString& description, UErrorCode& status) {
121 0 : if (U_FAILURE(status)) {
122 0 : return NULL;
123 : }
124 :
125 0 : PluralRuleParser parser;
126 0 : PluralRules *newRules = new PluralRules(status);
127 0 : if (U_SUCCESS(status) && newRules == NULL) {
128 0 : status = U_MEMORY_ALLOCATION_ERROR;
129 : }
130 0 : parser.parse(description, newRules, status);
131 0 : if (U_FAILURE(status)) {
132 0 : delete newRules;
133 0 : newRules = NULL;
134 : }
135 0 : return newRules;
136 : }
137 :
138 :
139 : PluralRules* U_EXPORT2
140 0 : PluralRules::createDefaultRules(UErrorCode& status) {
141 0 : return createRules(UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1), status);
142 : }
143 :
144 : /******************************************************************************/
145 : /* Create PluralRules cache */
146 :
147 : template<> U_I18N_API
148 0 : const SharedPluralRules *LocaleCacheKey<SharedPluralRules>::createObject(
149 : const void * /*unused*/, UErrorCode &status) const {
150 0 : const char *localeId = fLoc.getName();
151 0 : PluralRules *pr = PluralRules::internalForLocale(
152 0 : localeId, UPLURAL_TYPE_CARDINAL, status);
153 0 : if (U_FAILURE(status)) {
154 0 : return NULL;
155 : }
156 0 : SharedPluralRules *result = new SharedPluralRules(pr);
157 0 : if (result == NULL) {
158 0 : status = U_MEMORY_ALLOCATION_ERROR;
159 0 : delete pr;
160 0 : return NULL;
161 : }
162 0 : result->addRef();
163 0 : return result;
164 : }
165 :
166 : /* end plural rules cache */
167 : /******************************************************************************/
168 :
169 : const SharedPluralRules* U_EXPORT2
170 0 : PluralRules::createSharedInstance(
171 : const Locale& locale, UPluralType type, UErrorCode& status) {
172 0 : if (U_FAILURE(status)) {
173 0 : return NULL;
174 : }
175 0 : if (type != UPLURAL_TYPE_CARDINAL) {
176 0 : status = U_UNSUPPORTED_ERROR;
177 0 : return NULL;
178 : }
179 0 : const SharedPluralRules *result = NULL;
180 0 : UnifiedCache::getByLocale(locale, result, status);
181 0 : return result;
182 : }
183 :
184 : PluralRules* U_EXPORT2
185 0 : PluralRules::forLocale(const Locale& locale, UErrorCode& status) {
186 0 : return forLocale(locale, UPLURAL_TYPE_CARDINAL, status);
187 : }
188 :
189 : PluralRules* U_EXPORT2
190 0 : PluralRules::forLocale(const Locale& locale, UPluralType type, UErrorCode& status) {
191 0 : if (type != UPLURAL_TYPE_CARDINAL) {
192 0 : return internalForLocale(locale, type, status);
193 : }
194 : const SharedPluralRules *shared = createSharedInstance(
195 0 : locale, type, status);
196 0 : if (U_FAILURE(status)) {
197 0 : return NULL;
198 : }
199 0 : PluralRules *result = (*shared)->clone();
200 0 : shared->removeRef();
201 0 : if (result == NULL) {
202 0 : status = U_MEMORY_ALLOCATION_ERROR;
203 : }
204 0 : return result;
205 : }
206 :
207 : PluralRules* U_EXPORT2
208 0 : PluralRules::internalForLocale(const Locale& locale, UPluralType type, UErrorCode& status) {
209 0 : if (U_FAILURE(status)) {
210 0 : return NULL;
211 : }
212 0 : if (type >= UPLURAL_TYPE_COUNT) {
213 0 : status = U_ILLEGAL_ARGUMENT_ERROR;
214 0 : return NULL;
215 : }
216 0 : PluralRules *newObj = new PluralRules(status);
217 0 : if (newObj==NULL || U_FAILURE(status)) {
218 0 : delete newObj;
219 0 : return NULL;
220 : }
221 0 : UnicodeString locRule = newObj->getRuleFromResource(locale, type, status);
222 : // TODO: which errors, if any, should be returned?
223 0 : if (locRule.length() == 0) {
224 : // Locales with no specific rules (all numbers have the "other" category
225 : // will return a U_MISSING_RESOURCE_ERROR at this point. This is not
226 : // an error.
227 0 : locRule = UnicodeString(PLURAL_DEFAULT_RULE);
228 0 : status = U_ZERO_ERROR;
229 : }
230 0 : PluralRuleParser parser;
231 0 : parser.parse(locRule, newObj, status);
232 : // TODO: should rule parse errors be returned, or
233 : // should we silently use default rules?
234 : // Original impl used default rules.
235 : // Ask the question to ICU Core.
236 :
237 0 : return newObj;
238 : }
239 :
240 : UnicodeString
241 0 : PluralRules::select(int32_t number) const {
242 0 : return select(FixedDecimal(number));
243 : }
244 :
245 : UnicodeString
246 0 : PluralRules::select(double number) const {
247 0 : return select(FixedDecimal(number));
248 : }
249 :
250 : UnicodeString
251 0 : PluralRules::select(const Formattable& obj, const NumberFormat& fmt, UErrorCode& status) const {
252 0 : if (U_SUCCESS(status)) {
253 0 : const DecimalFormat *decFmt = dynamic_cast<const DecimalFormat *>(&fmt);
254 0 : if (decFmt != NULL) {
255 0 : VisibleDigitsWithExponent digits;
256 0 : decFmt->initVisibleDigitsWithExponent(obj, digits, status);
257 0 : if (U_SUCCESS(status)) {
258 0 : return select(digits);
259 : }
260 : } else {
261 0 : double number = obj.getDouble(status);
262 0 : if (U_SUCCESS(status)) {
263 0 : return select(number);
264 : }
265 : }
266 : }
267 0 : return UnicodeString();
268 : }
269 :
270 : UnicodeString
271 0 : PluralRules::select(const FixedDecimal &number) const {
272 0 : if (mRules == NULL) {
273 0 : return UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1);
274 : }
275 : else {
276 0 : return mRules->select(number);
277 : }
278 : }
279 :
280 : UnicodeString
281 0 : PluralRules::select(const VisibleDigitsWithExponent &number) const {
282 0 : if (number.getExponent() != NULL) {
283 0 : return UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1);
284 : }
285 0 : return select(FixedDecimal(number.getMantissa()));
286 : }
287 :
288 :
289 :
290 : StringEnumeration*
291 0 : PluralRules::getKeywords(UErrorCode& status) const {
292 0 : if (U_FAILURE(status)) return NULL;
293 0 : StringEnumeration* nameEnumerator = new PluralKeywordEnumeration(mRules, status);
294 0 : if (U_FAILURE(status)) {
295 0 : delete nameEnumerator;
296 0 : return NULL;
297 : }
298 :
299 0 : return nameEnumerator;
300 : }
301 :
302 : double
303 0 : PluralRules::getUniqueKeywordValue(const UnicodeString& /* keyword */) {
304 : // Not Implemented.
305 0 : return UPLRULES_NO_UNIQUE_VALUE;
306 : }
307 :
308 : int32_t
309 0 : PluralRules::getAllKeywordValues(const UnicodeString & /* keyword */, double * /* dest */,
310 : int32_t /* destCapacity */, UErrorCode& error) {
311 0 : error = U_UNSUPPORTED_ERROR;
312 0 : return 0;
313 : }
314 :
315 :
// Returns the smallest power of ten (1, 10, 100, ...) that turns d into an
// integral value when multiplied by it.
// NaN has no such scale and would make the loop spin forever (NaN never
// equals floor(NaN)), so 1.0 is returned for it. Infinity is already equal
// to its own floor and also yields 1.0.
static double scaleForInt(double d) {
    if (d != d) {   // true only for NaN
        return 1.0;
    }
    double scale = 1.0;
    while (d != floor(d)) {
        d = d * 10.0;
        scale = scale * 10.0;
    }
    return scale;
}
324 :
325 : static int32_t
326 0 : getSamplesFromString(const UnicodeString &samples, double *dest,
327 : int32_t destCapacity, UErrorCode& status) {
328 0 : int32_t sampleCount = 0;
329 0 : int32_t sampleStartIdx = 0;
330 0 : int32_t sampleEndIdx = 0;
331 :
332 : //std::string ss; // TODO: debugging.
333 : // std::cout << "PluralRules::getSamples(), samples = \"" << samples.toUTF8String(ss) << "\"\n";
334 0 : for (sampleCount = 0; sampleCount < destCapacity && sampleStartIdx < samples.length(); ) {
335 0 : sampleEndIdx = samples.indexOf(COMMA, sampleStartIdx);
336 0 : if (sampleEndIdx == -1) {
337 0 : sampleEndIdx = samples.length();
338 : }
339 0 : const UnicodeString &sampleRange = samples.tempSubStringBetween(sampleStartIdx, sampleEndIdx);
340 : // ss.erase();
341 : // std::cout << "PluralRules::getSamples(), samplesRange = \"" << sampleRange.toUTF8String(ss) << "\"\n";
342 0 : int32_t tildeIndex = sampleRange.indexOf(TILDE);
343 0 : if (tildeIndex < 0) {
344 0 : FixedDecimal fixed(sampleRange, status);
345 0 : double sampleValue = fixed.source;
346 0 : if (fixed.visibleDecimalDigitCount == 0 || sampleValue != floor(sampleValue)) {
347 0 : dest[sampleCount++] = sampleValue;
348 : }
349 : } else {
350 :
351 0 : FixedDecimal fixedLo(sampleRange.tempSubStringBetween(0, tildeIndex), status);
352 0 : FixedDecimal fixedHi(sampleRange.tempSubStringBetween(tildeIndex+1), status);
353 0 : double rangeLo = fixedLo.source;
354 0 : double rangeHi = fixedHi.source;
355 0 : if (U_FAILURE(status)) {
356 0 : break;
357 : }
358 0 : if (rangeHi < rangeLo) {
359 0 : status = U_INVALID_FORMAT_ERROR;
360 0 : break;
361 : }
362 :
363 : // For ranges of samples with fraction decimal digits, scale the number up so that we
364 : // are adding one in the units place. Avoids roundoffs from repetitive adds of tenths.
365 :
366 0 : double scale = scaleForInt(rangeLo);
367 0 : double t = scaleForInt(rangeHi);
368 0 : if (t > scale) {
369 0 : scale = t;
370 : }
371 0 : rangeLo *= scale;
372 0 : rangeHi *= scale;
373 0 : for (double n=rangeLo; n<=rangeHi; n+=1) {
374 : // Hack Alert: don't return any decimal samples with integer values that
375 : // originated from a format with trailing decimals.
376 : // This API is returning doubles, which can't distinguish having displayed
377 : // zeros to the right of the decimal.
378 : // This results in test failures with values mapping back to a different keyword.
379 0 : double sampleValue = n/scale;
380 0 : if (!(sampleValue == floor(sampleValue) && fixedLo.visibleDecimalDigitCount > 0)) {
381 0 : dest[sampleCount++] = sampleValue;
382 : }
383 0 : if (sampleCount >= destCapacity) {
384 0 : break;
385 : }
386 : }
387 : }
388 0 : sampleStartIdx = sampleEndIdx + 1;
389 : }
390 0 : return sampleCount;
391 : }
392 :
393 :
394 : int32_t
395 0 : PluralRules::getSamples(const UnicodeString &keyword, double *dest,
396 : int32_t destCapacity, UErrorCode& status) {
397 0 : RuleChain *rc = rulesForKeyword(keyword);
398 0 : if (rc == NULL || destCapacity == 0 || U_FAILURE(status)) {
399 0 : return 0;
400 : }
401 0 : int32_t numSamples = getSamplesFromString(rc->fIntegerSamples, dest, destCapacity, status);
402 0 : if (numSamples == 0) {
403 0 : numSamples = getSamplesFromString(rc->fDecimalSamples, dest, destCapacity, status);
404 : }
405 0 : return numSamples;
406 : }
407 :
408 :
409 0 : RuleChain *PluralRules::rulesForKeyword(const UnicodeString &keyword) const {
410 : RuleChain *rc;
411 0 : for (rc = mRules; rc != NULL; rc = rc->fNext) {
412 0 : if (rc->fKeyword == keyword) {
413 0 : break;
414 : }
415 : }
416 0 : return rc;
417 : }
418 :
419 :
420 : UBool
421 0 : PluralRules::isKeyword(const UnicodeString& keyword) const {
422 0 : if (0 == keyword.compare(PLURAL_KEYWORD_OTHER, 5)) {
423 0 : return true;
424 : }
425 0 : return rulesForKeyword(keyword) != NULL;
426 : }
427 :
428 : UnicodeString
429 0 : PluralRules::getKeywordOther() const {
430 0 : return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5);
431 : }
432 :
433 : UBool
434 0 : PluralRules::operator==(const PluralRules& other) const {
435 : const UnicodeString *ptrKeyword;
436 0 : UErrorCode status= U_ZERO_ERROR;
437 :
438 0 : if ( this == &other ) {
439 0 : return TRUE;
440 : }
441 0 : LocalPointer<StringEnumeration> myKeywordList(getKeywords(status));
442 0 : LocalPointer<StringEnumeration> otherKeywordList(other.getKeywords(status));
443 0 : if (U_FAILURE(status)) {
444 0 : return FALSE;
445 : }
446 :
447 0 : if (myKeywordList->count(status)!=otherKeywordList->count(status)) {
448 0 : return FALSE;
449 : }
450 0 : myKeywordList->reset(status);
451 0 : while ((ptrKeyword=myKeywordList->snext(status))!=NULL) {
452 0 : if (!other.isKeyword(*ptrKeyword)) {
453 0 : return FALSE;
454 : }
455 : }
456 0 : otherKeywordList->reset(status);
457 0 : while ((ptrKeyword=otherKeywordList->snext(status))!=NULL) {
458 0 : if (!this->isKeyword(*ptrKeyword)) {
459 0 : return FALSE;
460 : }
461 : }
462 0 : if (U_FAILURE(status)) {
463 0 : return FALSE;
464 : }
465 :
466 0 : return TRUE;
467 : }
468 :
469 :
470 : void
471 0 : PluralRuleParser::parse(const UnicodeString& ruleData, PluralRules *prules, UErrorCode &status)
472 : {
473 0 : if (U_FAILURE(status)) {
474 0 : return;
475 : }
476 0 : U_ASSERT(ruleIndex == 0); // Parsers are good for a single use only!
477 0 : ruleSrc = &ruleData;
478 :
479 0 : while (ruleIndex< ruleSrc->length()) {
480 0 : getNextToken(status);
481 0 : if (U_FAILURE(status)) {
482 0 : return;
483 : }
484 0 : checkSyntax(status);
485 0 : if (U_FAILURE(status)) {
486 0 : return;
487 : }
488 0 : switch (type) {
489 : case tAnd:
490 0 : U_ASSERT(curAndConstraint != NULL);
491 0 : curAndConstraint = curAndConstraint->add();
492 0 : break;
493 : case tOr:
494 : {
495 0 : U_ASSERT(currentChain != NULL);
496 0 : OrConstraint *orNode=currentChain->ruleHeader;
497 0 : while (orNode->next != NULL) {
498 0 : orNode = orNode->next;
499 : }
500 0 : orNode->next= new OrConstraint();
501 0 : orNode=orNode->next;
502 0 : orNode->next=NULL;
503 0 : curAndConstraint = orNode->add();
504 : }
505 0 : break;
506 : case tIs:
507 0 : U_ASSERT(curAndConstraint != NULL);
508 0 : U_ASSERT(curAndConstraint->value == -1);
509 0 : U_ASSERT(curAndConstraint->rangeList == NULL);
510 0 : break;
511 : case tNot:
512 0 : U_ASSERT(curAndConstraint != NULL);
513 0 : curAndConstraint->negated=TRUE;
514 0 : break;
515 :
516 : case tNotEqual:
517 0 : curAndConstraint->negated=TRUE;
518 : U_FALLTHROUGH;
519 : case tIn:
520 : case tWithin:
521 : case tEqual:
522 0 : U_ASSERT(curAndConstraint != NULL);
523 0 : curAndConstraint->rangeList = new UVector32(status);
524 0 : curAndConstraint->rangeList->addElement(-1, status); // range Low
525 0 : curAndConstraint->rangeList->addElement(-1, status); // range Hi
526 0 : rangeLowIdx = 0;
527 0 : rangeHiIdx = 1;
528 0 : curAndConstraint->value=PLURAL_RANGE_HIGH;
529 0 : curAndConstraint->integerOnly = (type != tWithin);
530 0 : break;
531 : case tNumber:
532 0 : U_ASSERT(curAndConstraint != NULL);
533 0 : if ( (curAndConstraint->op==AndConstraint::MOD)&&
534 0 : (curAndConstraint->opNum == -1 ) ) {
535 0 : curAndConstraint->opNum=getNumberValue(token);
536 : }
537 : else {
538 0 : if (curAndConstraint->rangeList == NULL) {
539 : // this is for an 'is' rule
540 0 : curAndConstraint->value = getNumberValue(token);
541 : } else {
542 : // this is for an 'in' or 'within' rule
543 0 : if (curAndConstraint->rangeList->elementAti(rangeLowIdx) == -1) {
544 0 : curAndConstraint->rangeList->setElementAt(getNumberValue(token), rangeLowIdx);
545 0 : curAndConstraint->rangeList->setElementAt(getNumberValue(token), rangeHiIdx);
546 : }
547 : else {
548 0 : curAndConstraint->rangeList->setElementAt(getNumberValue(token), rangeHiIdx);
549 0 : if (curAndConstraint->rangeList->elementAti(rangeLowIdx) >
550 0 : curAndConstraint->rangeList->elementAti(rangeHiIdx)) {
551 : // Range Lower bound > Range Upper bound.
552 : // U_UNEXPECTED_TOKEN seems a little funny, but it is consistently
553 : // used for all plural rule parse errors.
554 0 : status = U_UNEXPECTED_TOKEN;
555 0 : break;
556 : }
557 : }
558 : }
559 : }
560 0 : break;
561 : case tComma:
562 : // TODO: rule syntax checking is inadequate, can happen with badly formed rules.
563 : // Catch cases like "n mod 10, is 1" here instead.
564 0 : if (curAndConstraint == NULL || curAndConstraint->rangeList == NULL) {
565 0 : status = U_UNEXPECTED_TOKEN;
566 0 : break;
567 : }
568 0 : U_ASSERT(curAndConstraint->rangeList->size() >= 2);
569 0 : rangeLowIdx = curAndConstraint->rangeList->size();
570 0 : curAndConstraint->rangeList->addElement(-1, status); // range Low
571 0 : rangeHiIdx = curAndConstraint->rangeList->size();
572 0 : curAndConstraint->rangeList->addElement(-1, status); // range Hi
573 0 : break;
574 : case tMod:
575 0 : U_ASSERT(curAndConstraint != NULL);
576 0 : curAndConstraint->op=AndConstraint::MOD;
577 0 : break;
578 : case tVariableN:
579 : case tVariableI:
580 : case tVariableF:
581 : case tVariableT:
582 : case tVariableV:
583 0 : U_ASSERT(curAndConstraint != NULL);
584 0 : curAndConstraint->digitsType = type;
585 0 : break;
586 : case tKeyword:
587 : {
588 0 : RuleChain *newChain = new RuleChain;
589 0 : if (newChain == NULL) {
590 0 : status = U_MEMORY_ALLOCATION_ERROR;
591 0 : break;
592 : }
593 0 : newChain->fKeyword = token;
594 0 : if (prules->mRules == NULL) {
595 0 : prules->mRules = newChain;
596 : } else {
597 : // The new rule chain goes at the end of the linked list of rule chains,
598 : // unless there is an "other" keyword & chain. "other" must remain last.
599 0 : RuleChain *insertAfter = prules->mRules;
600 0 : while (insertAfter->fNext!=NULL &&
601 0 : insertAfter->fNext->fKeyword.compare(PLURAL_KEYWORD_OTHER, 5) != 0 ){
602 0 : insertAfter=insertAfter->fNext;
603 : }
604 0 : newChain->fNext = insertAfter->fNext;
605 0 : insertAfter->fNext = newChain;
606 : }
607 0 : OrConstraint *orNode = new OrConstraint();
608 0 : newChain->ruleHeader = orNode;
609 0 : curAndConstraint = orNode->add();
610 0 : currentChain = newChain;
611 : }
612 0 : break;
613 :
614 : case tInteger:
615 : for (;;) {
616 0 : getNextToken(status);
617 0 : if (U_FAILURE(status) || type == tSemiColon || type == tEOF || type == tAt) {
618 0 : break;
619 : }
620 0 : if (type == tEllipsis) {
621 0 : currentChain->fIntegerSamplesUnbounded = TRUE;
622 0 : continue;
623 : }
624 0 : currentChain->fIntegerSamples.append(token);
625 : }
626 0 : break;
627 :
628 : case tDecimal:
629 : for (;;) {
630 0 : getNextToken(status);
631 0 : if (U_FAILURE(status) || type == tSemiColon || type == tEOF || type == tAt) {
632 0 : break;
633 : }
634 0 : if (type == tEllipsis) {
635 0 : currentChain->fDecimalSamplesUnbounded = TRUE;
636 0 : continue;
637 : }
638 0 : currentChain->fDecimalSamples.append(token);
639 : }
640 0 : break;
641 :
642 : default:
643 0 : break;
644 : }
645 0 : prevType=type;
646 0 : if (U_FAILURE(status)) {
647 0 : break;
648 : }
649 : }
650 : }
651 :
652 : UnicodeString
653 0 : PluralRules::getRuleFromResource(const Locale& locale, UPluralType type, UErrorCode& errCode) {
654 0 : UnicodeString emptyStr;
655 :
656 0 : if (U_FAILURE(errCode)) {
657 0 : return emptyStr;
658 : }
659 0 : LocalUResourceBundlePointer rb(ures_openDirect(NULL, "plurals", &errCode));
660 0 : if(U_FAILURE(errCode)) {
661 0 : return emptyStr;
662 : }
663 : const char *typeKey;
664 0 : switch (type) {
665 : case UPLURAL_TYPE_CARDINAL:
666 0 : typeKey = "locales";
667 0 : break;
668 : case UPLURAL_TYPE_ORDINAL:
669 0 : typeKey = "locales_ordinals";
670 0 : break;
671 : default:
672 : // Must not occur: The caller should have checked for valid types.
673 0 : errCode = U_ILLEGAL_ARGUMENT_ERROR;
674 0 : return emptyStr;
675 : }
676 0 : LocalUResourceBundlePointer locRes(ures_getByKey(rb.getAlias(), typeKey, NULL, &errCode));
677 0 : if(U_FAILURE(errCode)) {
678 0 : return emptyStr;
679 : }
680 0 : int32_t resLen=0;
681 0 : const char *curLocaleName=locale.getName();
682 0 : const UChar* s = ures_getStringByKey(locRes.getAlias(), curLocaleName, &resLen, &errCode);
683 :
684 0 : if (s == NULL) {
685 : // Check parent locales.
686 0 : UErrorCode status = U_ZERO_ERROR;
687 : char parentLocaleName[ULOC_FULLNAME_CAPACITY];
688 0 : const char *curLocaleName=locale.getName();
689 0 : uprv_strcpy(parentLocaleName, curLocaleName);
690 :
691 0 : while (uloc_getParent(parentLocaleName, parentLocaleName,
692 : ULOC_FULLNAME_CAPACITY, &status) > 0) {
693 0 : resLen=0;
694 0 : s = ures_getStringByKey(locRes.getAlias(), parentLocaleName, &resLen, &status);
695 0 : if (s != NULL) {
696 0 : errCode = U_ZERO_ERROR;
697 0 : break;
698 : }
699 0 : status = U_ZERO_ERROR;
700 : }
701 : }
702 0 : if (s==NULL) {
703 0 : return emptyStr;
704 : }
705 :
706 : char setKey[256];
707 0 : u_UCharsToChars(s, setKey, resLen + 1);
708 : // printf("\n PluralRule: %s\n", setKey);
709 :
710 0 : LocalUResourceBundlePointer ruleRes(ures_getByKey(rb.getAlias(), "rules", NULL, &errCode));
711 0 : if(U_FAILURE(errCode)) {
712 0 : return emptyStr;
713 : }
714 0 : LocalUResourceBundlePointer setRes(ures_getByKey(ruleRes.getAlias(), setKey, NULL, &errCode));
715 0 : if (U_FAILURE(errCode)) {
716 0 : return emptyStr;
717 : }
718 :
719 0 : int32_t numberKeys = ures_getSize(setRes.getAlias());
720 0 : UnicodeString result;
721 0 : const char *key=NULL;
722 0 : for(int32_t i=0; i<numberKeys; ++i) { // Keys are zero, one, few, ...
723 0 : UnicodeString rules = ures_getNextUnicodeString(setRes.getAlias(), &key, &errCode);
724 0 : UnicodeString uKey(key, -1, US_INV);
725 0 : result.append(uKey);
726 0 : result.append(COLON);
727 0 : result.append(rules);
728 0 : result.append(SEMI_COLON);
729 : }
730 0 : return result;
731 : }
732 :
733 :
734 : UnicodeString
735 0 : PluralRules::getRules() const {
736 0 : UnicodeString rules;
737 0 : if (mRules != NULL) {
738 0 : mRules->dumpRules(rules);
739 : }
740 0 : return rules;
741 : }
742 :
743 :
744 0 : AndConstraint::AndConstraint() {
745 0 : op = AndConstraint::NONE;
746 0 : opNum=-1;
747 0 : value = -1;
748 0 : rangeList = NULL;
749 0 : negated = FALSE;
750 0 : integerOnly = FALSE;
751 0 : digitsType = none;
752 0 : next=NULL;
753 0 : }
754 :
755 :
756 0 : AndConstraint::AndConstraint(const AndConstraint& other) {
757 0 : this->op = other.op;
758 0 : this->opNum=other.opNum;
759 0 : this->value=other.value;
760 0 : this->rangeList=NULL;
761 0 : if (other.rangeList != NULL) {
762 0 : UErrorCode status = U_ZERO_ERROR;
763 0 : this->rangeList = new UVector32(status);
764 0 : this->rangeList->assign(*other.rangeList, status);
765 : }
766 0 : this->integerOnly=other.integerOnly;
767 0 : this->negated=other.negated;
768 0 : this->digitsType = other.digitsType;
769 0 : if (other.next==NULL) {
770 0 : this->next=NULL;
771 : }
772 : else {
773 0 : this->next = new AndConstraint(*other.next);
774 : }
775 0 : }
776 :
777 0 : AndConstraint::~AndConstraint() {
778 0 : delete rangeList;
779 0 : if (next!=NULL) {
780 0 : delete next;
781 : }
782 0 : }
783 :
784 :
785 : UBool
786 0 : AndConstraint::isFulfilled(const FixedDecimal &number) {
787 0 : UBool result = TRUE;
788 0 : if (digitsType == none) {
789 : // An empty AndConstraint, created by a rule with a keyword but no following expression.
790 0 : return TRUE;
791 : }
792 0 : double n = number.get(digitsType); // pulls n | i | v | f value for the number.
793 : // Will always be positive.
794 : // May be non-integer (n option only)
795 : do {
796 0 : if (integerOnly && n != uprv_floor(n)) {
797 0 : result = FALSE;
798 0 : break;
799 : }
800 :
801 0 : if (op == MOD) {
802 0 : n = fmod(n, opNum);
803 : }
804 0 : if (rangeList == NULL) {
805 0 : result = value == -1 || // empty rule
806 0 : n == value; // 'is' rule
807 0 : break;
808 : }
809 0 : result = FALSE; // 'in' or 'within' rule
810 0 : for (int32_t r=0; r<rangeList->size(); r+=2) {
811 0 : if (rangeList->elementAti(r) <= n && n <= rangeList->elementAti(r+1)) {
812 0 : result = TRUE;
813 0 : break;
814 : }
815 : }
816 : } while (FALSE);
817 :
818 0 : if (negated) {
819 0 : result = !result;
820 : }
821 0 : return result;
822 : }
823 :
824 :
825 : AndConstraint*
826 0 : AndConstraint::add()
827 : {
828 0 : this->next = new AndConstraint();
829 0 : return this->next;
830 : }
831 :
832 0 : OrConstraint::OrConstraint() {
833 0 : childNode=NULL;
834 0 : next=NULL;
835 0 : }
836 :
837 0 : OrConstraint::OrConstraint(const OrConstraint& other) {
838 0 : if ( other.childNode == NULL ) {
839 0 : this->childNode = NULL;
840 : }
841 : else {
842 0 : this->childNode = new AndConstraint(*(other.childNode));
843 : }
844 0 : if (other.next == NULL ) {
845 0 : this->next = NULL;
846 : }
847 : else {
848 0 : this->next = new OrConstraint(*(other.next));
849 : }
850 0 : }
851 :
852 0 : OrConstraint::~OrConstraint() {
853 0 : if (childNode!=NULL) {
854 0 : delete childNode;
855 : }
856 0 : if (next!=NULL) {
857 0 : delete next;
858 : }
859 0 : }
860 :
861 : AndConstraint*
862 0 : OrConstraint::add()
863 : {
864 0 : OrConstraint *curOrConstraint=this;
865 : {
866 0 : while (curOrConstraint->next!=NULL) {
867 0 : curOrConstraint = curOrConstraint->next;
868 : }
869 0 : U_ASSERT(curOrConstraint->childNode == NULL);
870 0 : curOrConstraint->childNode = new AndConstraint();
871 : }
872 0 : return curOrConstraint->childNode;
873 : }
874 :
875 : UBool
876 0 : OrConstraint::isFulfilled(const FixedDecimal &number) {
877 0 : OrConstraint* orRule=this;
878 0 : UBool result=FALSE;
879 :
880 0 : while (orRule!=NULL && !result) {
881 0 : result=TRUE;
882 0 : AndConstraint* andRule = orRule->childNode;
883 0 : while (andRule!=NULL && result) {
884 0 : result = andRule->isFulfilled(number);
885 0 : andRule=andRule->next;
886 : }
887 0 : orRule = orRule->next;
888 : }
889 :
890 0 : return result;
891 : }
892 :
893 :
894 0 : RuleChain::RuleChain(): fKeyword(), fNext(NULL), ruleHeader(NULL), fDecimalSamples(), fIntegerSamples(),
895 0 : fDecimalSamplesUnbounded(FALSE), fIntegerSamplesUnbounded(FALSE) {
896 0 : }
897 :
898 0 : RuleChain::RuleChain(const RuleChain& other) :
899 : fKeyword(other.fKeyword), fNext(NULL), ruleHeader(NULL), fDecimalSamples(other.fDecimalSamples),
900 0 : fIntegerSamples(other.fIntegerSamples), fDecimalSamplesUnbounded(other.fDecimalSamplesUnbounded),
901 0 : fIntegerSamplesUnbounded(other.fIntegerSamplesUnbounded) {
902 0 : if (other.ruleHeader != NULL) {
903 0 : this->ruleHeader = new OrConstraint(*(other.ruleHeader));
904 : }
905 0 : if (other.fNext != NULL ) {
906 0 : this->fNext = new RuleChain(*other.fNext);
907 : }
908 0 : }
909 :
910 0 : RuleChain::~RuleChain() {
911 0 : delete fNext;
912 0 : delete ruleHeader;
913 0 : }
914 :
915 :
916 : UnicodeString
917 0 : RuleChain::select(const FixedDecimal &number) const {
918 0 : if (!number.isNanOrInfinity) {
919 0 : for (const RuleChain *rules = this; rules != NULL; rules = rules->fNext) {
920 0 : if (rules->ruleHeader->isFulfilled(number)) {
921 0 : return rules->fKeyword;
922 : }
923 : }
924 : }
925 0 : return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5);
926 : }
927 :
928 0 : static UnicodeString tokenString(tokenType tok) {
929 0 : UnicodeString s;
930 0 : switch (tok) {
931 : case tVariableN:
932 0 : s.append(LOW_N); break;
933 : case tVariableI:
934 0 : s.append(LOW_I); break;
935 : case tVariableF:
936 0 : s.append(LOW_F); break;
937 : case tVariableV:
938 0 : s.append(LOW_V); break;
939 : case tVariableT:
940 0 : s.append(LOW_T); break;
941 : default:
942 0 : s.append(TILDE);
943 : }
944 0 : return s;
945 : }
946 :
947 : void
948 0 : RuleChain::dumpRules(UnicodeString& result) {
949 : UChar digitString[16];
950 :
951 0 : if ( ruleHeader != NULL ) {
952 0 : result += fKeyword;
953 0 : result += COLON;
954 0 : result += SPACE;
955 0 : OrConstraint* orRule=ruleHeader;
956 0 : while ( orRule != NULL ) {
957 0 : AndConstraint* andRule=orRule->childNode;
958 0 : while ( andRule != NULL ) {
959 0 : if ((andRule->op==AndConstraint::NONE) && (andRule->rangeList==NULL) && (andRule->value == -1)) {
960 : // Empty Rules.
961 0 : } else if ( (andRule->op==AndConstraint::NONE) && (andRule->rangeList==NULL) ) {
962 0 : result += tokenString(andRule->digitsType);
963 0 : result += UNICODE_STRING_SIMPLE(" is ");
964 0 : if (andRule->negated) {
965 0 : result += UNICODE_STRING_SIMPLE("not ");
966 : }
967 0 : uprv_itou(digitString,16, andRule->value,10,0);
968 0 : result += UnicodeString(digitString);
969 : }
970 : else {
971 0 : result += tokenString(andRule->digitsType);
972 0 : result += SPACE;
973 0 : if (andRule->op==AndConstraint::MOD) {
974 0 : result += UNICODE_STRING_SIMPLE("mod ");
975 0 : uprv_itou(digitString,16, andRule->opNum,10,0);
976 0 : result += UnicodeString(digitString);
977 : }
978 0 : if (andRule->rangeList==NULL) {
979 0 : if (andRule->negated) {
980 0 : result += UNICODE_STRING_SIMPLE(" is not ");
981 0 : uprv_itou(digitString,16, andRule->value,10,0);
982 0 : result += UnicodeString(digitString);
983 : }
984 : else {
985 0 : result += UNICODE_STRING_SIMPLE(" is ");
986 0 : uprv_itou(digitString,16, andRule->value,10,0);
987 0 : result += UnicodeString(digitString);
988 : }
989 : }
990 : else {
991 0 : if (andRule->negated) {
992 0 : if ( andRule->integerOnly ) {
993 0 : result += UNICODE_STRING_SIMPLE(" not in ");
994 : }
995 : else {
996 0 : result += UNICODE_STRING_SIMPLE(" not within ");
997 : }
998 : }
999 : else {
1000 0 : if ( andRule->integerOnly ) {
1001 0 : result += UNICODE_STRING_SIMPLE(" in ");
1002 : }
1003 : else {
1004 0 : result += UNICODE_STRING_SIMPLE(" within ");
1005 : }
1006 : }
1007 0 : for (int32_t r=0; r<andRule->rangeList->size(); r+=2) {
1008 0 : int32_t rangeLo = andRule->rangeList->elementAti(r);
1009 0 : int32_t rangeHi = andRule->rangeList->elementAti(r+1);
1010 0 : uprv_itou(digitString,16, rangeLo, 10, 0);
1011 0 : result += UnicodeString(digitString);
1012 0 : result += UNICODE_STRING_SIMPLE("..");
1013 0 : uprv_itou(digitString,16, rangeHi, 10,0);
1014 0 : result += UnicodeString(digitString);
1015 0 : if (r+2 < andRule->rangeList->size()) {
1016 0 : result += UNICODE_STRING_SIMPLE(", ");
1017 : }
1018 : }
1019 : }
1020 : }
1021 0 : if ( (andRule=andRule->next) != NULL) {
1022 0 : result += UNICODE_STRING_SIMPLE(" and ");
1023 : }
1024 : }
1025 0 : if ( (orRule = orRule->next) != NULL ) {
1026 0 : result += UNICODE_STRING_SIMPLE(" or ");
1027 : }
1028 : }
1029 : }
1030 0 : if ( fNext != NULL ) {
1031 0 : result += UNICODE_STRING_SIMPLE("; ");
1032 0 : fNext->dumpRules(result);
1033 : }
1034 0 : }
1035 :
1036 :
1037 : UErrorCode
1038 0 : RuleChain::getKeywords(int32_t capacityOfKeywords, UnicodeString* keywords, int32_t& arraySize) const {
1039 0 : if ( arraySize < capacityOfKeywords-1 ) {
1040 0 : keywords[arraySize++]=fKeyword;
1041 : }
1042 : else {
1043 0 : return U_BUFFER_OVERFLOW_ERROR;
1044 : }
1045 :
1046 0 : if ( fNext != NULL ) {
1047 0 : return fNext->getKeywords(capacityOfKeywords, keywords, arraySize);
1048 : }
1049 : else {
1050 0 : return U_ZERO_ERROR;
1051 : }
1052 : }
1053 :
1054 : UBool
1055 0 : RuleChain::isKeyword(const UnicodeString& keywordParam) const {
1056 0 : if ( fKeyword == keywordParam ) {
1057 0 : return TRUE;
1058 : }
1059 :
1060 0 : if ( fNext != NULL ) {
1061 0 : return fNext->isKeyword(keywordParam);
1062 : }
1063 : else {
1064 0 : return FALSE;
1065 : }
1066 : }
1067 :
1068 :
1069 0 : PluralRuleParser::PluralRuleParser() :
1070 : ruleIndex(0), token(), type(none), prevType(none),
1071 0 : curAndConstraint(NULL), currentChain(NULL), rangeLowIdx(-1), rangeHiIdx(-1)
1072 : {
1073 0 : }
1074 :
1075 0 : PluralRuleParser::~PluralRuleParser() {
1076 0 : }
1077 :
1078 :
1079 : int32_t
1080 0 : PluralRuleParser::getNumberValue(const UnicodeString& token) {
1081 : int32_t i;
1082 : char digits[128];
1083 :
1084 0 : i = token.extract(0, token.length(), digits, UPRV_LENGTHOF(digits), US_INV);
1085 0 : digits[i]='\0';
1086 :
1087 0 : return((int32_t)atoi(digits));
1088 : }
1089 :
1090 :
1091 : void
1092 0 : PluralRuleParser::checkSyntax(UErrorCode &status)
1093 : {
1094 0 : if (U_FAILURE(status)) {
1095 0 : return;
1096 : }
1097 0 : if (!(prevType==none || prevType==tSemiColon)) {
1098 0 : type = getKeyType(token, type); // Switch token type from tKeyword if we scanned a reserved word,
1099 : // and we are not at the start of a rule, where a
1100 : // keyword is expected.
1101 : }
1102 :
1103 0 : switch(prevType) {
1104 : case none:
1105 : case tSemiColon:
1106 0 : if (type!=tKeyword && type != tEOF) {
1107 0 : status = U_UNEXPECTED_TOKEN;
1108 : }
1109 0 : break;
1110 : case tVariableN:
1111 : case tVariableI:
1112 : case tVariableF:
1113 : case tVariableT:
1114 : case tVariableV:
1115 0 : if (type != tIs && type != tMod && type != tIn &&
1116 0 : type != tNot && type != tWithin && type != tEqual && type != tNotEqual) {
1117 0 : status = U_UNEXPECTED_TOKEN;
1118 : }
1119 0 : break;
1120 : case tKeyword:
1121 0 : if (type != tColon) {
1122 0 : status = U_UNEXPECTED_TOKEN;
1123 : }
1124 0 : break;
1125 : case tColon:
1126 0 : if (!(type == tVariableN ||
1127 0 : type == tVariableI ||
1128 0 : type == tVariableF ||
1129 0 : type == tVariableT ||
1130 0 : type == tVariableV ||
1131 0 : type == tAt)) {
1132 0 : status = U_UNEXPECTED_TOKEN;
1133 : }
1134 0 : break;
1135 : case tIs:
1136 0 : if ( type != tNumber && type != tNot) {
1137 0 : status = U_UNEXPECTED_TOKEN;
1138 : }
1139 0 : break;
1140 : case tNot:
1141 0 : if (type != tNumber && type != tIn && type != tWithin) {
1142 0 : status = U_UNEXPECTED_TOKEN;
1143 : }
1144 0 : break;
1145 : case tMod:
1146 : case tDot2:
1147 : case tIn:
1148 : case tWithin:
1149 : case tEqual:
1150 : case tNotEqual:
1151 0 : if (type != tNumber) {
1152 0 : status = U_UNEXPECTED_TOKEN;
1153 : }
1154 0 : break;
1155 : case tAnd:
1156 : case tOr:
1157 0 : if ( type != tVariableN &&
1158 0 : type != tVariableI &&
1159 0 : type != tVariableF &&
1160 0 : type != tVariableT &&
1161 0 : type != tVariableV) {
1162 0 : status = U_UNEXPECTED_TOKEN;
1163 : }
1164 0 : break;
1165 : case tComma:
1166 0 : if (type != tNumber) {
1167 0 : status = U_UNEXPECTED_TOKEN;
1168 : }
1169 0 : break;
1170 : case tNumber:
1171 0 : if (type != tDot2 && type != tSemiColon && type != tIs && type != tNot &&
1172 0 : type != tIn && type != tEqual && type != tNotEqual && type != tWithin &&
1173 0 : type != tAnd && type != tOr && type != tComma && type != tAt &&
1174 0 : type != tEOF)
1175 : {
1176 0 : status = U_UNEXPECTED_TOKEN;
1177 : }
1178 : // TODO: a comma following a number that is not part of a range will be allowed.
1179 : // It's not the only case of this sort of thing. Parser needs a re-write.
1180 0 : break;
1181 : case tAt:
1182 0 : if (type != tDecimal && type != tInteger) {
1183 0 : status = U_UNEXPECTED_TOKEN;
1184 : }
1185 0 : break;
1186 : default:
1187 0 : status = U_UNEXPECTED_TOKEN;
1188 0 : break;
1189 : }
1190 : }
1191 :
1192 :
1193 : /*
1194 : * Scan the next token from the input rules.
1195 : * rules and returned token type are in the parser state variables.
1196 : */
1197 : void
1198 0 : PluralRuleParser::getNextToken(UErrorCode &status)
1199 : {
1200 0 : if (U_FAILURE(status)) {
1201 0 : return;
1202 : }
1203 :
1204 : UChar ch;
1205 0 : while (ruleIndex < ruleSrc->length()) {
1206 0 : ch = ruleSrc->charAt(ruleIndex);
1207 0 : type = charType(ch);
1208 0 : if (type != tSpace) {
1209 0 : break;
1210 : }
1211 0 : ++(ruleIndex);
1212 : }
1213 0 : if (ruleIndex >= ruleSrc->length()) {
1214 0 : type = tEOF;
1215 0 : return;
1216 : }
1217 0 : int32_t curIndex= ruleIndex;
1218 :
1219 0 : switch (type) {
1220 : case tColon:
1221 : case tSemiColon:
1222 : case tComma:
1223 : case tEllipsis:
1224 : case tTilde: // scanned '~'
1225 : case tAt: // scanned '@'
1226 : case tEqual: // scanned '='
1227 : case tMod: // scanned '%'
1228 : // Single character tokens.
1229 0 : ++curIndex;
1230 0 : break;
1231 :
1232 : case tNotEqual: // scanned '!'
1233 0 : if (ruleSrc->charAt(curIndex+1) == EQUALS) {
1234 0 : curIndex += 2;
1235 : } else {
1236 0 : type = none;
1237 0 : curIndex += 1;
1238 : }
1239 0 : break;
1240 :
1241 : case tKeyword:
1242 0 : while (type == tKeyword && ++curIndex < ruleSrc->length()) {
1243 0 : ch = ruleSrc->charAt(curIndex);
1244 0 : type = charType(ch);
1245 : }
1246 0 : type = tKeyword;
1247 0 : break;
1248 :
1249 : case tNumber:
1250 0 : while (type == tNumber && ++curIndex < ruleSrc->length()) {
1251 0 : ch = ruleSrc->charAt(curIndex);
1252 0 : type = charType(ch);
1253 : }
1254 0 : type = tNumber;
1255 0 : break;
1256 :
1257 : case tDot:
1258 : // We could be looking at either ".." in a range, or "..." at the end of a sample.
1259 0 : if (curIndex+1 >= ruleSrc->length() || ruleSrc->charAt(curIndex+1) != DOT) {
1260 0 : ++curIndex;
1261 0 : break; // Single dot
1262 : }
1263 0 : if (curIndex+2 >= ruleSrc->length() || ruleSrc->charAt(curIndex+2) != DOT) {
1264 0 : curIndex += 2;
1265 0 : type = tDot2;
1266 0 : break; // double dot
1267 : }
1268 0 : type = tEllipsis;
1269 0 : curIndex += 3;
1270 0 : break; // triple dot
1271 :
1272 : default:
1273 0 : status = U_UNEXPECTED_TOKEN;
1274 0 : ++curIndex;
1275 0 : break;
1276 : }
1277 :
1278 0 : U_ASSERT(ruleIndex <= ruleSrc->length());
1279 0 : U_ASSERT(curIndex <= ruleSrc->length());
1280 0 : token=UnicodeString(*ruleSrc, ruleIndex, curIndex-ruleIndex);
1281 0 : ruleIndex = curIndex;
1282 : }
1283 :
1284 : tokenType
1285 0 : PluralRuleParser::charType(UChar ch) {
1286 0 : if ((ch>=U_ZERO) && (ch<=U_NINE)) {
1287 0 : return tNumber;
1288 : }
1289 0 : if (ch>=LOW_A && ch<=LOW_Z) {
1290 0 : return tKeyword;
1291 : }
1292 0 : switch (ch) {
1293 : case COLON:
1294 0 : return tColon;
1295 : case SPACE:
1296 0 : return tSpace;
1297 : case SEMI_COLON:
1298 0 : return tSemiColon;
1299 : case DOT:
1300 0 : return tDot;
1301 : case COMMA:
1302 0 : return tComma;
1303 : case EXCLAMATION:
1304 0 : return tNotEqual;
1305 : case EQUALS:
1306 0 : return tEqual;
1307 : case PERCENT_SIGN:
1308 0 : return tMod;
1309 : case AT:
1310 0 : return tAt;
1311 : case ELLIPSIS:
1312 0 : return tEllipsis;
1313 : case TILDE:
1314 0 : return tTilde;
1315 : default :
1316 0 : return none;
1317 : }
1318 : }
1319 :
1320 :
1321 : // Set token type for reserved words in the Plural Rule syntax.
1322 :
1323 : tokenType
1324 0 : PluralRuleParser::getKeyType(const UnicodeString &token, tokenType keyType)
1325 : {
1326 0 : if (keyType != tKeyword) {
1327 0 : return keyType;
1328 : }
1329 :
1330 0 : if (0 == token.compare(PK_VAR_N, 1)) {
1331 0 : keyType = tVariableN;
1332 0 : } else if (0 == token.compare(PK_VAR_I, 1)) {
1333 0 : keyType = tVariableI;
1334 0 : } else if (0 == token.compare(PK_VAR_F, 1)) {
1335 0 : keyType = tVariableF;
1336 0 : } else if (0 == token.compare(PK_VAR_T, 1)) {
1337 0 : keyType = tVariableT;
1338 0 : } else if (0 == token.compare(PK_VAR_V, 1)) {
1339 0 : keyType = tVariableV;
1340 0 : } else if (0 == token.compare(PK_IS, 2)) {
1341 0 : keyType = tIs;
1342 0 : } else if (0 == token.compare(PK_AND, 3)) {
1343 0 : keyType = tAnd;
1344 0 : } else if (0 == token.compare(PK_IN, 2)) {
1345 0 : keyType = tIn;
1346 0 : } else if (0 == token.compare(PK_WITHIN, 6)) {
1347 0 : keyType = tWithin;
1348 0 : } else if (0 == token.compare(PK_NOT, 3)) {
1349 0 : keyType = tNot;
1350 0 : } else if (0 == token.compare(PK_MOD, 3)) {
1351 0 : keyType = tMod;
1352 0 : } else if (0 == token.compare(PK_OR, 2)) {
1353 0 : keyType = tOr;
1354 0 : } else if (0 == token.compare(PK_DECIMAL, 7)) {
1355 0 : keyType = tDecimal;
1356 0 : } else if (0 == token.compare(PK_INTEGER, 7)) {
1357 0 : keyType = tInteger;
1358 : }
1359 0 : return keyType;
1360 : }
1361 :
1362 :
1363 0 : PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode& status)
1364 0 : : pos(0), fKeywordNames(status) {
1365 0 : if (U_FAILURE(status)) {
1366 0 : return;
1367 : }
1368 0 : fKeywordNames.setDeleter(uprv_deleteUObject);
1369 0 : UBool addKeywordOther=TRUE;
1370 0 : RuleChain *node=header;
1371 0 : while(node!=NULL) {
1372 0 : fKeywordNames.addElement(new UnicodeString(node->fKeyword), status);
1373 0 : if (U_FAILURE(status)) {
1374 0 : return;
1375 : }
1376 0 : if (0 == node->fKeyword.compare(PLURAL_KEYWORD_OTHER, 5)) {
1377 0 : addKeywordOther= FALSE;
1378 : }
1379 0 : node=node->fNext;
1380 : }
1381 :
1382 0 : if (addKeywordOther) {
1383 0 : fKeywordNames.addElement(new UnicodeString(PLURAL_KEYWORD_OTHER), status);
1384 : }
1385 : }
1386 :
1387 : const UnicodeString*
1388 0 : PluralKeywordEnumeration::snext(UErrorCode& status) {
1389 0 : if (U_SUCCESS(status) && pos < fKeywordNames.size()) {
1390 0 : return (const UnicodeString*)fKeywordNames.elementAt(pos++);
1391 : }
1392 0 : return NULL;
1393 : }
1394 :
1395 : void
1396 0 : PluralKeywordEnumeration::reset(UErrorCode& /*status*/) {
1397 0 : pos=0;
1398 0 : }
1399 :
1400 : int32_t
1401 0 : PluralKeywordEnumeration::count(UErrorCode& /*status*/) const {
1402 0 : return fKeywordNames.size();
1403 : }
1404 :
1405 0 : PluralKeywordEnumeration::~PluralKeywordEnumeration() {
1406 0 : }
1407 :
1408 0 : FixedDecimal::FixedDecimal(const VisibleDigits &digits) {
1409 0 : digits.getFixedDecimal(
1410 : source, intValue, decimalDigits,
1411 : decimalDigitsWithoutTrailingZeros,
1412 0 : visibleDecimalDigitCount, hasIntegerValue);
1413 0 : isNegative = digits.isNegative();
1414 0 : isNanOrInfinity = digits.isNaNOrInfinity();
1415 0 : }
1416 :
1417 0 : FixedDecimal::FixedDecimal(double n, int32_t v, int64_t f) {
1418 0 : init(n, v, f);
1419 : // check values. TODO make into unit test.
1420 : //
1421 : // long visiblePower = (int) Math.pow(10, v);
1422 : // if (decimalDigits > visiblePower) {
1423 : // throw new IllegalArgumentException();
1424 : // }
1425 : // double fraction = intValue + (decimalDigits / (double) visiblePower);
1426 : // if (fraction != source) {
1427 : // double diff = Math.abs(fraction - source)/(Math.abs(fraction) + Math.abs(source));
1428 : // if (diff > 0.00000001d) {
1429 : // throw new IllegalArgumentException();
1430 : // }
1431 : // }
1432 0 : }
1433 :
1434 0 : FixedDecimal::FixedDecimal(double n, int32_t v) {
1435 : // Ugly, but for samples we don't care.
1436 0 : init(n, v, getFractionalDigits(n, v));
1437 0 : }
1438 :
1439 0 : FixedDecimal::FixedDecimal(double n) {
1440 0 : init(n);
1441 0 : }
1442 :
1443 0 : FixedDecimal::FixedDecimal() {
1444 0 : init(0, 0, 0);
1445 0 : }
1446 :
1447 :
1448 : // Create a FixedDecimal from a UnicodeString containing a number.
1449 : // Inefficient, but only used for samples, so simplicity trumps efficiency.
1450 :
1451 0 : FixedDecimal::FixedDecimal(const UnicodeString &num, UErrorCode &status) {
1452 0 : CharString cs;
1453 0 : cs.appendInvariantChars(num, status);
1454 0 : DigitList dl;
1455 0 : dl.set(cs.toStringPiece(), status);
1456 0 : if (U_FAILURE(status)) {
1457 0 : init(0, 0, 0);
1458 0 : return;
1459 : }
1460 0 : int32_t decimalPoint = num.indexOf(DOT);
1461 0 : double n = dl.getDouble();
1462 0 : if (decimalPoint == -1) {
1463 0 : init(n, 0, 0);
1464 : } else {
1465 0 : int32_t v = num.length() - decimalPoint - 1;
1466 0 : init(n, v, getFractionalDigits(n, v));
1467 : }
1468 : }
1469 :
1470 :
1471 0 : FixedDecimal::FixedDecimal(const FixedDecimal &other) {
1472 0 : source = other.source;
1473 0 : visibleDecimalDigitCount = other.visibleDecimalDigitCount;
1474 0 : decimalDigits = other.decimalDigits;
1475 0 : decimalDigitsWithoutTrailingZeros = other.decimalDigitsWithoutTrailingZeros;
1476 0 : intValue = other.intValue;
1477 0 : hasIntegerValue = other.hasIntegerValue;
1478 0 : isNegative = other.isNegative;
1479 0 : isNanOrInfinity = other.isNanOrInfinity;
1480 0 : }
1481 :
1482 :
1483 0 : void FixedDecimal::init(double n) {
1484 0 : int32_t numFractionDigits = decimals(n);
1485 0 : init(n, numFractionDigits, getFractionalDigits(n, numFractionDigits));
1486 0 : }
1487 :
1488 :
1489 0 : void FixedDecimal::init(double n, int32_t v, int64_t f) {
1490 0 : isNegative = n < 0.0;
1491 0 : source = fabs(n);
1492 0 : isNanOrInfinity = uprv_isNaN(source) || uprv_isPositiveInfinity(source);
1493 0 : if (isNanOrInfinity) {
1494 0 : v = 0;
1495 0 : f = 0;
1496 0 : intValue = 0;
1497 0 : hasIntegerValue = FALSE;
1498 : } else {
1499 0 : intValue = (int64_t)source;
1500 0 : hasIntegerValue = (source == intValue);
1501 : }
1502 :
1503 0 : visibleDecimalDigitCount = v;
1504 0 : decimalDigits = f;
1505 0 : if (f == 0) {
1506 0 : decimalDigitsWithoutTrailingZeros = 0;
1507 : } else {
1508 0 : int64_t fdwtz = f;
1509 0 : while ((fdwtz%10) == 0) {
1510 0 : fdwtz /= 10;
1511 : }
1512 0 : decimalDigitsWithoutTrailingZeros = fdwtz;
1513 : }
1514 0 : }
1515 :
1516 :
1517 : // Fast path only exact initialization. Return true if successful.
1518 : // Note: Do not multiply by 10 each time through loop, rounding cruft can build
1519 : // up that makes the check for an integer result fail.
1520 : // A single multiply of the original number works more reliably.
1521 : static int32_t p10[] = {1, 10, 100, 1000, 10000};
1522 0 : UBool FixedDecimal::quickInit(double n) {
1523 0 : UBool success = FALSE;
1524 0 : n = fabs(n);
1525 : int32_t numFractionDigits;
1526 0 : for (numFractionDigits = 0; numFractionDigits <= 3; numFractionDigits++) {
1527 0 : double scaledN = n * p10[numFractionDigits];
1528 0 : if (scaledN == floor(scaledN)) {
1529 0 : success = TRUE;
1530 0 : break;
1531 : }
1532 : }
1533 0 : if (success) {
1534 0 : init(n, numFractionDigits, getFractionalDigits(n, numFractionDigits));
1535 : }
1536 0 : return success;
1537 : }
1538 :
1539 :
1540 :
1541 0 : int32_t FixedDecimal::decimals(double n) {
1542 : // Count the number of decimal digits in the fraction part of the number, excluding trailing zeros.
1543 : // fastpath the common cases, integers or fractions with 3 or fewer digits
1544 0 : n = fabs(n);
1545 0 : for (int ndigits=0; ndigits<=3; ndigits++) {
1546 0 : double scaledN = n * p10[ndigits];
1547 0 : if (scaledN == floor(scaledN)) {
1548 0 : return ndigits;
1549 : }
1550 : }
1551 :
1552 : // Slow path, convert with sprintf, parse converted output.
1553 0 : char buf[30] = {0};
1554 0 : sprintf(buf, "%1.15e", n);
1555 : // formatted number looks like this: 1.234567890123457e-01
1556 0 : int exponent = atoi(buf+18);
1557 0 : int numFractionDigits = 15;
1558 0 : for (int i=16; ; --i) {
1559 0 : if (buf[i] != '0') {
1560 0 : break;
1561 : }
1562 0 : --numFractionDigits;
1563 : }
1564 0 : numFractionDigits -= exponent; // Fraction part of fixed point representation.
1565 0 : return numFractionDigits;
1566 : }
1567 :
1568 :
1569 : // Get the fraction digits of a double, represented as an integer.
1570 : // v is the number of visible fraction digits in the displayed form of the number.
1571 : // Example: n = 1001.234, v = 6, result = 234000
1572 : // TODO: need to think through how this is used in the plural rule context.
1573 : // This function can easily encounter integer overflow,
1574 : // and can easily return noise digits when the precision of a double is exceeded.
1575 :
1576 0 : int64_t FixedDecimal::getFractionalDigits(double n, int32_t v) {
1577 0 : if (v == 0 || n == floor(n) || uprv_isNaN(n) || uprv_isPositiveInfinity(n)) {
1578 0 : return 0;
1579 : }
1580 0 : n = fabs(n);
1581 0 : double fract = n - floor(n);
1582 0 : switch (v) {
1583 0 : case 1: return (int64_t)(fract*10.0 + 0.5);
1584 0 : case 2: return (int64_t)(fract*100.0 + 0.5);
1585 0 : case 3: return (int64_t)(fract*1000.0 + 0.5);
1586 : default:
1587 0 : double scaled = floor(fract * pow(10.0, (double)v) + 0.5);
1588 0 : if (scaled > U_INT64_MAX) {
1589 0 : return U_INT64_MAX;
1590 : } else {
1591 0 : return (int64_t)scaled;
1592 : }
1593 : }
1594 : }
1595 :
1596 :
1597 0 : void FixedDecimal::adjustForMinFractionDigits(int32_t minFractionDigits) {
1598 0 : int32_t numTrailingFractionZeros = minFractionDigits - visibleDecimalDigitCount;
1599 0 : if (numTrailingFractionZeros > 0) {
1600 0 : for (int32_t i=0; i<numTrailingFractionZeros; i++) {
1601 : // Do not let the decimalDigits value overflow if there are many trailing zeros.
1602 : // Limit the value to 18 digits, the most that a 64 bit int can fully represent.
1603 0 : if (decimalDigits >= 100000000000000000LL) {
1604 0 : break;
1605 : }
1606 0 : decimalDigits *= 10;
1607 : }
1608 0 : visibleDecimalDigitCount += numTrailingFractionZeros;
1609 : }
1610 0 : }
1611 :
1612 :
1613 0 : double FixedDecimal::get(tokenType operand) const {
1614 0 : switch(operand) {
1615 0 : case tVariableN: return source;
1616 0 : case tVariableI: return (double)intValue;
1617 0 : case tVariableF: return (double)decimalDigits;
1618 0 : case tVariableT: return (double)decimalDigitsWithoutTrailingZeros;
1619 0 : case tVariableV: return visibleDecimalDigitCount;
1620 : default:
1621 0 : U_ASSERT(FALSE); // unexpected.
1622 : return source;
1623 : }
1624 : }
1625 :
1626 0 : int32_t FixedDecimal::getVisibleFractionDigitCount() const {
1627 0 : return visibleDecimalDigitCount;
1628 : }
1629 :
1630 :
1631 :
1632 0 : PluralAvailableLocalesEnumeration::PluralAvailableLocalesEnumeration(UErrorCode &status) {
1633 0 : fLocales = NULL;
1634 0 : fRes = NULL;
1635 0 : fOpenStatus = status;
1636 0 : if (U_FAILURE(status)) {
1637 0 : return;
1638 : }
1639 0 : fOpenStatus = U_ZERO_ERROR;
1640 0 : LocalUResourceBundlePointer rb(ures_openDirect(NULL, "plurals", &fOpenStatus));
1641 0 : fLocales = ures_getByKey(rb.getAlias(), "locales", NULL, &fOpenStatus);
1642 : }
1643 :
1644 0 : PluralAvailableLocalesEnumeration::~PluralAvailableLocalesEnumeration() {
1645 0 : ures_close(fLocales);
1646 0 : ures_close(fRes);
1647 0 : fLocales = NULL;
1648 0 : fRes = NULL;
1649 0 : }
1650 :
1651 0 : const char *PluralAvailableLocalesEnumeration::next(int32_t *resultLength, UErrorCode &status) {
1652 0 : if (U_FAILURE(status)) {
1653 0 : return NULL;
1654 : }
1655 0 : if (U_FAILURE(fOpenStatus)) {
1656 0 : status = fOpenStatus;
1657 0 : return NULL;
1658 : }
1659 0 : fRes = ures_getNextResource(fLocales, fRes, &status);
1660 0 : if (fRes == NULL || U_FAILURE(status)) {
1661 0 : if (status == U_INDEX_OUTOFBOUNDS_ERROR) {
1662 0 : status = U_ZERO_ERROR;
1663 : }
1664 0 : return NULL;
1665 : }
1666 0 : const char *result = ures_getKey(fRes);
1667 0 : if (resultLength != NULL) {
1668 0 : *resultLength = uprv_strlen(result);
1669 : }
1670 0 : return result;
1671 : }
1672 :
1673 :
1674 0 : void PluralAvailableLocalesEnumeration::reset(UErrorCode &status) {
1675 0 : if (U_FAILURE(status)) {
1676 0 : return;
1677 : }
1678 0 : if (U_FAILURE(fOpenStatus)) {
1679 0 : status = fOpenStatus;
1680 0 : return;
1681 : }
1682 0 : ures_resetIterator(fLocales);
1683 : }
1684 :
1685 0 : int32_t PluralAvailableLocalesEnumeration::count(UErrorCode &status) const {
1686 0 : if (U_FAILURE(status)) {
1687 0 : return 0;
1688 : }
1689 0 : if (U_FAILURE(fOpenStatus)) {
1690 0 : status = fOpenStatus;
1691 0 : return 0;
1692 : }
1693 0 : return ures_getSize(fLocales);
1694 : }
1695 :
1696 : U_NAMESPACE_END
1697 :
1698 :
1699 : #endif /* #if !UCONFIG_NO_FORMATTING */
1700 :
1701 : //eof
|