Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : ******************************************************************************
5 : * Copyright (C) 1997-2015, International Business Machines
6 : * Corporation and others. All Rights Reserved.
7 : ******************************************************************************
8 : * file name: nfrs.cpp
9 : * encoding: UTF-8
10 : * tab size: 8 (not used)
11 : * indentation:4
12 : *
13 : * Modification history
14 : * Date Name Comments
15 : * 10/11/2001 Doug Ported from ICU4J
16 : */
17 :
18 : #include "nfrs.h"
19 :
20 : #if U_HAVE_RBNF
21 :
22 : #include "unicode/uchar.h"
23 : #include "nfrule.h"
24 : #include "nfrlist.h"
25 : #include "patternprops.h"
26 : #include "putilimp.h"
27 :
28 : #ifdef RBNF_DEBUG
29 : #include "cmemory.h"
30 : #endif
31 :
32 : enum {
33 : /** -x */
34 : NEGATIVE_RULE_INDEX = 0,
35 : /** x.x */
36 : IMPROPER_FRACTION_RULE_INDEX = 1,
37 : /** 0.x */
38 : PROPER_FRACTION_RULE_INDEX = 2,
39 : /** x.0 */
40 : MASTER_RULE_INDEX = 3,
41 : /** Inf */
42 : INFINITY_RULE_INDEX = 4,
43 : /** NaN */
44 : NAN_RULE_INDEX = 5,
45 : NON_NUMERICAL_RULE_LENGTH = 6
46 : };
47 :
48 : U_NAMESPACE_BEGIN
49 :
50 : #if 0
51 : // euclid's algorithm works with doubles
52 : // note, doubles only get us up to one quadrillion or so, which
53 : // isn't as much range as we get with longs. We probably still
54 : // want either 64-bit math, or BigInteger.
55 :
56 : static int64_t
57 : util_lcm(int64_t x, int64_t y)
58 : {
59 : x.abs();
60 : y.abs();
61 :
62 : if (x == 0 || y == 0) {
63 : return 0;
64 : } else {
65 : do {
66 : if (x < y) {
67 : int64_t t = x; x = y; y = t;
68 : }
69 : x -= y * (x/y);
70 : } while (x != 0);
71 :
72 : return y;
73 : }
74 : }
75 :
76 : #else
77 : /**
78 : * Calculates the least common multiple of x and y.
79 : */
80 : static int64_t
81 0 : util_lcm(int64_t x, int64_t y)
82 : {
83 : // binary gcd algorithm from Knuth, "The Art of Computer Programming,"
84 : // vol. 2, 1st ed., pp. 298-299
85 0 : int64_t x1 = x;
86 0 : int64_t y1 = y;
87 :
88 0 : int p2 = 0;
89 0 : while ((x1 & 1) == 0 && (y1 & 1) == 0) {
90 0 : ++p2;
91 0 : x1 >>= 1;
92 0 : y1 >>= 1;
93 : }
94 :
95 : int64_t t;
96 0 : if ((x1 & 1) == 1) {
97 0 : t = -y1;
98 : } else {
99 0 : t = x1;
100 : }
101 :
102 0 : while (t != 0) {
103 0 : while ((t & 1) == 0) {
104 0 : t = t >> 1;
105 : }
106 0 : if (t > 0) {
107 0 : x1 = t;
108 : } else {
109 0 : y1 = -t;
110 : }
111 0 : t = x1 - y1;
112 : }
113 :
114 0 : int64_t gcd = x1 << p2;
115 :
116 : // x * y == gcd(x, y) * lcm(x, y)
117 0 : return x / gcd * y;
118 : }
119 : #endif
120 :
121 : static const UChar gPercent = 0x0025;
122 : static const UChar gColon = 0x003a;
123 : static const UChar gSemicolon = 0x003b;
124 : static const UChar gLineFeed = 0x000a;
125 :
126 : static const UChar gPercentPercent[] =
127 : {
128 : 0x25, 0x25, 0
129 : }; /* "%%" */
130 :
131 : static const UChar gNoparse[] =
132 : {
133 : 0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0
134 : }; /* "@noparse" */
135 :
136 0 : NFRuleSet::NFRuleSet(RuleBasedNumberFormat *_owner, UnicodeString* descriptions, int32_t index, UErrorCode& status)
137 : : name()
138 : , rules(0)
139 : , owner(_owner)
140 : , fractionRules()
141 : , fIsFractionRuleSet(FALSE)
142 : , fIsPublic(FALSE)
143 0 : , fIsParseable(TRUE)
144 : {
145 0 : for (int32_t i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
146 0 : nonNumericalRules[i] = NULL;
147 : }
148 :
149 0 : if (U_FAILURE(status)) {
150 0 : return;
151 : }
152 :
153 0 : UnicodeString& description = descriptions[index]; // !!! make sure index is valid
154 :
155 0 : if (description.length() == 0) {
156 : // throw new IllegalArgumentException("Empty rule set description");
157 0 : status = U_PARSE_ERROR;
158 0 : return;
159 : }
160 :
161 : // if the description begins with a rule set name (the rule set
162 : // name can be omitted in formatter descriptions that consist
163 : // of only one rule set), copy it out into our "name" member
164 : // and delete it from the description
165 0 : if (description.charAt(0) == gPercent) {
166 0 : int32_t pos = description.indexOf(gColon);
167 0 : if (pos == -1) {
168 : // throw new IllegalArgumentException("Rule set name doesn't end in colon");
169 0 : status = U_PARSE_ERROR;
170 : } else {
171 0 : name.setTo(description, 0, pos);
172 0 : while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) {
173 : }
174 0 : description.remove(0, pos);
175 : }
176 : } else {
177 0 : name.setTo(UNICODE_STRING_SIMPLE("%default"));
178 : }
179 :
180 0 : if (description.length() == 0) {
181 : // throw new IllegalArgumentException("Empty rule set description");
182 0 : status = U_PARSE_ERROR;
183 : }
184 :
185 0 : fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0;
186 :
187 0 : if ( name.endsWith(gNoparse,8) ) {
188 0 : fIsParseable = FALSE;
189 0 : name.truncate(name.length()-8); // remove the @noparse from the name
190 : }
191 :
192 : // all of the other members of NFRuleSet are initialized
193 : // by parseRules()
194 : }
195 :
196 : void
197 0 : NFRuleSet::parseRules(UnicodeString& description, UErrorCode& status)
198 : {
199 : // start by creating a Vector whose elements are Strings containing
200 : // the descriptions of the rules (one rule per element). The rules
201 : // are separated by semicolons (there's no escape facility: ALL
202 : // semicolons are rule delimiters)
203 :
204 0 : if (U_FAILURE(status)) {
205 0 : return;
206 : }
207 :
208 : // ensure we are starting with an empty rule list
209 0 : rules.deleteAll();
210 :
211 : // dlf - the original code kept a separate description array for no reason,
212 : // so I got rid of it. The loop was too complex so I simplified it.
213 :
214 0 : UnicodeString currentDescription;
215 0 : int32_t oldP = 0;
216 0 : while (oldP < description.length()) {
217 0 : int32_t p = description.indexOf(gSemicolon, oldP);
218 0 : if (p == -1) {
219 0 : p = description.length();
220 : }
221 0 : currentDescription.setTo(description, oldP, p - oldP);
222 0 : NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status);
223 0 : oldP = p + 1;
224 : }
225 :
226 : // for rules that didn't specify a base value, their base values
227 : // were initialized to 0. Make another pass through the list and
228 : // set all those rules' base values. We also remove any special
229 : // rules from the list and put them into their own member variables
230 0 : int64_t defaultBaseValue = 0;
231 :
232 : // (this isn't a for loop because we might be deleting items from
233 : // the vector-- we want to make sure we only increment i when
234 : // we _didn't_ delete aything from the vector)
235 0 : int32_t rulesSize = rules.size();
236 0 : for (int32_t i = 0; i < rulesSize; i++) {
237 0 : NFRule* rule = rules[i];
238 0 : int64_t baseValue = rule->getBaseValue();
239 :
240 0 : if (baseValue == 0) {
241 : // if the rule's base value is 0, fill in a default
242 : // base value (this will be 1 plus the preceding
243 : // rule's base value for regular rule sets, and the
244 : // same as the preceding rule's base value in fraction
245 : // rule sets)
246 0 : rule->setBaseValue(defaultBaseValue, status);
247 : }
248 : else {
249 : // if it's a regular rule that already knows its base value,
250 : // check to make sure the rules are in order, and update
251 : // the default base value for the next rule
252 0 : if (baseValue < defaultBaseValue) {
253 : // throw new IllegalArgumentException("Rules are not in order");
254 0 : status = U_PARSE_ERROR;
255 0 : return;
256 : }
257 0 : defaultBaseValue = baseValue;
258 : }
259 0 : if (!fIsFractionRuleSet) {
260 0 : ++defaultBaseValue;
261 : }
262 : }
263 : }
264 :
265 : /**
266 : * Set one of the non-numerical rules.
267 : * @param rule The rule to set.
268 : */
269 0 : void NFRuleSet::setNonNumericalRule(NFRule *rule) {
270 0 : int64_t baseValue = rule->getBaseValue();
271 0 : if (baseValue == NFRule::kNegativeNumberRule) {
272 0 : delete nonNumericalRules[NEGATIVE_RULE_INDEX];
273 0 : nonNumericalRules[NEGATIVE_RULE_INDEX] = rule;
274 : }
275 0 : else if (baseValue == NFRule::kImproperFractionRule) {
276 0 : setBestFractionRule(IMPROPER_FRACTION_RULE_INDEX, rule, TRUE);
277 : }
278 0 : else if (baseValue == NFRule::kProperFractionRule) {
279 0 : setBestFractionRule(PROPER_FRACTION_RULE_INDEX, rule, TRUE);
280 : }
281 0 : else if (baseValue == NFRule::kMasterRule) {
282 0 : setBestFractionRule(MASTER_RULE_INDEX, rule, TRUE);
283 : }
284 0 : else if (baseValue == NFRule::kInfinityRule) {
285 0 : delete nonNumericalRules[INFINITY_RULE_INDEX];
286 0 : nonNumericalRules[INFINITY_RULE_INDEX] = rule;
287 : }
288 0 : else if (baseValue == NFRule::kNaNRule) {
289 0 : delete nonNumericalRules[NAN_RULE_INDEX];
290 0 : nonNumericalRules[NAN_RULE_INDEX] = rule;
291 : }
292 0 : }
293 :
294 : /**
295 : * Determine the best fraction rule to use. Rules matching the decimal point from
296 : * DecimalFormatSymbols become the main set of rules to use.
297 : * @param originalIndex The index into nonNumericalRules
298 : * @param newRule The new rule to consider
299 : * @param rememberRule Should the new rule be added to fractionRules.
300 : */
301 0 : void NFRuleSet::setBestFractionRule(int32_t originalIndex, NFRule *newRule, UBool rememberRule) {
302 0 : if (rememberRule) {
303 0 : fractionRules.add(newRule);
304 : }
305 0 : NFRule *bestResult = nonNumericalRules[originalIndex];
306 0 : if (bestResult == NULL) {
307 0 : nonNumericalRules[originalIndex] = newRule;
308 : }
309 : else {
310 : // We have more than one. Which one is better?
311 0 : const DecimalFormatSymbols *decimalFormatSymbols = owner->getDecimalFormatSymbols();
312 0 : if (decimalFormatSymbols->getSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol).charAt(0)
313 0 : == newRule->getDecimalPoint())
314 : {
315 0 : nonNumericalRules[originalIndex] = newRule;
316 : }
317 : // else leave it alone
318 : }
319 0 : }
320 :
321 0 : NFRuleSet::~NFRuleSet()
322 : {
323 0 : for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
324 0 : if (i != IMPROPER_FRACTION_RULE_INDEX
325 0 : && i != PROPER_FRACTION_RULE_INDEX
326 0 : && i != MASTER_RULE_INDEX)
327 : {
328 0 : delete nonNumericalRules[i];
329 : }
330 : // else it will be deleted via NFRuleList fractionRules
331 : }
332 0 : }
333 :
334 : static UBool
335 0 : util_equalRules(const NFRule* rule1, const NFRule* rule2)
336 : {
337 0 : if (rule1) {
338 0 : if (rule2) {
339 0 : return *rule1 == *rule2;
340 : }
341 0 : } else if (!rule2) {
342 0 : return TRUE;
343 : }
344 0 : return FALSE;
345 : }
346 :
347 : UBool
348 0 : NFRuleSet::operator==(const NFRuleSet& rhs) const
349 : {
350 0 : if (rules.size() == rhs.rules.size() &&
351 0 : fIsFractionRuleSet == rhs.fIsFractionRuleSet &&
352 0 : name == rhs.name) {
353 :
354 : // ...then compare the non-numerical rule lists...
355 0 : for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
356 0 : if (!util_equalRules(nonNumericalRules[i], rhs.nonNumericalRules[i])) {
357 0 : return FALSE;
358 : }
359 : }
360 :
361 : // ...then compare the rule lists...
362 0 : for (uint32_t i = 0; i < rules.size(); ++i) {
363 0 : if (*rules[i] != *rhs.rules[i]) {
364 0 : return FALSE;
365 : }
366 : }
367 0 : return TRUE;
368 : }
369 0 : return FALSE;
370 : }
371 :
372 : void
373 0 : NFRuleSet::setDecimalFormatSymbols(const DecimalFormatSymbols &newSymbols, UErrorCode& status) {
374 0 : for (uint32_t i = 0; i < rules.size(); ++i) {
375 0 : rules[i]->setDecimalFormatSymbols(newSymbols, status);
376 : }
377 : // Switch the fraction rules to mirror the DecimalFormatSymbols.
378 0 : for (int32_t nonNumericalIdx = IMPROPER_FRACTION_RULE_INDEX; nonNumericalIdx <= MASTER_RULE_INDEX; nonNumericalIdx++) {
379 0 : if (nonNumericalRules[nonNumericalIdx]) {
380 0 : for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
381 0 : NFRule *fractionRule = fractionRules[fIdx];
382 0 : if (nonNumericalRules[nonNumericalIdx]->getBaseValue() == fractionRule->getBaseValue()) {
383 0 : setBestFractionRule(nonNumericalIdx, fractionRule, FALSE);
384 : }
385 : }
386 : }
387 : }
388 :
389 0 : for (uint32_t nnrIdx = 0; nnrIdx < NON_NUMERICAL_RULE_LENGTH; nnrIdx++) {
390 0 : NFRule *rule = nonNumericalRules[nnrIdx];
391 0 : if (rule) {
392 0 : rule->setDecimalFormatSymbols(newSymbols, status);
393 : }
394 : }
395 0 : }
396 :
397 : #define RECURSION_LIMIT 64
398 :
399 : void
400 0 : NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
401 : {
402 0 : if (recursionCount >= RECURSION_LIMIT) {
403 : // stop recursion
404 0 : status = U_INVALID_STATE_ERROR;
405 0 : return;
406 : }
407 0 : const NFRule *rule = findNormalRule(number);
408 0 : if (rule) { // else error, but can't report it
409 0 : rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
410 : }
411 : }
412 :
413 : void
414 0 : NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
415 : {
416 0 : if (recursionCount >= RECURSION_LIMIT) {
417 : // stop recursion
418 0 : status = U_INVALID_STATE_ERROR;
419 0 : return;
420 : }
421 0 : const NFRule *rule = findDoubleRule(number);
422 0 : if (rule) { // else error, but can't report it
423 0 : rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
424 : }
425 : }
426 :
427 : const NFRule*
428 0 : NFRuleSet::findDoubleRule(double number) const
429 : {
430 : // if this is a fraction rule set, use findFractionRuleSetRule()
431 0 : if (isFractionRuleSet()) {
432 0 : return findFractionRuleSetRule(number);
433 : }
434 :
435 0 : if (uprv_isNaN(number)) {
436 0 : const NFRule *rule = nonNumericalRules[NAN_RULE_INDEX];
437 0 : if (!rule) {
438 0 : rule = owner->getDefaultNaNRule();
439 : }
440 0 : return rule;
441 : }
442 :
443 : // if the number is negative, return the negative number rule
444 : // (if there isn't a negative-number rule, we pretend it's a
445 : // positive number)
446 0 : if (number < 0) {
447 0 : if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
448 0 : return nonNumericalRules[NEGATIVE_RULE_INDEX];
449 : } else {
450 0 : number = -number;
451 : }
452 : }
453 :
454 0 : if (uprv_isInfinite(number)) {
455 0 : const NFRule *rule = nonNumericalRules[INFINITY_RULE_INDEX];
456 0 : if (!rule) {
457 0 : rule = owner->getDefaultInfinityRule();
458 : }
459 0 : return rule;
460 : }
461 :
462 : // if the number isn't an integer, we use one of the fraction rules...
463 0 : if (number != uprv_floor(number)) {
464 : // if the number is between 0 and 1, return the proper
465 : // fraction rule
466 0 : if (number < 1 && nonNumericalRules[PROPER_FRACTION_RULE_INDEX]) {
467 0 : return nonNumericalRules[PROPER_FRACTION_RULE_INDEX];
468 : }
469 : // otherwise, return the improper fraction rule
470 0 : else if (nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX]) {
471 0 : return nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX];
472 : }
473 : }
474 :
475 : // if there's a master rule, use it to format the number
476 0 : if (nonNumericalRules[MASTER_RULE_INDEX]) {
477 0 : return nonNumericalRules[MASTER_RULE_INDEX];
478 : }
479 :
480 : // and if we haven't yet returned a rule, use findNormalRule()
481 : // to find the applicable rule
482 0 : int64_t r = util64_fromDouble(number + 0.5);
483 0 : return findNormalRule(r);
484 : }
485 :
486 : const NFRule *
487 0 : NFRuleSet::findNormalRule(int64_t number) const
488 : {
489 : // if this is a fraction rule set, use findFractionRuleSetRule()
490 : // to find the rule (we should only go into this clause if the
491 : // value is 0)
492 0 : if (fIsFractionRuleSet) {
493 0 : return findFractionRuleSetRule((double)number);
494 : }
495 :
496 : // if the number is negative, return the negative-number rule
497 : // (if there isn't one, pretend the number is positive)
498 0 : if (number < 0) {
499 0 : if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
500 0 : return nonNumericalRules[NEGATIVE_RULE_INDEX];
501 : } else {
502 0 : number = -number;
503 : }
504 : }
505 :
506 : // we have to repeat the preceding two checks, even though we
507 : // do them in findRule(), because the version of format() that
508 : // takes a long bypasses findRule() and goes straight to this
509 : // function. This function does skip the fraction rules since
510 : // we know the value is an integer (it also skips the master
511 : // rule, since it's considered a fraction rule. Skipping the
512 : // master rule in this function is also how we avoid infinite
513 : // recursion)
514 :
515 : // {dlf} unfortunately this fails if there are no rules except
516 : // special rules. If there are no rules, use the master rule.
517 :
518 : // binary-search the rule list for the applicable rule
519 : // (a rule is used for all values from its base value to
520 : // the next rule's base value)
521 0 : int32_t hi = rules.size();
522 0 : if (hi > 0) {
523 0 : int32_t lo = 0;
524 :
525 0 : while (lo < hi) {
526 0 : int32_t mid = (lo + hi) / 2;
527 0 : if (rules[mid]->getBaseValue() == number) {
528 0 : return rules[mid];
529 : }
530 0 : else if (rules[mid]->getBaseValue() > number) {
531 0 : hi = mid;
532 : }
533 : else {
534 0 : lo = mid + 1;
535 : }
536 : }
537 0 : if (hi == 0) { // bad rule set, minimum base > 0
538 0 : return NULL; // want to throw exception here
539 : }
540 :
541 0 : NFRule *result = rules[hi - 1];
542 :
543 : // use shouldRollBack() to see whether we need to invoke the
544 : // rollback rule (see shouldRollBack()'s documentation for
545 : // an explanation of the rollback rule). If we do, roll back
546 : // one rule and return that one instead of the one we'd normally
547 : // return
548 0 : if (result->shouldRollBack(number)) {
549 0 : if (hi == 1) { // bad rule set, no prior rule to rollback to from this base
550 0 : return NULL;
551 : }
552 0 : result = rules[hi - 2];
553 : }
554 0 : return result;
555 : }
556 : // else use the master rule
557 0 : return nonNumericalRules[MASTER_RULE_INDEX];
558 : }
559 :
560 : /**
561 : * If this rule is a fraction rule set, this function is used by
562 : * findRule() to select the most appropriate rule for formatting
563 : * the number. Basically, the base value of each rule in the rule
564 : * set is treated as the denominator of a fraction. Whichever
565 : * denominator can produce the fraction closest in value to the
566 : * number passed in is the result. If there's a tie, the earlier
567 : * one in the list wins. (If there are two rules in a row with the
568 : * same base value, the first one is used when the numerator of the
569 : * fraction would be 1, and the second rule is used the rest of the
570 : * time.
571 : * @param number The number being formatted (which will always be
572 : * a number between 0 and 1)
573 : * @return The rule to use to format this number
574 : */
575 : const NFRule*
576 0 : NFRuleSet::findFractionRuleSetRule(double number) const
577 : {
578 : // the obvious way to do this (multiply the value being formatted
579 : // by each rule's base value until you get an integral result)
580 : // doesn't work because of rounding error. This method is more
581 : // accurate
582 :
583 : // find the least common multiple of the rules' base values
584 : // and multiply this by the number being formatted. This is
585 : // all the precision we need, and we can do all of the rest
586 : // of the math using integer arithmetic
587 0 : int64_t leastCommonMultiple = rules[0]->getBaseValue();
588 : int64_t numerator;
589 : {
590 0 : for (uint32_t i = 1; i < rules.size(); ++i) {
591 0 : leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue());
592 : }
593 0 : numerator = util64_fromDouble(number * (double)leastCommonMultiple + 0.5);
594 : }
595 : // for each rule, do the following...
596 : int64_t tempDifference;
597 0 : int64_t difference = util64_fromDouble(uprv_maxMantissa());
598 0 : int32_t winner = 0;
599 0 : for (uint32_t i = 0; i < rules.size(); ++i) {
600 : // "numerator" is the numerator of the fraction if the
601 : // denominator is the LCD. The numerator if the rule's
602 : // base value is the denominator is "numerator" times the
603 : // base value divided bythe LCD. Here we check to see if
604 : // that's an integer, and if not, how close it is to being
605 : // an integer.
606 0 : tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple;
607 :
608 :
609 : // normalize the result of the above calculation: we want
610 : // the numerator's distance from the CLOSEST multiple
611 : // of the LCD
612 0 : if (leastCommonMultiple - tempDifference < tempDifference) {
613 0 : tempDifference = leastCommonMultiple - tempDifference;
614 : }
615 :
616 : // if this is as close as we've come, keep track of how close
617 : // that is, and the line number of the rule that did it. If
618 : // we've scored a direct hit, we don't have to look at any more
619 : // rules
620 0 : if (tempDifference < difference) {
621 0 : difference = tempDifference;
622 0 : winner = i;
623 0 : if (difference == 0) {
624 0 : break;
625 : }
626 : }
627 : }
628 :
629 : // if we have two successive rules that both have the winning base
630 : // value, then the first one (the one we found above) is used if
631 : // the numerator of the fraction is 1 and the second one is used if
632 : // the numerator of the fraction is anything else (this lets us
633 : // do things like "one third"/"two thirds" without haveing to define
634 : // a whole bunch of extra rule sets)
635 0 : if ((unsigned)(winner + 1) < rules.size() &&
636 0 : rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) {
637 0 : double n = ((double)rules[winner]->getBaseValue()) * number;
638 0 : if (n < 0.5 || n >= 2) {
639 0 : ++winner;
640 : }
641 : }
642 :
643 : // finally, return the winning rule
644 0 : return rules[winner];
645 : }
646 :
647 : /**
648 : * Parses a string. Matches the string to be parsed against each
649 : * of its rules (with a base value less than upperBound) and returns
650 : * the value produced by the rule that matched the most charcters
651 : * in the source string.
652 : * @param text The string to parse
653 : * @param parsePosition The initial position is ignored and assumed
654 : * to be 0. On exit, this object has been updated to point to the
655 : * first character position this rule set didn't consume.
656 : * @param upperBound Limits the rules that can be allowed to match.
657 : * Only rules whose base values are strictly less than upperBound
658 : * are considered.
659 : * @return The numerical result of parsing this string. This will
660 : * be the matching rule's base value, composed appropriately with
661 : * the results of matching any of its substitutions. The object
662 : * will be an instance of Long if it's an integral value; otherwise,
663 : * it will be an instance of Double. This function always returns
664 : * a valid object: If nothing matched the input string at all,
665 : * this function returns new Long(0), and the parse position is
666 : * left unchanged.
667 : */
668 : #ifdef RBNF_DEBUG
669 : #include <stdio.h>
670 :
671 : static void dumpUS(FILE* f, const UnicodeString& us) {
672 : int len = us.length();
673 : char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1];
674 : if (buf != NULL) {
675 : us.extract(0, len, buf);
676 : buf[len] = 0;
677 : fprintf(f, "%s", buf);
678 : uprv_free(buf); //delete[] buf;
679 : }
680 : }
681 : #endif
682 :
683 : UBool
684 0 : NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const
685 : {
686 : // try matching each rule in the rule set against the text being
687 : // parsed. Whichever one matches the most characters is the one
688 : // that determines the value we return.
689 :
690 0 : result.setLong(0);
691 :
692 : // dump out if there's no text to parse
693 0 : if (text.length() == 0) {
694 0 : return 0;
695 : }
696 :
697 0 : ParsePosition highWaterMark;
698 0 : ParsePosition workingPos = pos;
699 :
700 : #ifdef RBNF_DEBUG
701 : fprintf(stderr, "<nfrs> %x '", this);
702 : dumpUS(stderr, name);
703 : fprintf(stderr, "' text '");
704 : dumpUS(stderr, text);
705 : fprintf(stderr, "'\n");
706 : fprintf(stderr, " parse negative: %d\n", this, negativeNumberRule != 0);
707 : #endif
708 : // Try each of the negative rules, fraction rules, infinity rules and NaN rules
709 0 : for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
710 0 : if (nonNumericalRules[i]) {
711 0 : Formattable tempResult;
712 0 : UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, tempResult);
713 0 : if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
714 0 : result = tempResult;
715 0 : highWaterMark = workingPos;
716 : }
717 0 : workingPos = pos;
718 : }
719 : }
720 : #ifdef RBNF_DEBUG
721 : fprintf(stderr, "<nfrs> continue other with text '");
722 : dumpUS(stderr, text);
723 : fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
724 : #endif
725 :
726 : // finally, go through the regular rules one at a time. We start
727 : // at the end of the list because we want to try matching the most
728 : // sigificant rule first (this helps ensure that we parse
729 : // "five thousand three hundred six" as
730 : // "(five thousand) (three hundred) (six)" rather than
731 : // "((five thousand three) hundred) (six)"). Skip rules whose
732 : // base values are higher than the upper bound (again, this helps
733 : // limit ambiguity by making sure the rules that match a rule's
734 : // are less significant than the rule containing the substitutions)/
735 : {
736 0 : int64_t ub = util64_fromDouble(upperBound);
737 : #ifdef RBNF_DEBUG
738 : {
739 : char ubstr[64];
740 : util64_toa(ub, ubstr, 64);
741 : char ubstrhex[64];
742 : util64_toa(ub, ubstrhex, 64, 16);
743 : fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex);
744 : }
745 : #endif
746 0 : for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
747 0 : if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
748 0 : continue;
749 : }
750 0 : Formattable tempResult;
751 0 : UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult);
752 0 : if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
753 0 : result = tempResult;
754 0 : highWaterMark = workingPos;
755 : }
756 0 : workingPos = pos;
757 : }
758 : }
759 : #ifdef RBNF_DEBUG
760 : fprintf(stderr, "<nfrs> exit\n");
761 : #endif
762 : // finally, update the parse postion we were passed to point to the
763 : // first character we didn't use, and return the result that
764 : // corresponds to that string of characters
765 0 : pos = highWaterMark;
766 :
767 0 : return 1;
768 : }
769 :
770 : void
771 0 : NFRuleSet::appendRules(UnicodeString& result) const
772 : {
773 : uint32_t i;
774 :
775 : // the rule set name goes first...
776 0 : result.append(name);
777 0 : result.append(gColon);
778 0 : result.append(gLineFeed);
779 :
780 : // followed by the regular rules...
781 0 : for (i = 0; i < rules.size(); i++) {
782 0 : rules[i]->_appendRuleText(result);
783 0 : result.append(gLineFeed);
784 : }
785 :
786 : // followed by the special rules (if they exist)
787 0 : for (i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
788 0 : NFRule *rule = nonNumericalRules[i];
789 0 : if (nonNumericalRules[i]) {
790 0 : if (rule->getBaseValue() == NFRule::kImproperFractionRule
791 0 : || rule->getBaseValue() == NFRule::kProperFractionRule
792 0 : || rule->getBaseValue() == NFRule::kMasterRule)
793 : {
794 0 : for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
795 0 : NFRule *fractionRule = fractionRules[fIdx];
796 0 : if (fractionRule->getBaseValue() == rule->getBaseValue()) {
797 0 : fractionRule->_appendRuleText(result);
798 0 : result.append(gLineFeed);
799 : }
800 : }
801 : }
802 : else {
803 0 : rule->_appendRuleText(result);
804 0 : result.append(gLineFeed);
805 : }
806 : }
807 : }
808 0 : }
809 :
810 : // utility functions
811 :
812 0 : int64_t util64_fromDouble(double d) {
813 0 : int64_t result = 0;
814 0 : if (!uprv_isNaN(d)) {
815 0 : double mant = uprv_maxMantissa();
816 0 : if (d < -mant) {
817 0 : d = -mant;
818 0 : } else if (d > mant) {
819 0 : d = mant;
820 : }
821 0 : UBool neg = d < 0;
822 0 : if (neg) {
823 0 : d = -d;
824 : }
825 0 : result = (int64_t)uprv_floor(d);
826 0 : if (neg) {
827 0 : result = -result;
828 : }
829 : }
830 0 : return result;
831 : }
832 :
833 0 : int64_t util64_pow(int32_t base, uint16_t exponent) {
834 0 : if (base == 0) {
835 0 : return 0;
836 : }
837 0 : int64_t result = 1;
838 0 : int64_t pow = base;
839 0 : while (exponent > 0) {
840 0 : if ((exponent & 1) == 1) {
841 0 : result *= pow;
842 : }
843 0 : pow *= pow;
844 0 : exponent >>= 1;
845 : }
846 0 : return result;
847 : }
848 :
849 : static const uint8_t asciiDigits[] = {
850 : 0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u,
851 : 0x38u, 0x39u, 0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u,
852 : 0x67u, 0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu,
853 : 0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u,
854 : 0x77u, 0x78u, 0x79u, 0x7au,
855 : };
856 :
857 : static const UChar kUMinus = (UChar)0x002d;
858 :
859 : #ifdef RBNF_DEBUG
860 : static const char kMinus = '-';
861 :
862 : static const uint8_t digitInfo[] = {
863 : 0, 0, 0, 0, 0, 0, 0, 0,
864 : 0, 0, 0, 0, 0, 0, 0, 0,
865 : 0, 0, 0, 0, 0, 0, 0, 0,
866 : 0, 0, 0, 0, 0, 0, 0, 0,
867 : 0, 0, 0, 0, 0, 0, 0, 0,
868 : 0, 0, 0, 0, 0, 0, 0, 0,
869 : 0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u,
870 : 0x88u, 0x89u, 0, 0, 0, 0, 0, 0,
871 : 0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
872 : 0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
873 : 0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
874 : 0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0,
875 : 0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
876 : 0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
877 : 0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
878 : 0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0,
879 : };
880 :
881 : int64_t util64_atoi(const char* str, uint32_t radix)
882 : {
883 : if (radix > 36) {
884 : radix = 36;
885 : } else if (radix < 2) {
886 : radix = 2;
887 : }
888 : int64_t lradix = radix;
889 :
890 : int neg = 0;
891 : if (*str == kMinus) {
892 : ++str;
893 : neg = 1;
894 : }
895 : int64_t result = 0;
896 : uint8_t b;
897 : while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) {
898 : result *= lradix;
899 : result += (int32_t)b;
900 : }
901 : if (neg) {
902 : result = -result;
903 : }
904 : return result;
905 : }
906 :
907 : int64_t util64_utoi(const UChar* str, uint32_t radix)
908 : {
909 : if (radix > 36) {
910 : radix = 36;
911 : } else if (radix < 2) {
912 : radix = 2;
913 : }
914 : int64_t lradix = radix;
915 :
916 : int neg = 0;
917 : if (*str == kUMinus) {
918 : ++str;
919 : neg = 1;
920 : }
921 : int64_t result = 0;
922 : UChar c;
923 : uint8_t b;
924 : while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) {
925 : result *= lradix;
926 : result += (int32_t)b;
927 : }
928 : if (neg) {
929 : result = -result;
930 : }
931 : return result;
932 : }
933 :
934 : uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw)
935 : {
936 : if (radix > 36) {
937 : radix = 36;
938 : } else if (radix < 2) {
939 : radix = 2;
940 : }
941 : int64_t base = radix;
942 :
943 : char* p = buf;
944 : if (len && (w < 0) && (radix == 10) && !raw) {
945 : w = -w;
946 : *p++ = kMinus;
947 : --len;
948 : } else if (len && (w == 0)) {
949 : *p++ = (char)raw ? 0 : asciiDigits[0];
950 : --len;
951 : }
952 :
953 : while (len && w != 0) {
954 : int64_t n = w / base;
955 : int64_t m = n * base;
956 : int32_t d = (int32_t)(w-m);
957 : *p++ = raw ? (char)d : asciiDigits[d];
958 : w = n;
959 : --len;
960 : }
961 : if (len) {
962 : *p = 0; // null terminate if room for caller convenience
963 : }
964 :
965 : len = p - buf;
966 : if (*buf == kMinus) {
967 : ++buf;
968 : }
969 : while (--p > buf) {
970 : char c = *p;
971 : *p = *buf;
972 : *buf = c;
973 : ++buf;
974 : }
975 :
976 : return len;
977 : }
978 : #endif
979 :
980 0 : uint32_t util64_tou(int64_t w, UChar* buf, uint32_t len, uint32_t radix, UBool raw)
981 : {
982 0 : if (radix > 36) {
983 0 : radix = 36;
984 0 : } else if (radix < 2) {
985 0 : radix = 2;
986 : }
987 0 : int64_t base = radix;
988 :
989 0 : UChar* p = buf;
990 0 : if (len && (w < 0) && (radix == 10) && !raw) {
991 0 : w = -w;
992 0 : *p++ = kUMinus;
993 0 : --len;
994 0 : } else if (len && (w == 0)) {
995 0 : *p++ = (UChar)raw ? 0 : asciiDigits[0];
996 0 : --len;
997 : }
998 :
999 0 : while (len && (w != 0)) {
1000 0 : int64_t n = w / base;
1001 0 : int64_t m = n * base;
1002 0 : int32_t d = (int32_t)(w-m);
1003 0 : *p++ = (UChar)(raw ? d : asciiDigits[d]);
1004 0 : w = n;
1005 0 : --len;
1006 : }
1007 0 : if (len) {
1008 0 : *p = 0; // null terminate if room for caller convenience
1009 : }
1010 :
1011 0 : len = (uint32_t)(p - buf);
1012 0 : if (*buf == kUMinus) {
1013 0 : ++buf;
1014 : }
1015 0 : while (--p > buf) {
1016 0 : UChar c = *p;
1017 0 : *p = *buf;
1018 0 : *buf = c;
1019 0 : ++buf;
1020 : }
1021 :
1022 0 : return len;
1023 : }
1024 :
1025 :
1026 : U_NAMESPACE_END
1027 :
1028 : /* U_HAVE_RBNF */
1029 : #endif
1030 :
|