Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : *******************************************************************************
5 : * Copyright (C) 1997-2013, International Business Machines Corporation and *
6 : * others. All Rights Reserved. *
7 : *******************************************************************************
8 : *
9 : * File CHOICFMT.CPP
10 : *
11 : * Modification History:
12 : *
13 : * Date Name Description
14 : * 02/19/97 aliu Converted from java.
15 : * 03/20/97 helena Finished first cut of implementation and got rid
16 : * of nextDouble/previousDouble and replaced with
17 : * boolean array.
18 : * 4/10/97 aliu Clean up. Modified to work on AIX.
19 : * 06/04/97 helena Fixed applyPattern(), toPattern() and not to include
20 : * wchar.h.
21 : * 07/09/97 helena Made ParsePosition into a class.
22 : * 08/06/97 nos removed overloaded constructor, fixed 'format(array)'
23 : * 07/22/98 stephen JDK 1.2 Sync - removed UBool array (doubleFlags)
24 : * 02/22/99 stephen Removed character literals for EBCDIC safety
25 : ********************************************************************************
26 : */
27 :
28 : #include "unicode/utypes.h"
29 :
30 : #if !UCONFIG_NO_FORMATTING
31 :
32 : #include "unicode/choicfmt.h"
33 : #include "unicode/numfmt.h"
34 : #include "unicode/locid.h"
35 : #include "cpputils.h"
36 : #include "cstring.h"
37 : #include "messageimpl.h"
38 : #include "putilimp.h"
39 : #include "uassert.h"
40 : #include <stdio.h>
41 : #include <float.h>
42 :
43 : // *****************************************************************************
44 : // class ChoiceFormat
45 : // *****************************************************************************
46 :
47 : U_NAMESPACE_BEGIN
48 :
49 0 : UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ChoiceFormat)
50 :
51 : // Special characters used by ChoiceFormat. There are two characters
52 : // used interchangeably to indicate <=. Either is parsed, but only
53 : // LESS_EQUAL is generated by toPattern().
54 : #define SINGLE_QUOTE ((UChar)0x0027) /*'*/
55 : #define LESS_THAN ((UChar)0x003C) /*<*/
56 : #define LESS_EQUAL ((UChar)0x0023) /*#*/
57 : #define LESS_EQUAL2 ((UChar)0x2264)
58 : #define VERTICAL_BAR ((UChar)0x007C) /*|*/
59 : #define MINUS ((UChar)0x002D) /*-*/
60 :
61 : static const UChar LEFT_CURLY_BRACE = 0x7B; /*{*/
62 : static const UChar RIGHT_CURLY_BRACE = 0x7D; /*}*/
63 :
64 : #ifdef INFINITY
65 : #undef INFINITY
66 : #endif
67 : #define INFINITY ((UChar)0x221E)
68 :
69 : //static const UChar gPositiveInfinity[] = {INFINITY, 0};
70 : //static const UChar gNegativeInfinity[] = {MINUS, INFINITY, 0};
71 : #define POSITIVE_INF_STRLEN 1
72 : #define NEGATIVE_INF_STRLEN 2
73 :
74 : // -------------------------------------
75 : // Creates a ChoiceFormat instance based on the pattern.
76 :
77 0 : ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
78 0 : UErrorCode& status)
79 : : constructorErrorCode(status),
80 0 : msgPattern(status)
81 : {
82 0 : applyPattern(newPattern, status);
83 0 : }
84 :
85 : // -------------------------------------
86 : // Creates a ChoiceFormat instance with the limit array and
87 : // format strings for each limit.
88 :
89 0 : ChoiceFormat::ChoiceFormat(const double* limits,
90 : const UnicodeString* formats,
91 0 : int32_t cnt )
92 : : constructorErrorCode(U_ZERO_ERROR),
93 0 : msgPattern(constructorErrorCode)
94 : {
95 0 : setChoices(limits, NULL, formats, cnt, constructorErrorCode);
96 0 : }
97 :
98 : // -------------------------------------
99 :
100 0 : ChoiceFormat::ChoiceFormat(const double* limits,
101 : const UBool* closures,
102 : const UnicodeString* formats,
103 0 : int32_t cnt )
104 : : constructorErrorCode(U_ZERO_ERROR),
105 0 : msgPattern(constructorErrorCode)
106 : {
107 0 : setChoices(limits, closures, formats, cnt, constructorErrorCode);
108 0 : }
109 :
110 : // -------------------------------------
111 : // copy constructor
112 :
113 0 : ChoiceFormat::ChoiceFormat(const ChoiceFormat& that)
114 : : NumberFormat(that),
115 0 : constructorErrorCode(that.constructorErrorCode),
116 0 : msgPattern(that.msgPattern)
117 : {
118 0 : }
119 :
120 : // -------------------------------------
121 : // Private constructor that creates a
122 : // ChoiceFormat instance based on the
123 : // pattern and populates UParseError
124 :
125 0 : ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
126 : UParseError& parseError,
127 0 : UErrorCode& status)
128 : : constructorErrorCode(status),
129 0 : msgPattern(status)
130 : {
131 0 : applyPattern(newPattern,parseError, status);
132 0 : }
133 : // -------------------------------------
134 :
135 : UBool
136 0 : ChoiceFormat::operator==(const Format& that) const
137 : {
138 0 : if (this == &that) return TRUE;
139 0 : if (!NumberFormat::operator==(that)) return FALSE;
140 0 : ChoiceFormat& thatAlias = (ChoiceFormat&)that;
141 0 : return msgPattern == thatAlias.msgPattern;
142 : }
143 :
144 : // -------------------------------------
145 : // copy constructor
146 :
147 : const ChoiceFormat&
148 0 : ChoiceFormat::operator=(const ChoiceFormat& that)
149 : {
150 0 : if (this != &that) {
151 0 : NumberFormat::operator=(that);
152 0 : constructorErrorCode = that.constructorErrorCode;
153 0 : msgPattern = that.msgPattern;
154 : }
155 0 : return *this;
156 : }
157 :
158 : // -------------------------------------
159 :
160 0 : ChoiceFormat::~ChoiceFormat()
161 : {
162 0 : }
163 :
164 : // -------------------------------------
165 :
166 : /**
167 : * Convert a double value to a string without the overhead of NumberFormat.
168 : */
169 : UnicodeString&
170 0 : ChoiceFormat::dtos(double value,
171 : UnicodeString& string)
172 : {
173 : /* Buffer to contain the digits and any extra formatting stuff. */
174 : char temp[DBL_DIG + 16];
175 0 : char *itrPtr = temp;
176 : char *expPtr;
177 :
178 0 : sprintf(temp, "%.*g", DBL_DIG, value);
179 :
180 : /* Find and convert the decimal point.
181 : Using setlocale on some machines will cause sprintf to use a comma for certain locales.
182 : */
183 0 : while (*itrPtr && (*itrPtr == '-' || isdigit(*itrPtr))) {
184 0 : itrPtr++;
185 : }
186 0 : if (*itrPtr != 0 && *itrPtr != 'e') {
187 : /* We reached something that looks like a decimal point.
188 : In case someone used setlocale(), which changes the decimal point. */
189 0 : *itrPtr = '.';
190 0 : itrPtr++;
191 : }
192 : /* Search for the exponent */
193 0 : while (*itrPtr && *itrPtr != 'e') {
194 0 : itrPtr++;
195 : }
196 0 : if (*itrPtr == 'e') {
197 0 : itrPtr++;
198 : /* Verify the exponent sign */
199 0 : if (*itrPtr == '+' || *itrPtr == '-') {
200 0 : itrPtr++;
201 : }
202 : /* Remove leading zeros. You will see this on Windows machines. */
203 0 : expPtr = itrPtr;
204 0 : while (*itrPtr == '0') {
205 0 : itrPtr++;
206 : }
207 0 : if (*itrPtr && expPtr != itrPtr) {
208 : /* Shift the exponent without zeros. */
209 0 : while (*itrPtr) {
210 0 : *(expPtr++) = *(itrPtr++);
211 : }
212 : // NULL terminate
213 0 : *expPtr = 0;
214 : }
215 : }
216 :
217 0 : string = UnicodeString(temp, -1, US_INV); /* invariant codepage */
218 0 : return string;
219 : }
220 :
221 : // -------------------------------------
222 : // calls the overloaded applyPattern method.
223 :
224 : void
225 0 : ChoiceFormat::applyPattern(const UnicodeString& pattern,
226 : UErrorCode& status)
227 : {
228 0 : msgPattern.parseChoiceStyle(pattern, NULL, status);
229 0 : constructorErrorCode = status;
230 0 : }
231 :
232 : // -------------------------------------
233 : // Applies the pattern to this ChoiceFormat instance.
234 :
235 : void
236 0 : ChoiceFormat::applyPattern(const UnicodeString& pattern,
237 : UParseError& parseError,
238 : UErrorCode& status)
239 : {
240 0 : msgPattern.parseChoiceStyle(pattern, &parseError, status);
241 0 : constructorErrorCode = status;
242 0 : }
243 : // -------------------------------------
244 : // Returns the input pattern string.
245 :
246 : UnicodeString&
247 0 : ChoiceFormat::toPattern(UnicodeString& result) const
248 : {
249 0 : return result = msgPattern.getPatternString();
250 : }
251 :
252 : // -------------------------------------
253 : // Sets the limit and format arrays.
254 : void
255 0 : ChoiceFormat::setChoices( const double* limits,
256 : const UnicodeString* formats,
257 : int32_t cnt )
258 : {
259 0 : UErrorCode errorCode = U_ZERO_ERROR;
260 0 : setChoices(limits, NULL, formats, cnt, errorCode);
261 0 : }
262 :
263 : // -------------------------------------
264 : // Sets the limit and format arrays.
265 : void
266 0 : ChoiceFormat::setChoices( const double* limits,
267 : const UBool* closures,
268 : const UnicodeString* formats,
269 : int32_t cnt )
270 : {
271 0 : UErrorCode errorCode = U_ZERO_ERROR;
272 0 : setChoices(limits, closures, formats, cnt, errorCode);
273 0 : }
274 :
275 : void
276 0 : ChoiceFormat::setChoices(const double* limits,
277 : const UBool* closures,
278 : const UnicodeString* formats,
279 : int32_t count,
280 : UErrorCode &errorCode) {
281 0 : if (U_FAILURE(errorCode)) {
282 0 : return;
283 : }
284 0 : if (limits == NULL || formats == NULL) {
285 0 : errorCode = U_ILLEGAL_ARGUMENT_ERROR;
286 0 : return;
287 : }
288 : // Reconstruct the original input pattern.
289 : // Modified version of the pre-ICU 4.8 toPattern() implementation.
290 0 : UnicodeString result;
291 0 : for (int32_t i = 0; i < count; ++i) {
292 0 : if (i != 0) {
293 0 : result += VERTICAL_BAR;
294 : }
295 0 : UnicodeString buf;
296 0 : if (uprv_isPositiveInfinity(limits[i])) {
297 0 : result += INFINITY;
298 0 : } else if (uprv_isNegativeInfinity(limits[i])) {
299 0 : result += MINUS;
300 0 : result += INFINITY;
301 : } else {
302 0 : result += dtos(limits[i], buf);
303 : }
304 0 : if (closures != NULL && closures[i]) {
305 0 : result += LESS_THAN;
306 : } else {
307 0 : result += LESS_EQUAL;
308 : }
309 : // Append formats[i], using quotes if there are special
310 : // characters. Single quotes themselves must be escaped in
311 : // either case.
312 0 : const UnicodeString& text = formats[i];
313 0 : int32_t textLength = text.length();
314 0 : int32_t nestingLevel = 0;
315 0 : for (int32_t j = 0; j < textLength; ++j) {
316 0 : UChar c = text[j];
317 0 : if (c == SINGLE_QUOTE && nestingLevel == 0) {
318 : // Double each top-level apostrophe.
319 0 : result.append(c);
320 0 : } else if (c == VERTICAL_BAR && nestingLevel == 0) {
321 : // Surround each pipe symbol with apostrophes for quoting.
322 : // If the next character is an apostrophe, then that will be doubled,
323 : // and although the parser will see the apostrophe pairs beginning
324 : // and ending one character earlier than our doubling, the result
325 : // is as desired.
326 : // | -> '|'
327 : // |' -> '|'''
328 : // |'' -> '|''''' etc.
329 0 : result.append(SINGLE_QUOTE).append(c).append(SINGLE_QUOTE);
330 0 : continue; // Skip the append(c) at the end of the loop body.
331 0 : } else if (c == LEFT_CURLY_BRACE) {
332 0 : ++nestingLevel;
333 0 : } else if (c == RIGHT_CURLY_BRACE && nestingLevel > 0) {
334 0 : --nestingLevel;
335 : }
336 0 : result.append(c);
337 : }
338 : }
339 : // Apply the reconstructed pattern.
340 0 : applyPattern(result, errorCode);
341 : }
342 :
343 : // -------------------------------------
344 : // Gets the limit array.
345 :
346 : const double*
347 0 : ChoiceFormat::getLimits(int32_t& cnt) const
348 : {
349 0 : cnt = 0;
350 0 : return NULL;
351 : }
352 :
353 : // -------------------------------------
354 : // Gets the closures array.
355 :
356 : const UBool*
357 0 : ChoiceFormat::getClosures(int32_t& cnt) const
358 : {
359 0 : cnt = 0;
360 0 : return NULL;
361 : }
362 :
363 : // -------------------------------------
364 : // Gets the format array.
365 :
366 : const UnicodeString*
367 0 : ChoiceFormat::getFormats(int32_t& cnt) const
368 : {
369 0 : cnt = 0;
370 0 : return NULL;
371 : }
372 :
373 : // -------------------------------------
374 : // Formats an int64 number, it's actually formatted as
375 : // a double. The returned format string may differ
376 : // from the input number because of this.
377 :
378 : UnicodeString&
379 0 : ChoiceFormat::format(int64_t number,
380 : UnicodeString& appendTo,
381 : FieldPosition& status) const
382 : {
383 0 : return format((double) number, appendTo, status);
384 : }
385 :
386 : // -------------------------------------
387 : // Formats an int32_t number, it's actually formatted as
388 : // a double.
389 :
390 : UnicodeString&
391 0 : ChoiceFormat::format(int32_t number,
392 : UnicodeString& appendTo,
393 : FieldPosition& status) const
394 : {
395 0 : return format((double) number, appendTo, status);
396 : }
397 :
398 : // -------------------------------------
399 : // Formats a double number.
400 :
401 : UnicodeString&
402 0 : ChoiceFormat::format(double number,
403 : UnicodeString& appendTo,
404 : FieldPosition& /*pos*/) const
405 : {
406 0 : if (msgPattern.countParts() == 0) {
407 : // No pattern was applied, or it failed.
408 0 : return appendTo;
409 : }
410 : // Get the appropriate sub-message.
411 0 : int32_t msgStart = findSubMessage(msgPattern, 0, number);
412 0 : if (!MessageImpl::jdkAposMode(msgPattern)) {
413 0 : int32_t patternStart = msgPattern.getPart(msgStart).getLimit();
414 0 : int32_t msgLimit = msgPattern.getLimitPartIndex(msgStart);
415 : appendTo.append(msgPattern.getPatternString(),
416 : patternStart,
417 0 : msgPattern.getPatternIndex(msgLimit) - patternStart);
418 0 : return appendTo;
419 : }
420 : // JDK compatibility mode: Remove SKIP_SYNTAX.
421 0 : return MessageImpl::appendSubMessageWithoutSkipSyntax(msgPattern, msgStart, appendTo);
422 : }
423 :
424 : int32_t
425 0 : ChoiceFormat::findSubMessage(const MessagePattern &pattern, int32_t partIndex, double number) {
426 0 : int32_t count = pattern.countParts();
427 : int32_t msgStart;
428 : // Iterate over (ARG_INT|DOUBLE, ARG_SELECTOR, message) tuples
429 : // until ARG_LIMIT or end of choice-only pattern.
430 : // Ignore the first number and selector and start the loop on the first message.
431 0 : partIndex += 2;
432 : for (;;) {
433 : // Skip but remember the current sub-message.
434 0 : msgStart = partIndex;
435 0 : partIndex = pattern.getLimitPartIndex(partIndex);
436 0 : if (++partIndex >= count) {
437 : // Reached the end of the choice-only pattern.
438 : // Return with the last sub-message.
439 0 : break;
440 : }
441 0 : const MessagePattern::Part &part = pattern.getPart(partIndex++);
442 0 : UMessagePatternPartType type = part.getType();
443 0 : if (type == UMSGPAT_PART_TYPE_ARG_LIMIT) {
444 : // Reached the end of the ChoiceFormat style.
445 : // Return with the last sub-message.
446 0 : break;
447 : }
448 : // part is an ARG_INT or ARG_DOUBLE
449 0 : U_ASSERT(MessagePattern::Part::hasNumericValue(type));
450 0 : double boundary = pattern.getNumericValue(part);
451 : // Fetch the ARG_SELECTOR character.
452 0 : int32_t selectorIndex = pattern.getPatternIndex(partIndex++);
453 0 : UChar boundaryChar = pattern.getPatternString().charAt(selectorIndex);
454 0 : if (boundaryChar == LESS_THAN ? !(number > boundary) : !(number >= boundary)) {
455 : // The number is in the interval between the previous boundary and the current one.
456 : // Return with the sub-message between them.
457 : // The !(a>b) and !(a>=b) comparisons are equivalent to
458 : // (a<=b) and (a<b) except they "catch" NaN.
459 0 : break;
460 : }
461 0 : }
462 0 : return msgStart;
463 : }
464 :
465 : // -------------------------------------
466 : // Formats an array of objects. Checks if the data type of the objects
467 : // to get the right value for formatting.
468 :
469 : UnicodeString&
470 0 : ChoiceFormat::format(const Formattable* objs,
471 : int32_t cnt,
472 : UnicodeString& appendTo,
473 : FieldPosition& pos,
474 : UErrorCode& status) const
475 : {
476 0 : if(cnt < 0) {
477 0 : status = U_ILLEGAL_ARGUMENT_ERROR;
478 0 : return appendTo;
479 : }
480 0 : if (msgPattern.countParts() == 0) {
481 0 : status = U_INVALID_STATE_ERROR;
482 0 : return appendTo;
483 : }
484 :
485 0 : for (int32_t i = 0; i < cnt; i++) {
486 0 : double objDouble = objs[i].getDouble(status);
487 0 : if (U_SUCCESS(status)) {
488 0 : format(objDouble, appendTo, pos);
489 : }
490 : }
491 :
492 0 : return appendTo;
493 : }
494 :
495 : // -------------------------------------
496 :
497 : void
498 0 : ChoiceFormat::parse(const UnicodeString& text,
499 : Formattable& result,
500 : ParsePosition& pos) const
501 : {
502 0 : result.setDouble(parseArgument(msgPattern, 0, text, pos));
503 0 : }
504 :
505 : double
506 0 : ChoiceFormat::parseArgument(
507 : const MessagePattern &pattern, int32_t partIndex,
508 : const UnicodeString &source, ParsePosition &pos) {
509 : // find the best number (defined as the one with the longest parse)
510 0 : int32_t start = pos.getIndex();
511 0 : int32_t furthest = start;
512 0 : double bestNumber = uprv_getNaN();
513 0 : double tempNumber = 0.0;
514 0 : int32_t count = pattern.countParts();
515 0 : while (partIndex < count && pattern.getPartType(partIndex) != UMSGPAT_PART_TYPE_ARG_LIMIT) {
516 0 : tempNumber = pattern.getNumericValue(pattern.getPart(partIndex));
517 0 : partIndex += 2; // skip the numeric part and ignore the ARG_SELECTOR
518 0 : int32_t msgLimit = pattern.getLimitPartIndex(partIndex);
519 0 : int32_t len = matchStringUntilLimitPart(pattern, partIndex, msgLimit, source, start);
520 0 : if (len >= 0) {
521 0 : int32_t newIndex = start + len;
522 0 : if (newIndex > furthest) {
523 0 : furthest = newIndex;
524 0 : bestNumber = tempNumber;
525 0 : if (furthest == source.length()) {
526 0 : break;
527 : }
528 : }
529 : }
530 0 : partIndex = msgLimit + 1;
531 : }
532 0 : if (furthest == start) {
533 0 : pos.setErrorIndex(start);
534 : } else {
535 0 : pos.setIndex(furthest);
536 : }
537 0 : return bestNumber;
538 : }
539 :
540 : int32_t
541 0 : ChoiceFormat::matchStringUntilLimitPart(
542 : const MessagePattern &pattern, int32_t partIndex, int32_t limitPartIndex,
543 : const UnicodeString &source, int32_t sourceOffset) {
544 0 : int32_t matchingSourceLength = 0;
545 0 : const UnicodeString &msgString = pattern.getPatternString();
546 0 : int32_t prevIndex = pattern.getPart(partIndex).getLimit();
547 : for (;;) {
548 0 : const MessagePattern::Part &part = pattern.getPart(++partIndex);
549 0 : if (partIndex == limitPartIndex || part.getType() == UMSGPAT_PART_TYPE_SKIP_SYNTAX) {
550 0 : int32_t index = part.getIndex();
551 0 : int32_t length = index - prevIndex;
552 0 : if (length != 0 && 0 != source.compare(sourceOffset, length, msgString, prevIndex, length)) {
553 0 : return -1; // mismatch
554 : }
555 0 : matchingSourceLength += length;
556 0 : if (partIndex == limitPartIndex) {
557 0 : return matchingSourceLength;
558 : }
559 0 : prevIndex = part.getLimit(); // SKIP_SYNTAX
560 : }
561 0 : }
562 : }
563 :
564 : // -------------------------------------
565 :
566 : Format*
567 0 : ChoiceFormat::clone() const
568 : {
569 0 : ChoiceFormat *aCopy = new ChoiceFormat(*this);
570 0 : return aCopy;
571 : }
572 :
573 : U_NAMESPACE_END
574 :
575 : #endif /* #if !UCONFIG_NO_FORMATTING */
576 :
577 : //eof
|