Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : *******************************************************************************
5 : * Copyright (C) 2011-2013, International Business Machines
6 : * Corporation and others. All Rights Reserved.
7 : *******************************************************************************
8 : * file name: messagepattern.h
9 : * encoding: UTF-8
10 : * tab size: 8 (not used)
11 : * indentation:4
12 : *
13 : * created on: 2011mar14
14 : * created by: Markus W. Scherer
15 : */
16 :
17 : #ifndef __MESSAGEPATTERN_H__
18 : #define __MESSAGEPATTERN_H__
19 :
20 : /**
21 : * \file
22 : * \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns.
23 : */
24 :
25 : #include "unicode/utypes.h"
26 :
27 : #if !UCONFIG_NO_FORMATTING
28 :
29 : #include "unicode/parseerr.h"
30 : #include "unicode/unistr.h"
31 :
32 : /**
33 : * Mode for when an apostrophe starts quoted literal text for MessageFormat output.
34 : * The default is DOUBLE_OPTIONAL unless overridden via uconfig.h
35 : * (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE).
36 : * <p>
37 : * A pair of adjacent apostrophes always results in a single apostrophe in the output,
38 : * even when the pair is between two single, text-quoting apostrophes.
39 : * <p>
40 : * The following table shows examples of desired MessageFormat.format() output
41 : * with the pattern strings that yield that output.
42 : * <p>
43 : * <table>
44 : * <tr>
45 : * <th>Desired output</th>
46 : * <th>DOUBLE_OPTIONAL</th>
47 : * <th>DOUBLE_REQUIRED</th>
48 : * </tr>
49 : * <tr>
50 : * <td>I see {many}</td>
51 : * <td>I see '{many}'</td>
52 : * <td>(same)</td>
53 : * </tr>
54 : * <tr>
55 : * <td>I said {'Wow!'}</td>
56 : * <td>I said '{''Wow!''}'</td>
57 : * <td>(same)</td>
58 : * </tr>
59 : * <tr>
60 : * <td>I don't know</td>
61 : * <td>I don't know OR<br> I don''t know</td>
62 : * <td>I don''t know</td>
63 : * </tr>
64 : * </table>
65 : * @stable ICU 4.8
66 : * @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
67 : */
68 : enum UMessagePatternApostropheMode {
69 : /**
70 : * A literal apostrophe is represented by
71 : * either a single or a double apostrophe pattern character.
72 : * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text
73 : * if it immediately precedes a curly brace ({ or }),
74 : * or a pipe symbol | if inside a choice format,
75 : * or a pound symbol # if inside a plural format.
76 : * <p>
77 : * This is the default behavior starting with ICU 4.8.
78 : * @stable ICU 4.8
79 : */
80 : UMSGPAT_APOS_DOUBLE_OPTIONAL,
81 : /**
82 : * A literal apostrophe must be represented by
83 : * a double apostrophe pattern character.
84 : * A single apostrophe always starts quoted literal text.
85 : * <p>
86 : * This is the behavior of ICU 4.6 and earlier, and of the JDK.
87 : * @stable ICU 4.8
88 : */
89 : UMSGPAT_APOS_DOUBLE_REQUIRED
90 : };
91 : /** Typedef so that the enum can be used as a plain type name, without the enum keyword.
92 : * @stable ICU 4.8
93 : */
94 : typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode;
95 :
96 : /**
97 : * MessagePattern::Part type constants.
98 : * @stable ICU 4.8
99 : */
100 : enum UMessagePatternPartType {
101 : /**
102 : * Start of a message pattern (main or nested).
103 : * The length is 0 for the top-level message
104 : * and for a choice argument sub-message, otherwise 1 for the '{'.
105 : * The value indicates the nesting level, starting with 0 for the main message.
106 : * <p>
107 : * There is always a later MSG_LIMIT part.
108 : * @stable ICU 4.8
109 : */
110 : UMSGPAT_PART_TYPE_MSG_START,
111 : /**
112 : * End of a message pattern (main or nested).
113 : * The length is 0 for the top-level message and
114 : * the last sub-message of a choice argument,
115 : * otherwise 1 for the '}' or (in a choice argument style) the '|'.
116 : * The value indicates the nesting level, starting with 0 for the main message.
117 : * @stable ICU 4.8
118 : */
119 : UMSGPAT_PART_TYPE_MSG_LIMIT,
120 : /**
121 : * Indicates a substring of the pattern string which is to be skipped when formatting.
122 : * For example, an apostrophe that begins or ends quoted text
123 : * would be indicated with such a part.
124 : * The value is undefined and currently always 0.
125 : * @stable ICU 4.8
126 : */
127 : UMSGPAT_PART_TYPE_SKIP_SYNTAX,
128 : /**
129 : * Indicates that a syntax character needs to be inserted for auto-quoting.
130 : * The length is 0.
131 : * The value is the character code of the insertion character. (U+0027=APOSTROPHE)
132 : * @stable ICU 4.8
133 : */
134 : UMSGPAT_PART_TYPE_INSERT_CHAR,
135 : /**
136 : * Indicates a syntactic (non-escaped) # symbol in a plural variant.
137 : * When formatting, replace this part's substring with the
138 : * (value-offset) for the plural argument value.
139 : * The value is undefined and currently always 0.
140 : * @stable ICU 4.8
141 : */
142 : UMSGPAT_PART_TYPE_REPLACE_NUMBER,
143 : /**
144 : * Start of an argument.
145 : * The length is 1 for the '{'.
146 : * The value is the ordinal value of the ArgType. Use Part::getArgType().
147 : * <p>
148 : * This part is followed by either an ARG_NUMBER or ARG_NAME,
149 : * followed by optional argument sub-parts (see UMessagePatternArgType constants)
150 : * and finally an ARG_LIMIT part.
151 : * @stable ICU 4.8
152 : */
153 : UMSGPAT_PART_TYPE_ARG_START,
154 : /**
155 : * End of an argument.
156 : * The length is 1 for the '}'.
157 : * The value is the ordinal value of the ArgType. Use Part::getArgType().
158 : * @stable ICU 4.8
159 : */
160 : UMSGPAT_PART_TYPE_ARG_LIMIT,
161 : /**
162 : * The argument number, provided by the value.
163 : * @stable ICU 4.8
164 : */
165 : UMSGPAT_PART_TYPE_ARG_NUMBER,
166 : /**
167 : * The argument name.
168 : * The value is undefined and currently always 0.
169 : * @stable ICU 4.8
170 : */
171 : UMSGPAT_PART_TYPE_ARG_NAME,
172 : /**
173 : * The argument type.
174 : * The value is undefined and currently always 0.
175 : * @stable ICU 4.8
176 : */
177 : UMSGPAT_PART_TYPE_ARG_TYPE,
178 : /**
179 : * The argument style text.
180 : * The value is undefined and currently always 0.
181 : * @stable ICU 4.8
182 : */
183 : UMSGPAT_PART_TYPE_ARG_STYLE,
184 : /**
185 : * A selector substring in a "complex" argument style.
186 : * The value is undefined and currently always 0.
187 : * @stable ICU 4.8
188 : */
189 : UMSGPAT_PART_TYPE_ARG_SELECTOR,
190 : /**
191 : * An integer value, for example the offset or an explicit selector value
192 : * in a PluralFormat style.
193 : * The part value is the integer value.
194 : * @stable ICU 4.8
195 : */
196 : UMSGPAT_PART_TYPE_ARG_INT,
197 : /**
198 : * A numeric value, for example the offset or an explicit selector value
199 : * in a PluralFormat style.
200 : * The part value is an index into an internal array of numeric values;
201 : * use MessagePattern::getNumericValue().
202 : * @stable ICU 4.8
203 : */
204 : UMSGPAT_PART_TYPE_ARG_DOUBLE
205 : };
206 : /** Typedef so that the enum can be used as a plain type name, without the enum keyword.
207 : * @stable ICU 4.8
208 : */
209 : typedef enum UMessagePatternPartType UMessagePatternPartType;
210 :
211 : /**
212 : * Argument type constants.
213 : * Returned by MessagePattern::Part::getArgType() for ARG_START and ARG_LIMIT parts.
214 : *
215 : * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT,
216 : * with a nesting level one greater than the surrounding message.
217 : * @stable ICU 4.8
218 : */
219 : enum UMessagePatternArgType {
220 : /**
221 : * The argument has no specified type.
222 : * @stable ICU 4.8
223 : */
224 : UMSGPAT_ARG_TYPE_NONE,
225 : /**
226 : * The argument has a "simple" type which is provided by the ARG_TYPE part.
227 : * An ARG_STYLE part might follow that.
228 : * @stable ICU 4.8
229 : */
230 : UMSGPAT_ARG_TYPE_SIMPLE,
231 : /**
232 : * The argument is a ChoiceFormat with one or more
233 : * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples.
234 : * @stable ICU 4.8
235 : */
236 : UMSGPAT_ARG_TYPE_CHOICE,
237 : /**
238 : * The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset
239 : * (e.g., offset:1)
240 : * and one or more (ARG_SELECTOR [explicit-value] message) tuples.
241 : * If the selector has an explicit value (e.g., =2), then
242 : * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message.
243 : * Otherwise the message immediately follows the ARG_SELECTOR.
244 : * @stable ICU 4.8
245 : */
246 : UMSGPAT_ARG_TYPE_PLURAL,
247 : /**
248 : * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs.
249 : * @stable ICU 4.8
250 : */
251 : UMSGPAT_ARG_TYPE_SELECT,
252 : /**
253 : * The argument is an ordinal-number PluralFormat
254 : * with the same style parts sequence and semantics as UMSGPAT_ARG_TYPE_PLURAL.
255 : * @stable ICU 50
256 : */
257 : UMSGPAT_ARG_TYPE_SELECTORDINAL
258 : };
259 : /** Typedef so that the enum can be used as a plain type name, without the enum keyword.
260 : * @stable ICU 4.8
261 : */
262 : typedef enum UMessagePatternArgType UMessagePatternArgType;
263 :
264 : /**
265 : * \def UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE
266 : * Returns TRUE if the argument type has a plural style part sequence and semantics,
267 : * that is, if it is UMSGPAT_ARG_TYPE_PLURAL or UMSGPAT_ARG_TYPE_SELECTORDINAL. The macro evaluates argType twice, so do not pass an expression with side effects.
268 : * @stable ICU 50
269 : */
270 : #define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \
271 : ((argType)==UMSGPAT_ARG_TYPE_PLURAL || (argType)==UMSGPAT_ARG_TYPE_SELECTORDINAL)
272 :
273 : enum {
274 : /**
275 : * Return value from MessagePattern::validateArgumentName() for when
276 : * the string is a valid "pattern identifier" but not a number.
277 : * @stable ICU 4.8
278 : */
279 : UMSGPAT_ARG_NAME_NOT_NUMBER=-1,
280 :
281 : /**
282 : * Return value from MessagePattern::validateArgumentName() for when
283 : * the string is invalid.
284 : * It might not be a valid "pattern identifier",
285 : * or it might consist only of ASCII digits but have a leading zero or be too large a number.
286 : * @stable ICU 4.8
287 : */
288 : UMSGPAT_ARG_NAME_NOT_VALID=-2
289 : };
290 :
291 : /**
292 : * Special value that is returned by MessagePattern::getNumericValue(const Part &) when no
293 : * numeric value is defined for a part. The value is the double -123456789.
294 : * @see MessagePattern::getNumericValue()
295 : * @stable ICU 4.8
296 : */
297 : #define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789))
298 :
299 : U_NAMESPACE_BEGIN
300 :
301 : class MessagePatternDoubleList;
302 : class MessagePatternPartsList;
303 :
304 : /**
305 : * Parses and represents ICU MessageFormat patterns.
306 : * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat.
307 : * Used in the implementations of those classes as well as in tools
308 : * for message validation, translation and format conversion.
309 : * <p>
310 : * The parser handles all syntax relevant for identifying message arguments.
311 : * This includes "complex" arguments whose style strings contain
312 : * nested MessageFormat pattern substrings.
313 : * For "simple" arguments (with no nested MessageFormat pattern substrings),
314 : * the argument style is not parsed any further.
315 : * <p>
316 : * The parser handles named and numbered message arguments and allows both in one message.
317 : * <p>
318 : * Once a pattern has been parsed successfully, iterate through the parsed data
319 : * with countParts(), getPart() and related methods.
320 : * <p>
321 : * The data logically represents a parse tree, but is stored and accessed
322 : * as a list of "parts" for fast and simple parsing and to minimize object allocations.
323 : * Arguments and nested messages are best handled via recursion.
324 : * For every _START "part", MessagePattern.getLimitPartIndex() efficiently returns
325 : * the index of the corresponding _LIMIT "part".
326 : * <p>
327 : * List of "parts":
328 : * <pre>
329 : * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT
330 : * argument = noneArg | simpleArg | complexArg
331 : * complexArg = choiceArg | pluralArg | selectArg
332 : *
333 : * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE
334 : * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE
335 : * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE
336 : * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL
337 : * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT
338 : *
339 : * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+
340 : * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+
341 : * selectStyle = (ARG_SELECTOR message)+
342 : * </pre>
343 : * <ul>
344 : * <li>Literal output text is not represented directly by "parts" but accessed
345 : * between parts of a message, from one part's getLimit() to the next part's getIndex().
346 : * <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE.
347 : * <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or
348 : * the less-than-or-equal-to sign (U+2264).
349 : * <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value.
350 : * The optional numeric Part between each (ARG_SELECTOR, message) pair
351 : * is the value of an explicit-number selector like "=2",
352 : * otherwise the selector is a non-numeric identifier.
353 : * <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle.
354 : * </ul>
355 : * <p>
356 : * This class is not intended for public subclassing.
357 : *
358 : * @stable ICU 4.8
359 : */
360 : class U_COMMON_API MessagePattern : public UObject {
361 : public:
362 : /**
363 : * Constructs an empty MessagePattern with default UMessagePatternApostropheMode.
364 : * @param errorCode Standard ICU error code. Its input value must
365 : * pass the U_SUCCESS() test, or else the function returns
366 : * immediately. Check for U_FAILURE() on output or use with
367 : * function chaining. (See User Guide for details.)
368 : * @stable ICU 4.8
369 : */
370 : MessagePattern(UErrorCode &errorCode);
371 :
372 : /**
373 : * Constructs an empty MessagePattern.
374 : * @param mode Explicit UMessagePatternApostropheMode.
375 : * @param errorCode Standard ICU error code. Its input value must
376 : * pass the U_SUCCESS() test, or else the function returns
377 : * immediately. Check for U_FAILURE() on output or use with
378 : * function chaining. (See User Guide for details.)
379 : * @stable ICU 4.8
380 : */
381 : MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode);
382 :
383 : /**
384 : * Constructs a MessagePattern with default UMessagePatternApostropheMode and
385 : * parses the MessageFormat pattern string.
386 : * @param pattern a MessageFormat pattern string
387 : * @param parseError Struct to receive information on the position
388 : * of an error within the pattern.
389 : * Can be NULL.
390 : * @param errorCode Standard ICU error code. Its input value must
391 : * pass the U_SUCCESS() test, or else the function returns
392 : * immediately. Check for U_FAILURE() on output or use with
393 : * function chaining. (See User Guide for details.)
394 : * TODO: turn @throws into UErrorCode specifics?
395 : * @throws IllegalArgumentException for syntax errors in the pattern string
396 : * @throws IndexOutOfBoundsException if certain limits are exceeded
397 : * (e.g., argument number too high, argument name too long, etc.)
398 : * @throws NumberFormatException if a number could not be parsed
399 : * @stable ICU 4.8
400 : */
401 : MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
402 :
403 : /**
404 : * Copy constructor.
405 : * @param other Object to copy.
406 : * @stable ICU 4.8
407 : */
408 : MessagePattern(const MessagePattern &other);
409 :
410 : /**
411 : * Assignment operator.
412 : * @param other Object to copy.
413 : * @return *this=other
414 : * @stable ICU 4.8
415 : */
416 : MessagePattern &operator=(const MessagePattern &other);
417 :
418 : /**
419 : * Destructor.
420 : * @stable ICU 4.8
421 : */
422 : virtual ~MessagePattern();
423 :
424 : /**
425 : * Parses a MessageFormat pattern string.
426 : * @param pattern a MessageFormat pattern string
427 : * @param parseError Struct to receive information on the position
428 : * of an error within the pattern.
429 : * Can be NULL.
430 : * @param errorCode Standard ICU error code. Its input value must
431 : * pass the U_SUCCESS() test, or else the function returns
432 : * immediately. Check for U_FAILURE() on output or use with
433 : * function chaining. (See User Guide for details.)
434 : * @return *this
435 : * @throws IllegalArgumentException for syntax errors in the pattern string
436 : * @throws IndexOutOfBoundsException if certain limits are exceeded
437 : * (e.g., argument number too high, argument name too long, etc.)
438 : * @throws NumberFormatException if a number could not be parsed
439 : * @stable ICU 4.8
440 : */
441 : MessagePattern &parse(const UnicodeString &pattern,
442 : UParseError *parseError, UErrorCode &errorCode);
443 :
444 : /**
445 : * Parses a ChoiceFormat pattern string.
446 : * @param pattern a ChoiceFormat pattern string
447 : * @param parseError Struct to receive information on the position
448 : * of an error within the pattern.
449 : * Can be NULL.
450 : * @param errorCode Standard ICU error code. Its input value must
451 : * pass the U_SUCCESS() test, or else the function returns
452 : * immediately. Check for U_FAILURE() on output or use with
453 : * function chaining. (See User Guide for details.)
454 : * @return *this
455 : * @throws IllegalArgumentException for syntax errors in the pattern string
456 : * @throws IndexOutOfBoundsException if certain limits are exceeded
457 : * (e.g., argument number too high, argument name too long, etc.)
458 : * @throws NumberFormatException if a number could not be parsed
459 : * @stable ICU 4.8
460 : */
461 : MessagePattern &parseChoiceStyle(const UnicodeString &pattern,
462 : UParseError *parseError, UErrorCode &errorCode);
463 :
464 : /**
465 : * Parses a PluralFormat pattern string.
466 : * @param pattern a PluralFormat pattern string
467 : * @param parseError Struct to receive information on the position
468 : * of an error within the pattern.
469 : * Can be NULL.
470 : * @param errorCode Standard ICU error code. Its input value must
471 : * pass the U_SUCCESS() test, or else the function returns
472 : * immediately. Check for U_FAILURE() on output or use with
473 : * function chaining. (See User Guide for details.)
474 : * @return *this
475 : * @throws IllegalArgumentException for syntax errors in the pattern string
476 : * @throws IndexOutOfBoundsException if certain limits are exceeded
477 : * (e.g., argument number too high, argument name too long, etc.)
478 : * @throws NumberFormatException if a number could not be parsed
479 : * @stable ICU 4.8
480 : */
481 : MessagePattern &parsePluralStyle(const UnicodeString &pattern,
482 : UParseError *parseError, UErrorCode &errorCode);
483 :
484 : /**
485 : * Parses a SelectFormat pattern string.
486 : * @param pattern a SelectFormat pattern string
487 : * @param parseError Struct to receive information on the position
488 : * of an error within the pattern.
489 : * Can be NULL.
490 : * @param errorCode Standard ICU error code. Its input value must
491 : * pass the U_SUCCESS() test, or else the function returns
492 : * immediately. Check for U_FAILURE() on output or use with
493 : * function chaining. (See User Guide for details.)
494 : * @return *this
495 : * @throws IllegalArgumentException for syntax errors in the pattern string
496 : * @throws IndexOutOfBoundsException if certain limits are exceeded
497 : * (e.g., argument number too high, argument name too long, etc.)
498 : * @throws NumberFormatException if a number could not be parsed
499 : * @stable ICU 4.8
500 : */
501 : MessagePattern &parseSelectStyle(const UnicodeString &pattern,
502 : UParseError *parseError, UErrorCode &errorCode);
503 :
504 : /**
505 : * Clears this MessagePattern.
506 : * countParts() will return 0.
507 : * @stable ICU 4.8
508 : */
509 : void clear();
510 :
511 : /**
512 : * Clears this MessagePattern by calling clear(), then stores the new UMessagePatternApostropheMode.
513 : * Afterwards countParts() will return 0.
514 : * @param mode The new UMessagePatternApostropheMode; getApostropheMode() returns it from now on.
515 : * @stable ICU 4.8
516 : */
517 0 : void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) {
518 0 : clear();
519 0 : aposMode=mode;
520 0 : }
521 :
522 : /**
523 : * @param other another object to compare with.
524 : * @return TRUE if this object is equivalent to the other one.
525 : * @stable ICU 4.8
526 : */
527 : UBool operator==(const MessagePattern &other) const;
528 :
529 : /** Inequality test, implemented as the negation of operator==(other).
530 : * @param other another object to compare with.
531 : * @return FALSE if this object is equivalent to the other one, TRUE otherwise.
532 : * @stable ICU 4.8
533 : */
534 0 : inline UBool operator!=(const MessagePattern &other) const {
535 0 : return !operator==(other);
536 : }
537 :
538 : /**
539 : * @return A hash code for this object.
540 : * @stable ICU 4.8
541 : */
542 : int32_t hashCode() const;
543 :
544 : /** Returns the apostrophe mode currently stored in this object.
545 : * @return this instance's UMessagePatternApostropheMode (for example as set by clearPatternAndSetApostropheMode()).
546 : * @stable ICU 4.8
547 : */
548 0 : UMessagePatternApostropheMode getApostropheMode() const {
549 0 : return aposMode;
550 : }
551 :
552 : // Java has package-private jdkAposMode() here.
553 : // In C++, this is declared in the MessageImpl class.
554 :
555 : /** Returns a reference to the pattern string stored in this object; a reference can never be null.
556 : * @return the parsed pattern string (presumably an empty string if none was parsed -- TODO confirm against clear()).
557 : * @stable ICU 4.8
558 : */
559 0 : const UnicodeString &getPatternString() const {
560 0 : return msg;
561 : }
562 :
563 : /**
564 : * Does the parsed pattern have named arguments like {first_name}?
565 : * @return TRUE if the parsed pattern has at least one named argument (the value of the internal hasArgNames flag).
566 : * @stable ICU 4.8
567 : */
568 0 : UBool hasNamedArguments() const {
569 0 : return hasArgNames;
570 : }
571 :
572 : /**
573 : * Does the parsed pattern have numbered arguments like {2}?
574 : * @return TRUE if the parsed pattern has at least one numbered argument (the value of the internal hasArgNumbers flag).
575 : * @stable ICU 4.8
576 : */
577 : UBool hasNumberedArguments() const {
578 : return hasArgNumbers;
579 : }
580 :
581 : /**
582 : * Validates and parses an argument name or argument number string.
583 : * An argument name must be a "pattern identifier", that is, it must contain
584 : * no Unicode Pattern_Syntax or Pattern_White_Space characters.
585 : * If it only contains ASCII digits, then it must be a small integer with no leading zero.
586 : * @param name Input string.
587 : * @return >=0 if the name is a valid number,
588 : * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
589 : * ARG_NAME_NOT_VALID (-2) if it is neither.
590 : * @stable ICU 4.8
591 : */
592 : static int32_t validateArgumentName(const UnicodeString &name);
593 :
594 : /**
595 : * Returns a version of the parsed pattern string where each ASCII apostrophe
596 : * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax.
597 : * <p>
598 : * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}."
599 : * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}."
600 : * @return the deep-auto-quoted version of the parsed pattern string.
601 : * @see MessageFormat.autoQuoteApostrophe()
602 : * @stable ICU 4.8
603 : */
604 : UnicodeString autoQuoteApostropheDeep() const;
605 :
606 : class Part;
607 :
608 : /**
609 : * Returns the number of "parts" created by parsing the pattern string.
610 : * Returns 0 if no pattern has been parsed or clear() was called.
611 : * @return the number of pattern parts (the value of the internal partsLength counter).
612 : * @stable ICU 4.8
613 : */
614 0 : int32_t countParts() const {
615 0 : return partsLength;
616 : }
617 :
618 : /**
619 : * Gets the i-th pattern "part". The index is used directly to subscript the parts array; no range check is performed.
620 : * @param i The index of the Part data. (0..countParts()-1) The caller must keep i within this range.
621 : * @return a reference to the i-th pattern "part".
622 : * @stable ICU 4.8
623 : */
624 0 : const Part &getPart(int32_t i) const {
625 0 : return parts[i];
626 : }
627 :
628 : /**
629 : * Returns the UMessagePatternPartType of the i-th pattern "part".
630 : * Convenience method for getPart(i).getType(); like getPart(), it does not range-check i.
631 : * @param i The index of the Part data. (0..countParts()-1)
632 : * @return The UMessagePatternPartType of the i-th Part.
633 : * @stable ICU 4.8
634 : */
635 0 : UMessagePatternPartType getPartType(int32_t i) const {
636 0 : return getPart(i).type;
637 : }
638 :
639 : /**
640 : * Returns the pattern index of the specified pattern "part".
641 : * Convenience method for getPart(partIndex).getIndex(); like getPart(), it does not range-check partIndex.
642 : * @param partIndex The index of the Part data. (0..countParts()-1)
643 : * @return The pattern string index of this Part.
644 : * @stable ICU 4.8
645 : */
646 0 : int32_t getPatternIndex(int32_t partIndex) const {
647 0 : return getPart(partIndex).index;
648 : }
649 :
650 : /**
651 : * Returns the substring of the pattern string indicated by the Part.
652 : * Implemented as tempSubString(part.index, part.length) on the stored pattern string.
653 : * @param part a part of this MessagePattern.
654 : * @return the substring associated with part. NOTE(review): tempSubString() presumably returns a read-only alias of the pattern's buffer, so the result should not outlive or be used across changes to this MessagePattern -- confirm against the UnicodeString documentation.
655 : * @stable ICU 4.8
656 : */
657 0 : UnicodeString getSubstring(const Part &part) const {
658 0 : return msg.tempSubString(part.index, part.length);
659 : }
660 :
661 : /**
662 : * Compares the part's substring with the input string s, without creating a substring object.
663 : * @param part a part of this MessagePattern.
664 : * @param s a string.
665 : * @return TRUE if the part's substring equals s, that is, if comparing the pattern string range (part.index, part.length) with s yields 0.
666 : * @stable ICU 4.8
667 : */
668 0 : UBool partSubstringMatches(const Part &part, const UnicodeString &s) const {
669 0 : return 0==msg.compare(part.index, part.length, s);
670 : }
671 :
672 : /**
673 : * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE.
674 : * @param part a part of this MessagePattern.
675 : * @return the part's numeric value, or UMSGPAT_NO_NUMERIC_VALUE if this is not a numeric part.
676 : * @stable ICU 4.8
677 : */
678 : double getNumericValue(const Part &part) const;
679 :
680 : /**
681 : * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
682 : * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
683 : * @return the "offset:" value.
684 : * @stable ICU 4.8
685 : */
686 : double getPluralOffset(int32_t pluralStart) const;
687 :
688 : /**
689 : * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start.
690 : * @param start The index of some Part data (0..countParts()-1);
691 : * this Part should be of Type ARG_START or MSG_START.
692 : * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level,
693 : * or start itself if getPartType(start)!=ARG|MSG_START. The result is the part's stored limitPartIndex unless that is less than start.
694 : * @stable ICU 4.8
695 : */
696 0 : int32_t getLimitPartIndex(int32_t start) const {
697 0 : int32_t limit=getPart(start).limitPartIndex;
698 0 : if(limit<start) {
699 0 : return start;
700 : }
701 0 : return limit;
702 : }
703 :
704 : /**
705 : * A message pattern "part", representing a pattern parsing event.
706 : * There is a part for the start and end of a message or argument,
707 : * for quoting and escaping of and with ASCII apostrophes,
708 : * and for syntax elements of "complex" arguments.
709 : * @stable ICU 4.8
710 : */
711 : class Part : public UMemory {
712 : public:
713 : /**
714 : * Default constructor, do not use: it leaves every data member uninitialized.
715 : * @internal
716 : */
717 0 : Part() {}
718 :
719 : /**
720 : * Returns the type of this part.
721 : * @return the part type.
722 : * @stable ICU 4.8
723 : */
724 0 : UMessagePatternPartType getType() const {
725 0 : return type;
726 : }
727 :
728 : /**
729 : * Returns the pattern string index associated with this Part.
730 : * @return this part's pattern string index.
731 : * @stable ICU 4.8
732 : */
733 0 : int32_t getIndex() const {
734 0 : return index;
735 : }
736 :
737 : /**
738 : * Returns the length of the pattern substring associated with this Part.
739 : * This is 0 for some parts.
740 : * @return this part's pattern substring length (stored as an unsigned 16-bit value).
741 : * @stable ICU 4.8
742 : */
743 : int32_t getLength() const {
744 : return length;
745 : }
746 :
747 : /**
748 : * Returns the pattern string limit (exclusive-end) index associated with this Part.
749 : * Convenience method for getIndex()+getLength().
750 : * @return this part's pattern string limit index, same as getIndex()+getLength().
751 : * @stable ICU 4.8
752 : */
753 0 : int32_t getLimit() const {
754 0 : return index+length;
755 : }
756 :
757 : /**
758 : * Returns a value associated with this part.
759 : * See the documentation of each part type for details.
760 : * @return the part value (stored as a signed 16-bit value).
761 : * @stable ICU 4.8
762 : */
763 0 : int32_t getValue() const {
764 0 : return value;
765 : }
766 :
767 : /**
768 : * Returns the argument type if this part is of type ARG_START or ARG_LIMIT,
769 : * otherwise UMSGPAT_ARG_TYPE_NONE. For ARG_START/ARG_LIMIT parts the stored value is cast to the enum.
770 : * @return the argument type for this part.
771 : * @stable ICU 4.8
772 : */
773 0 : UMessagePatternArgType getArgType() const {
774 0 : const UMessagePatternPartType partType=getType();  // local name chosen so it does not shadow the member "type"
775 0 : if(partType==UMSGPAT_PART_TYPE_ARG_START || partType==UMSGPAT_PART_TYPE_ARG_LIMIT) {
776 0 : return (UMessagePatternArgType)value;
777 : } else {
778 0 : return UMSGPAT_ARG_TYPE_NONE;
779 : }
780 : }
781 :
782 : /**
783 : * Indicates whether the Part type has a numeric value.
784 : * If so, then that numeric value can be retrieved via MessagePattern::getNumericValue().
785 : * @param type The Part type to be tested.
786 : * @return TRUE if the Part type is ARG_INT or ARG_DOUBLE.
787 : * @stable ICU 4.8
788 : */
789 0 : static UBool hasNumericValue(UMessagePatternPartType type) {
790 0 : return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE;
791 : }
792 :
793 : /**
794 : * @param other another object to compare with.
795 : * @return TRUE if this object is equivalent to the other one.
796 : * @stable ICU 4.8
797 : */
798 : UBool operator==(const Part &other) const;
799 :
800 : /** Inequality test, implemented as the negation of operator==(other).
801 : * @param other another object to compare with.
802 : * @return FALSE if this object is equivalent to the other one, TRUE otherwise.
803 : * @stable ICU 4.8
804 : */
805 0 : inline UBool operator!=(const Part &other) const {
806 0 : return !operator==(other);
807 : }
808 :
809 : /** Combines type, index, length and value with multiplier 37, using wrapping unsigned arithmetic so that large pattern indexes cannot cause signed overflow.
810 : * @return A hash code for this object; identical to ((type*37+index)*37+length)*37+value whenever that fits in an int32_t.
811 : * @stable ICU 4.8
812 : */
813 0 : int32_t hashCode() const {
814 0 : return (int32_t)((((uint32_t)type*37u+(uint32_t)index)*37u+length)*37u+(uint32_t)(int32_t)value);
815 : }
816 :
817 : private:
818 : friend class MessagePattern;
819 :
820 : static const int32_t MAX_LENGTH=0xffff;  // largest value that fits the uint16_t length field
821 : static const int32_t MAX_VALUE=0x7fff;  // largest value that fits the int16_t value field
822 :
823 : // The fields are not const because some of them are modified during pattern parsing.
824 : // After pattern parsing, the parts are effectively immutable.
825 : UMessagePatternPartType type;
826 : int32_t index;
827 : uint16_t length;
828 : int16_t value;
829 : int32_t limitPartIndex;
830 : };
831 :
832 : private:
833 : void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
834 :
835 : void postParse();
836 :
837 : int32_t parseMessage(int32_t index, int32_t msgStartLength,
838 : int32_t nestingLevel, UMessagePatternArgType parentType,
839 : UParseError *parseError, UErrorCode &errorCode);
840 :
841 : int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
842 : UParseError *parseError, UErrorCode &errorCode);
843 :
844 : int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode);
845 :
846 : int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel,
847 : UParseError *parseError, UErrorCode &errorCode);
848 :
849 : int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel,
850 : UParseError *parseError, UErrorCode &errorCode);
851 :
852 : /**
853 : * Validates and parses an argument name or argument number string.
854 : * This internal method assumes that the input substring is a "pattern identifier".
855 : * @return >=0 if the name is a valid number,
856 : * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
857 : * ARG_NAME_NOT_VALID (-2) if it is neither.
858 : * @see validateArgumentName(const UnicodeString &)
859 : */
860 : static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit);
861 :
862 0 : int32_t parseArgNumber(int32_t start, int32_t limit) {  // convenience overload: forwards to the static parseArgNumber() with this object's pattern string
863 0 : return parseArgNumber(msg, start, limit);
864 : }
865 :
866 : /**
867 : * Parses a number from the specified message substring.
868 : * @param start start index into the message string
869 : * @param limit limit index into the message string, must be start<limit
870 : * @param allowInfinity TRUE if U+221E is allowed (for ChoiceFormat)
871 : * @param parseError
872 : * @param errorCode
873 : */
874 : void parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
875 : UParseError *parseError, UErrorCode &errorCode);
876 :
877 : // Java has package-private appendReducedApostrophes() here.
878 : // In C++, this is declared in the MessageImpl class.
879 :
880 : int32_t skipWhiteSpace(int32_t index);
881 :
882 : int32_t skipIdentifier(int32_t index);
883 :
884 : /**
885 : * Skips a sequence of characters that could occur in a double value.
886 : * Does not fully parse or validate the value.
887 : */
888 : int32_t skipDouble(int32_t index);
889 :
890 : static UBool isArgTypeChar(UChar32 c);
891 :
892 : UBool isChoice(int32_t index);
893 :
894 : UBool isPlural(int32_t index);
895 :
896 : UBool isSelect(int32_t index);
897 :
898 : UBool isOrdinal(int32_t index);
899 :
900 : /**
901 : * @return TRUE if we are inside a MessageFormat (sub-)pattern,
902 : * as opposed to inside a top-level choice/plural/select pattern.
903 : */
904 : UBool inMessageFormatPattern(int32_t nestingLevel);
905 :
906 : /**
907 : * @return TRUE if we are in a MessageFormat sub-pattern
908 : * of a top-level ChoiceFormat pattern.
909 : */
910 : UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType);
911 :
912 : void addPart(UMessagePatternPartType type, int32_t index, int32_t length,
913 : int32_t value, UErrorCode &errorCode);
914 :
915 : void addLimitPart(int32_t start,
916 : UMessagePatternPartType type, int32_t index, int32_t length,
917 : int32_t value, UErrorCode &errorCode);
918 :
919 : void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode);
920 :
921 : void setParseError(UParseError *parseError, int32_t index);
922 :
923 : UBool init(UErrorCode &errorCode);
924 : UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode);
925 :
926 : UMessagePatternApostropheMode aposMode;
927 : UnicodeString msg;
928 : // ArrayList<Part> parts=new ArrayList<Part>();
929 : MessagePatternPartsList *partsList;
930 : Part *parts;
931 : int32_t partsLength;
932 : // ArrayList<Double> numericValues;
933 : MessagePatternDoubleList *numericValuesList;
934 : double *numericValues;
935 : int32_t numericValuesLength;
936 : UBool hasArgNames;
937 : UBool hasArgNumbers;
938 : UBool needsAutoQuoting;
939 : };
940 :
941 : U_NAMESPACE_END
942 :
943 : #endif // !UCONFIG_NO_FORMATTING
944 :
945 : #endif // __MESSAGEPATTERN_H__
|