LCOV - code coverage report
Current view: top level - intl/icu/source/common/unicode - messagepattern.h (source / functions) Hit Total Coverage
Test: output.info Lines: 0 51 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 22 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // © 2016 and later: Unicode, Inc. and others.
       2             : // License & terms of use: http://www.unicode.org/copyright.html
       3             : /*
       4             : *******************************************************************************
       5             : *   Copyright (C) 2011-2013, International Business Machines
       6             : *   Corporation and others.  All Rights Reserved.
       7             : *******************************************************************************
       8             : *   file name:  messagepattern.h
       9             : *   encoding:   UTF-8
      10             : *   tab size:   8 (not used)
      11             : *   indentation:4
      12             : *
      13             : *   created on: 2011mar14
      14             : *   created by: Markus W. Scherer
      15             : */
      16             : 
      17             : #ifndef __MESSAGEPATTERN_H__
      18             : #define __MESSAGEPATTERN_H__
      19             : 
      20             : /**
      21             :  * \file
      22             :  * \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns.
      23             :  */
      24             : 
      25             : #include "unicode/utypes.h"
      26             : 
      27             : #if !UCONFIG_NO_FORMATTING
      28             : 
      29             : #include "unicode/parseerr.h"
      30             : #include "unicode/unistr.h"
      31             : 
      32             : /**
      33             :  * Mode for when an apostrophe starts quoted literal text for MessageFormat output.
      34             :  * The default is DOUBLE_OPTIONAL unless overridden via uconfig.h
      35             :  * (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE).
      36             :  * <p>
      37             :  * A pair of adjacent apostrophes always results in a single apostrophe in the output,
      38             :  * even when the pair is between two single, text-quoting apostrophes.
      39             :  * <p>
      40             :  * The following table shows examples of desired MessageFormat.format() output
      41             :  * with the pattern strings that yield that output.
      42             :  * <p>
      43             :  * <table>
      44             :  *   <tr>
      45             :  *     <th>Desired output</th>
      46             :  *     <th>DOUBLE_OPTIONAL</th>
      47             :  *     <th>DOUBLE_REQUIRED</th>
      48             :  *   </tr>
      49             :  *   <tr>
      50             :  *     <td>I see {many}</td>
      51             :  *     <td>I see '{many}'</td>
      52             :  *     <td>(same)</td>
      53             :  *   </tr>
      54             :  *   <tr>
      55             :  *     <td>I said {'Wow!'}</td>
      56             :  *     <td>I said '{''Wow!''}'</td>
      57             :  *     <td>(same)</td>
      58             :  *   </tr>
      59             :  *   <tr>
      60             :  *     <td>I don't know</td>
      61             :  *     <td>I don't know OR<br> I don''t know</td>
      62             :  *     <td>I don''t know</td>
      63             :  *   </tr>
      64             :  * </table>
      65             :  * @stable ICU 4.8
      66             :  * @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
      67             :  */
      68             : enum UMessagePatternApostropheMode {
      69             :     /**
      70             :      * A literal apostrophe is represented by
      71             :      * either a single or a double apostrophe pattern character.
      72             :      * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text
      73             :      * if it immediately precedes a curly brace ({ or }),
      74             :      * or a pipe symbol | if inside a choice format,
      75             :      * or a pound symbol # if inside a plural format.
      76             :      * <p>
      77             :      * This is the default behavior starting with ICU 4.8.
      78             :      * @stable ICU 4.8
      79             :      */
      80             :     UMSGPAT_APOS_DOUBLE_OPTIONAL,
      81             :     /**
      82             :      * A literal apostrophe must be represented by
      83             :      * a double apostrophe pattern character.
      84             :      * A single apostrophe always starts quoted literal text.
      85             :      * <p>
      86             :      * This is the behavior of ICU 4.6 and earlier, and of the JDK.
      87             :      * @stable ICU 4.8
      88             :      */
      89             :     UMSGPAT_APOS_DOUBLE_REQUIRED
      90             : };
      91             : /**
      92             :  * @stable ICU 4.8
      93             :  */
      94             : typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode;
      95             : 
      96             : /**
      97             :  * MessagePattern::Part type constants.
      98             :  * @stable ICU 4.8
      99             :  */
     100             : enum UMessagePatternPartType {
     101             :     /**
     102             :      * Start of a message pattern (main or nested).
     103             :      * The length is 0 for the top-level message
     104             :      * and for a choice argument sub-message, otherwise 1 for the '{'.
     105             :      * The value indicates the nesting level, starting with 0 for the main message.
     106             :      * <p>
     107             :      * There is always a later MSG_LIMIT part.
     108             :      * @stable ICU 4.8
     109             :      */
     110             :     UMSGPAT_PART_TYPE_MSG_START,
     111             :     /**
     112             :      * End of a message pattern (main or nested).
     113             :      * The length is 0 for the top-level message and
     114             :      * the last sub-message of a choice argument,
     115             :      * otherwise 1 for the '}' or (in a choice argument style) the '|'.
     116             :      * The value indicates the nesting level, starting with 0 for the main message.
     117             :      * @stable ICU 4.8
     118             :      */
     119             :     UMSGPAT_PART_TYPE_MSG_LIMIT,
     120             :     /**
     121             :      * Indicates a substring of the pattern string which is to be skipped when formatting.
     122             :      * For example, an apostrophe that begins or ends quoted text
     123             :      * would be indicated with such a part.
     124             :      * The value is undefined and currently always 0.
     125             :      * @stable ICU 4.8
     126             :      */
     127             :     UMSGPAT_PART_TYPE_SKIP_SYNTAX,
     128             :     /**
     129             :      * Indicates that a syntax character needs to be inserted for auto-quoting.
     130             :      * The length is 0.
     131             :      * The value is the character code of the insertion character. (U+0027=APOSTROPHE)
     132             :      * @stable ICU 4.8
     133             :      */
     134             :     UMSGPAT_PART_TYPE_INSERT_CHAR,
     135             :     /**
     136             :      * Indicates a syntactic (non-escaped) # symbol in a plural variant.
     137             :      * When formatting, replace this part's substring with the
     138             :      * (value-offset) for the plural argument value.
     139             :      * The value is undefined and currently always 0.
     140             :      * @stable ICU 4.8
     141             :      */
     142             :     UMSGPAT_PART_TYPE_REPLACE_NUMBER,
     143             :     /**
     144             :      * Start of an argument.
     145             :      * The length is 1 for the '{'.
     146             :      * The value is the ordinal value of the ArgType. Use MessagePattern::Part::getArgType().
     147             :      * <p>
     148             :      * This part is followed by either an ARG_NUMBER or ARG_NAME,
     149             :      * followed by optional argument sub-parts (see UMessagePatternArgType constants)
     150             :      * and finally an ARG_LIMIT part.
     151             :      * @stable ICU 4.8
     152             :      */
     153             :     UMSGPAT_PART_TYPE_ARG_START,
     154             :     /**
     155             :      * End of an argument.
     156             :      * The length is 1 for the '}'.
     157             :      * The value is the ordinal value of the ArgType. Use MessagePattern::Part::getArgType().
     158             :      * @stable ICU 4.8
     159             :      */
     160             :     UMSGPAT_PART_TYPE_ARG_LIMIT,
     161             :     /**
     162             :      * The argument number, provided by the value.
     163             :      * @stable ICU 4.8
     164             :      */
     165             :     UMSGPAT_PART_TYPE_ARG_NUMBER,
     166             :     /**
     167             :      * The argument name.
     168             :      * The value is undefined and currently always 0.
     169             :      * @stable ICU 4.8
     170             :      */
     171             :     UMSGPAT_PART_TYPE_ARG_NAME,
     172             :     /**
     173             :      * The argument type.
     174             :      * The value is undefined and currently always 0.
     175             :      * @stable ICU 4.8
     176             :      */
     177             :     UMSGPAT_PART_TYPE_ARG_TYPE,
     178             :     /**
     179             :      * The argument style text.
     180             :      * The value is undefined and currently always 0.
     181             :      * @stable ICU 4.8
     182             :      */
     183             :     UMSGPAT_PART_TYPE_ARG_STYLE,
     184             :     /**
     185             :      * A selector substring in a "complex" argument style.
     186             :      * The value is undefined and currently always 0.
     187             :      * @stable ICU 4.8
     188             :      */
     189             :     UMSGPAT_PART_TYPE_ARG_SELECTOR,
     190             :     /**
     191             :      * An integer value, for example the offset or an explicit selector value
     192             :      * in a PluralFormat style.
     193             :      * The part value is the integer value.
     194             :      * @stable ICU 4.8
     195             :      */
     196             :     UMSGPAT_PART_TYPE_ARG_INT,
     197             :     /**
     198             :      * A numeric value, for example the offset or an explicit selector value
     199             :      * in a PluralFormat style.
     200             :      * The part value is an index into an internal array of numeric values;
     201             :      * use MessagePattern::getNumericValue() to read the number itself.
     202             :      * @stable ICU 4.8
     203             :      */
     204             :     UMSGPAT_PART_TYPE_ARG_DOUBLE
     205             : };
     206             : /**
     207             :  * @stable ICU 4.8
     208             :  */
     209             : typedef enum UMessagePatternPartType UMessagePatternPartType;
     210             : 
     211             : /**
     212             :  * Argument type constants.
     213             :  * Returned by MessagePattern::Part::getArgType() for ARG_START and ARG_LIMIT parts.
     214             :  *
     215             :  * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT,
     216             :  * with a nesting level one greater than the surrounding message.
     217             :  * @stable ICU 4.8
     218             :  */
     219             : enum UMessagePatternArgType {
     220             :     /**
     221             :      * The argument has no specified type.
     222             :      * @stable ICU 4.8
     223             :      */
     224             :     UMSGPAT_ARG_TYPE_NONE,
     225             :     /**
     226             :      * The argument has a "simple" type which is provided by the ARG_TYPE part.
     227             :      * An ARG_STYLE part might follow that.
     228             :      * @stable ICU 4.8
     229             :      */
     230             :     UMSGPAT_ARG_TYPE_SIMPLE,
     231             :     /**
     232             :      * The argument is a ChoiceFormat with one or more
     233             :      * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples.
     234             :      * @stable ICU 4.8
     235             :      */
     236             :     UMSGPAT_ARG_TYPE_CHOICE,
     237             :     /**
     238             :      * The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset
     239             :      * (e.g., offset:1)
     240             :      * and one or more (ARG_SELECTOR [explicit-value] message) tuples.
     241             :      * If the selector has an explicit value (e.g., =2), then
     242             :      * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message.
     243             :      * Otherwise the message immediately follows the ARG_SELECTOR.
     244             :      * @stable ICU 4.8
     245             :      */
     246             :     UMSGPAT_ARG_TYPE_PLURAL,
     247             :     /**
     248             :      * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs.
     249             :      * @stable ICU 4.8
     250             :      */
     251             :     UMSGPAT_ARG_TYPE_SELECT,
     252             :     /**
     253             :      * The argument is an ordinal-number PluralFormat
     254             :      * with the same style parts sequence and semantics as UMSGPAT_ARG_TYPE_PLURAL.
     255             :      * @stable ICU 50
     256             :      */
     257             :     UMSGPAT_ARG_TYPE_SELECTORDINAL
     258             : };
     259             : /**
     260             :  * @stable ICU 4.8
     261             :  */
     262             : typedef enum UMessagePatternArgType UMessagePatternArgType;
     263             : 
     264             : /**
     265             :  * \def UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE
     266             :  * Returns TRUE if the argument type has a plural style part sequence and semantics,
     267             :  * that is, UMSGPAT_ARG_TYPE_PLURAL or UMSGPAT_ARG_TYPE_SELECTORDINAL. May evaluate argType twice.
     268             :  * @stable ICU 50
     269             :  */
     270             : #define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \
     271             :     ((argType)==UMSGPAT_ARG_TYPE_PLURAL || (argType)==UMSGPAT_ARG_TYPE_SELECTORDINAL)
     272             : 
     273             : enum {
     274             :     /**
     275             :      * Return value from MessagePattern::validateArgumentName() for when
     276             :      * the string is a valid "pattern identifier" but not a number.
     277             :      * @stable ICU 4.8
     278             :      */
     279             :     UMSGPAT_ARG_NAME_NOT_NUMBER=-1,
     280             : 
     281             :     /**
     282             :      * Return value from MessagePattern::validateArgumentName() for when
     283             :      * the string is invalid.
     284             :      * It might not be a valid "pattern identifier",
     285             :      * or it might consist only of ASCII digits but have a leading zero or be too large a number.
     286             :      * @stable ICU 4.8
     287             :      */
     288             :     UMSGPAT_ARG_NAME_NOT_VALID=-2
     289             : };
     290             : 
     291             : /**
     292             :  * Special value that is returned by MessagePattern::getNumericValue() when no
     293             :  * numeric value is defined for a part.
     294             :  * @see MessagePattern::getNumericValue()
     295             :  * @stable ICU 4.8
     296             :  */
     297             : #define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789))
     298             : 
     299             : U_NAMESPACE_BEGIN
     300             : 
     301             : class MessagePatternDoubleList;
     302             : class MessagePatternPartsList;
     303             : 
     304             : /**
     305             :  * Parses and represents ICU MessageFormat patterns.
     306             :  * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat.
     307             :  * Used in the implementations of those classes as well as in tools
     308             :  * for message validation, translation and format conversion.
     309             :  * <p>
     310             :  * The parser handles all syntax relevant for identifying message arguments.
     311             :  * This includes "complex" arguments whose style strings contain
     312             :  * nested MessageFormat pattern substrings.
     313             :  * For "simple" arguments (with no nested MessageFormat pattern substrings),
     314             :  * the argument style is not parsed any further.
     315             :  * <p>
     316             :  * The parser handles named and numbered message arguments and allows both in one message.
     317             :  * <p>
     318             :  * Once a pattern has been parsed successfully, iterate through the parsed data
     319             :  * with countParts(), getPart() and related methods.
     320             :  * <p>
     321             :  * The data logically represents a parse tree, but is stored and accessed
     322             :  * as a list of "parts" for fast and simple parsing and to minimize object allocations.
     323             :  * Arguments and nested messages are best handled via recursion.
     324             :  * For every _START "part", MessagePattern::getLimitPartIndex() efficiently returns
     325             :  * the index of the corresponding _LIMIT "part".
     326             :  * <p>
     327             :  * List of "parts":
     328             :  * <pre>
     329             :  * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT
     330             :  * argument = noneArg | simpleArg | complexArg
     331             :  * complexArg = choiceArg | pluralArg | selectArg
     332             :  *
     333             :  * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE
     334             :  * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE
     335             :  * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE
     336             :  * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL
     337             :  * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT
     338             :  *
     339             :  * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+
     340             :  * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+
     341             :  * selectStyle = (ARG_SELECTOR message)+
     342             :  * </pre>
     343             :  * <ul>
     344             :  *   <li>Literal output text is not represented directly by "parts" but accessed
     345             :  *       between parts of a message, from one part's getLimit() to the next part's getIndex().
     346             :  *   <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE.
     347             :  *   <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or
     348             :  *       the less-than-or-equal-to sign (U+2264).
     349             :  *   <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value.
     350             :  *       The optional numeric Part between each (ARG_SELECTOR, message) pair
     351             :  *       is the value of an explicit-number selector like "=2",
     352             :  *       otherwise the selector is a non-numeric identifier.
     353             :  *   <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle.
     354             :  * </ul>
     355             :  * <p>
     356             :  * This class is not intended for public subclassing.
     357             :  *
     358             :  * @stable ICU 4.8
     359             :  */
     360             : class U_COMMON_API MessagePattern : public UObject {
     361             : public:
     362             :     /**
     363             :      * Constructs an empty MessagePattern with default UMessagePatternApostropheMode.
     364             :      * @param errorCode Standard ICU error code. Its input value must
     365             :      *                  pass the U_SUCCESS() test, or else the function returns
     366             :      *                  immediately. Check for U_FAILURE() on output or use with
     367             :      *                  function chaining. (See User Guide for details.)
     368             :      * @stable ICU 4.8
     369             :      */
     370             :     MessagePattern(UErrorCode &errorCode);
     371             : 
     372             :     /**
     373             :      * Constructs an empty MessagePattern.
     374             :      * @param mode Explicit UMessagePatternApostropheMode.
     375             :      * @param errorCode Standard ICU error code. Its input value must
     376             :      *                  pass the U_SUCCESS() test, or else the function returns
     377             :      *                  immediately. Check for U_FAILURE() on output or use with
     378             :      *                  function chaining. (See User Guide for details.)
     379             :      * @stable ICU 4.8
     380             :      */
     381             :     MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode);
     382             : 
     383             :     /**
     384             :      * Constructs a MessagePattern with default UMessagePatternApostropheMode and
     385             :      * parses the MessageFormat pattern string.
     386             :      * @param pattern a MessageFormat pattern string
     387             :      * @param parseError Struct to receive information on the position
     388             :      *                   of an error within the pattern.
     389             :      *                   Can be NULL.
     390             :      * @param errorCode Standard ICU error code. Its input value must
     391             :      *                  pass the U_SUCCESS() test, or else the function returns
     392             :      *                  immediately. Check for U_FAILURE() on output or use with
     393             :      *                  function chaining. (See User Guide for details.)
     394             :      * TODO: turn @throws into UErrorCode specifics?
     395             :      * @throws IllegalArgumentException for syntax errors in the pattern string
     396             :      * @throws IndexOutOfBoundsException if certain limits are exceeded
     397             :      *         (e.g., argument number too high, argument name too long, etc.)
     398             :      * @throws NumberFormatException if a number could not be parsed
     399             :      * @stable ICU 4.8
     400             :      */
     401             :     MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
     402             : 
     403             :     /**
     404             :      * Copy constructor.
     405             :      * @param other Object to copy.
     406             :      * @stable ICU 4.8
     407             :      */
     408             :     MessagePattern(const MessagePattern &other);
     409             : 
     410             :     /**
     411             :      * Assignment operator.
     412             :      * @param other Object to copy.
     413             :      * @return *this=other
     414             :      * @stable ICU 4.8
     415             :      */
     416             :     MessagePattern &operator=(const MessagePattern &other);
     417             : 
     418             :     /**
     419             :      * Destructor.
     420             :      * @stable ICU 4.8
     421             :      */
     422             :     virtual ~MessagePattern();
     423             : 
     424             :     /**
     425             :      * Parses a MessageFormat pattern string.
     426             :      * @param pattern a MessageFormat pattern string
     427             :      * @param parseError Struct to receive information on the position
     428             :      *                   of an error within the pattern.
     429             :      *                   Can be NULL.
     430             :      * @param errorCode Standard ICU error code. Its input value must
     431             :      *                  pass the U_SUCCESS() test, or else the function returns
     432             :      *                  immediately. Check for U_FAILURE() on output or use with
     433             :      *                  function chaining. (See User Guide for details.)
     434             :      * @return *this
     435             :      * @throws IllegalArgumentException for syntax errors in the pattern string
     436             :      * @throws IndexOutOfBoundsException if certain limits are exceeded
     437             :      *         (e.g., argument number too high, argument name too long, etc.)
     438             :      * @throws NumberFormatException if a number could not be parsed
     439             :      * @stable ICU 4.8
     440             :      */
     441             :     MessagePattern &parse(const UnicodeString &pattern,
     442             :                           UParseError *parseError, UErrorCode &errorCode);
     443             : 
     444             :     /**
     445             :      * Parses a ChoiceFormat pattern string.
     446             :      * @param pattern a ChoiceFormat pattern string
     447             :      * @param parseError Struct to receive information on the position
     448             :      *                   of an error within the pattern.
     449             :      *                   Can be NULL.
     450             :      * @param errorCode Standard ICU error code. Its input value must
     451             :      *                  pass the U_SUCCESS() test, or else the function returns
     452             :      *                  immediately. Check for U_FAILURE() on output or use with
     453             :      *                  function chaining. (See User Guide for details.)
     454             :      * @return *this
     455             :      * @throws IllegalArgumentException for syntax errors in the pattern string
     456             :      * @throws IndexOutOfBoundsException if certain limits are exceeded
     457             :      *         (e.g., argument number too high, argument name too long, etc.)
     458             :      * @throws NumberFormatException if a number could not be parsed
     459             :      * @stable ICU 4.8
     460             :      */
     461             :     MessagePattern &parseChoiceStyle(const UnicodeString &pattern,
     462             :                                      UParseError *parseError, UErrorCode &errorCode);
     463             : 
     464             :     /**
     465             :      * Parses a PluralFormat pattern string.
     466             :      * @param pattern a PluralFormat pattern string
     467             :      * @param parseError Struct to receive information on the position
     468             :      *                   of an error within the pattern.
     469             :      *                   Can be NULL.
     470             :      * @param errorCode Standard ICU error code. Its input value must
     471             :      *                  pass the U_SUCCESS() test, or else the function returns
     472             :      *                  immediately. Check for U_FAILURE() on output or use with
     473             :      *                  function chaining. (See User Guide for details.)
     474             :      * @return *this
     475             :      * @throws IllegalArgumentException for syntax errors in the pattern string
     476             :      * @throws IndexOutOfBoundsException if certain limits are exceeded
     477             :      *         (e.g., argument number too high, argument name too long, etc.)
     478             :      * @throws NumberFormatException if a number could not be parsed
     479             :      * @stable ICU 4.8
     480             :      */
     481             :     MessagePattern &parsePluralStyle(const UnicodeString &pattern,
     482             :                                      UParseError *parseError, UErrorCode &errorCode);
     483             : 
     484             :     /**
     485             :      * Parses a SelectFormat pattern string.
     486             :      * @param pattern a SelectFormat pattern string
     487             :      * @param parseError Struct to receive information on the position
     488             :      *                   of an error within the pattern.
     489             :      *                   Can be NULL.
     490             :      * @param errorCode Standard ICU error code. Its input value must
     491             :      *                  pass the U_SUCCESS() test, or else the function returns
     492             :      *                  immediately. Check for U_FAILURE() on output or use with
     493             :      *                  function chaining. (See User Guide for details.)
     494             :      * @return *this
     495             :      * @throws IllegalArgumentException for syntax errors in the pattern string
     496             :      * @throws IndexOutOfBoundsException if certain limits are exceeded
     497             :      *         (e.g., argument number too high, argument name too long, etc.)
     498             :      * @throws NumberFormatException if a number could not be parsed
     499             :      * @stable ICU 4.8
     500             :      */
     501             :     MessagePattern &parseSelectStyle(const UnicodeString &pattern,
     502             :                                      UParseError *parseError, UErrorCode &errorCode);
     503             : 
     504             :     /**
     505             :      * Clears this MessagePattern.
     506             :      * countParts() will return 0.
     507             :      * @stable ICU 4.8
     508             :      */
     509             :     void clear();
     510             : 
     511             :     /**
     512             :      * Clears this MessagePattern and sets the UMessagePatternApostropheMode.
     513             :      * countParts() will return 0.
     514             :      * @param mode The new UMessagePatternApostropheMode.
     515             :      * @stable ICU 4.8
     516             :      */
     517           0 :     void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) {
     518           0 :         clear();  // clear this MessagePattern first, before the mode changes
     519           0 :         aposMode=mode;  // store the new mode; getApostropheMode() reads it back
     520           0 :     }
     521             : 
     522             :     /**
     523             :      * @param other another object to compare with.
     524             :      * @return TRUE if this object is equivalent to the other one.
     525             :      * @stable ICU 4.8
     526             :      */
     527             :     UBool operator==(const MessagePattern &other) const;
     528             : 
     529             :     /**
     530             :      * @param other another object to compare with.
     531             :      * @return FALSE if this object is equivalent to the other one.
     532             :      * @stable ICU 4.8
     533             :      */
     534           0 :     inline UBool operator!=(const MessagePattern &other) const {
     535           0 :         return !(*this==other);  // exact negation of operator==()
     536             :     }
     537             : 
     538             :     /**
     539             :      * @return A hash code for this object.
     540             :      * @stable ICU 4.8
     541             :      */
     542             :     int32_t hashCode() const;
     543             : 
     544             :     /**
     545             :      * @return this instance's UMessagePatternApostropheMode.
     546             :      * @stable ICU 4.8
     547             :      */
     548           0 :     UMessagePatternApostropheMode getApostropheMode() const {
     549           0 :         return aposMode;  // e.g. the value stored by clearPatternAndSetApostropheMode()
     550             :     }
     551             : 
     552             :     // Java has package-private jdkAposMode() here.
     553             :     // In C++, this is declared in the MessageImpl class.
     554             : 
     555             :     /**
     556             :      * @return the parsed pattern string, by reference (never null; expected to be empty if none was parsed).
     557             :      * @stable ICU 4.8
     558             :      */
     559           0 :     const UnicodeString &getPatternString() const {
     560           0 :         return msg;  // reference to the member string, not a copy
     561             :     }
     562             : 
     563             :     /**
     564             :      * Does the parsed pattern have named arguments like {first_name}?
     565             :      * @return TRUE if the parsed pattern has at least one named argument.
     566             :      * @stable ICU 4.8
     567             :      */
     568           0 :     UBool hasNamedArguments() const {
     569           0 :         return hasArgNames;  // stored flag; the pattern is not re-scanned here
     570             :     }
     571             : 
     572             :     /**
     573             :      * Does the parsed pattern have numbered arguments like {2}?
     574             :      * @return TRUE if the parsed pattern has at least one numbered argument.
     575             :      * @stable ICU 4.8
     576             :      */
     577             :     UBool hasNumberedArguments() const {
     578             :         return hasArgNumbers;  // stored flag; the pattern is not re-scanned here
     579             :     }
     580             : 
     581             :     /**
     582             :      * Validates and parses an argument name or argument number string.
     583             :      * An argument name must be a "pattern identifier", that is, it must contain
     584             :      * no Unicode Pattern_Syntax or Pattern_White_Space characters.
     585             :      * If it only contains ASCII digits, then it must be a small integer with no leading zero.
     586             :      * @param name Input string.
     587             :      * @return &gt;=0 if the name is a valid number,
     588             :      *         ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
     589             :      *         ARG_NAME_NOT_VALID (-2) if it is neither.
     590             :      * @stable ICU 4.8
     591             :      */
     592             :     static int32_t validateArgumentName(const UnicodeString &name);
     593             : 
     594             :     /**
     595             :      * Returns a version of the parsed pattern string where each ASCII apostrophe
     596             :      * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax.
     597             :      * <p>
     598             :      * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}."
     599             :      * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}."
     600             :      * @return the deep-auto-quoted version of the parsed pattern string.
     601             :      * @see MessageFormat::autoQuoteApostrophe()
     602             :      * @stable ICU 4.8
     603             :      */
     604             :     UnicodeString autoQuoteApostropheDeep() const;
     605             : 
     606             :     class Part;
     607             : 
     608             :     /**
     609             :      * Returns the number of "parts" created by parsing the pattern string.
     610             :      * Returns 0 if no pattern has been parsed or clear() was called.
     611             :      * @return the number of pattern parts.
     612             :      * @stable ICU 4.8
     613             :      */
     614           0 :     int32_t countParts() const {
     615           0 :         return partsLength;  // stored count; nothing is recomputed here
     616             :     }
     617             : 
     618             :     /**
     619             :      * Gets the i-th pattern "part".
     620             :      * @param i The index of the Part data. (0..countParts()-1)
     621             :      * @return the i-th pattern "part".
     622             :      * @stable ICU 4.8
     623             :      */
     624           0 :     const Part &getPart(int32_t i) const {
     625           0 :         return parts[i];  // unchecked access: the caller must keep i within 0..countParts()-1
     626             :     }
     627             : 
     628             :     /**
     629             :      * Returns the UMessagePatternPartType of the i-th pattern "part".
     630             :      * Convenience method for getPart(i).getType().
     631             :      * @param i The index of the Part data. (0..countParts()-1)
     632             :      * @return The UMessagePatternPartType of the i-th Part.
     633             :      * @stable ICU 4.8
     634             :      */
     635           0 :     UMessagePatternPartType getPartType(int32_t i) const {
     636           0 :         return getPart(i).getType();  // same unchecked indexing as getPart()
     637             :     }
     638             : 
     639             :     /**
     640             :      * Returns the pattern index of the specified pattern "part".
     641             :      * Convenience method for getPart(partIndex).getIndex().
     642             :      * @param partIndex The index of the Part data. (0..countParts()-1)
     643             :      * @return The pattern index of this Part.
     644             :      * @stable ICU 4.8
     645             :      */
     646           0 :     int32_t getPatternIndex(int32_t partIndex) const {
     647           0 :         return getPart(partIndex).getIndex();  // same unchecked indexing as getPart()
     648             :     }
     649             : 
     650             :     /**
     651             :      * Returns the substring of the pattern string indicated by the Part.
     652             :      * Convenience method for getPatternString().tempSubString(part.getIndex(), part.getLength()).
     653             :      * @param part a part of this MessagePattern.
     654             :      * @return the substring associated with part.
     655             :      * @stable ICU 4.8
     656             :      */
     657           0 :     UnicodeString getSubstring(const Part &part) const {
     658           0 :         return msg.tempSubString(part.getIndex(), part.getLength());  // (start, length), not (start, limit)
     659             :     }
     660             : 
     661             :     /**
     662             :      * Compares the part's substring with the input string s.
     663             :      * @param part a part of this MessagePattern.
     664             :      * @param s a string.
     665             :      * @return TRUE if getSubstring(part)==s.
     666             :      * @stable ICU 4.8
     667             :      */
     668           0 :     UBool partSubstringMatches(const Part &part, const UnicodeString &s) const {
     669           0 :         return msg.compare(part.getIndex(), part.getLength(), s)==0;  // a compare() result of 0 means equal
     670             :     }
     671             : 
     672             :     /**
     673             :      * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE.
     674             :      * @param part a part of this MessagePattern.
     675             :      * @return the part's numeric value, or UMSGPAT_NO_NUMERIC_VALUE if this is not a numeric part.
     676             :      * @stable ICU 4.8
     677             :      */
     678             :     double getNumericValue(const Part &part) const;
     679             : 
     680             :     /**
     681             :      * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
     682             :      * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
     683             :      * @return the "offset:" value.
     684             :      * @stable ICU 4.8
     685             :      */
     686             :     double getPluralOffset(int32_t pluralStart) const;
     687             : 
     688             :     /**
     689             :      * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start.
     690             :      * @param start The index of some Part data (0..countParts()-1);
     691             :      *        this Part should be of Type ARG_START or MSG_START.
     692             :      * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level,
     693             :      *         or start itself if getPartType(start)!=ARG|MSG_START.
     694             :      * @stable ICU 4.8
     695             :      */
     696           0 :     int32_t getLimitPartIndex(int32_t start) const {
     697           0 :         const int32_t storedLimit=getPart(start).limitPartIndex;
     698           0 :         if(storedLimit>=start) {
     699           0 :             return storedLimit;
     700             :         }
     701           0 :         return start;  // a stored limit below start falls back to start itself
     702             :     }
     703             : 
     704             :     /**
     705             :      * A message pattern "part", representing a pattern parsing event.
     706             :      * There is a part for the start and end of a message or argument,
     707             :      * for quoting and escaping of and with ASCII apostrophes,
     708             :      * and for syntax elements of "complex" arguments.
     709             :      * @stable ICU 4.8
     710             :      */
     711             :     class Part : public UMemory {
     712             :     public:
     713             :         /**
     714             :          * Default constructor, do not use.
     715             :          * @internal
     716             :          */
     717           0 :         Part() {}  // leaves every field uninitialized
     718             : 
     719             :         /**
     720             :          * Returns the type of this part.
     721             :          * @return the part type.
     722             :          * @stable ICU 4.8
     723             :          */
     724           0 :         UMessagePatternPartType getType() const {
     725           0 :             return type;
     726             :         }
     727             : 
     728             :         /**
     729             :          * Returns the pattern string index associated with this Part.
     730             :          * @return this part's pattern string index.
     731             :          * @stable ICU 4.8
     732             :          */
     733           0 :         int32_t getIndex() const {
     734           0 :             return index;
     735             :         }
     736             : 
     737             :         /**
     738             :          * Returns the length of the pattern substring associated with this Part.
     739             :          * This is 0 for some parts.
     740             :          * @return this part's pattern substring length.
     741             :          * @stable ICU 4.8
     742             :          */
     743             :         int32_t getLength() const {
     744             :             return length;  // widened from the uint16_t field
     745             :         }
     746             : 
     747             :         /**
     748             :          * Returns the pattern string limit (exclusive-end) index associated with this Part.
     749             :          * Convenience method for getIndex()+getLength().
     750             :          * @return this part's pattern string limit index, same as getIndex()+getLength().
     751             :          * @stable ICU 4.8
     752             :          */
     753           0 :         int32_t getLimit() const {
     754           0 :             return index+length;
     755             :         }
     756             : 
     757             :         /**
     758             :          * Returns a value associated with this part.
     759             :          * See the documentation of each part type for details.
     760             :          * @return the part value.
     761             :          * @stable ICU 4.8
     762             :          */
     763           0 :         int32_t getValue() const {
     764           0 :             return value;  // widened from the int16_t field
     765             :         }
     766             : 
     767             :         /**
     768             :          * Returns the argument type if this part is of type ARG_START or ARG_LIMIT,
     769             :          * otherwise UMSGPAT_ARG_TYPE_NONE.
     770             :          * @return the argument type for this part.
     771             :          * @stable ICU 4.8
     772             :          */
     773           0 :         UMessagePatternArgType getArgType() const {
     774           0 :             const UMessagePatternPartType partType=getType();  // distinct name: does not shadow the type field
     775           0 :             if(partType==UMSGPAT_PART_TYPE_ARG_START || partType==UMSGPAT_PART_TYPE_ARG_LIMIT) {
     776           0 :                 return (UMessagePatternArgType)value;
     777             :             } else {
     778           0 :                 return UMSGPAT_ARG_TYPE_NONE;
     779             :             }
     780             :         }
     781             : 
     782             :         /**
     783             :          * Indicates whether the Part type has a numeric value.
     784             :          * If so, then that numeric value can be retrieved via MessagePattern.getNumericValue().
     785             :          * @param type The Part type to be tested.
     786             :          * @return TRUE if the Part type has a numeric value.
     787             :          * @stable ICU 4.8
     788             :          */
     789           0 :         static UBool hasNumericValue(UMessagePatternPartType type) {
     790           0 :             return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE;
     791             :         }
     792             : 
     793             :         /**
     794             :          * @param other another object to compare with.
     795             :          * @return TRUE if this object is equivalent to the other one.
     796             :          * @stable ICU 4.8
     797             :          */
     798             :         UBool operator==(const Part &other) const;
     799             : 
     800             :         /**
     801             :          * @param other another object to compare with.
     802             :          * @return FALSE if this object is equivalent to the other one.
     803             :          * @stable ICU 4.8
     804             :          */
     805           0 :         inline UBool operator!=(const Part &other) const {
     806           0 :             return !operator==(other);
     807             :         }
     808             : 
     809             :         /**
     810             :          * @return A hash code for this object, computed with wrapping (modulo 2^32) arithmetic.
     811             :          * @stable ICU 4.8
     812             :          */
     813           0 :         int32_t hashCode() const {
     814           0 :             return (int32_t)((((uint32_t)type*37+(uint32_t)index)*37+length)*37+(uint32_t)value);  // unsigned: no signed-overflow UB
     815             :         }
     816             : 
     817             :     private:
     818             :         friend class MessagePattern;
     819             : 
     820             :         static const int32_t MAX_LENGTH=0xffff;  // == maximum of uint16_t, the type of the length field
     821             :         static const int32_t MAX_VALUE=0x7fff;  // == maximum of int16_t, the type of the value field
     822             : 
     823             :         // The fields are not const because some of them are modified during pattern parsing.
     824             :         // After pattern parsing, the parts are effectively immutable.
     825             :         UMessagePatternPartType type;
     826             :         int32_t index;
     827             :         uint16_t length;
     828             :         int16_t value;
     829             :         int32_t limitPartIndex;
     830             :     };
     831             : 
     832             : private:
     833             :     void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
     834             : 
     835             :     void postParse();
     836             : 
     837             :     int32_t parseMessage(int32_t index, int32_t msgStartLength,
     838             :                          int32_t nestingLevel, UMessagePatternArgType parentType,
     839             :                          UParseError *parseError, UErrorCode &errorCode);
     840             : 
     841             :     int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
     842             :                      UParseError *parseError, UErrorCode &errorCode);
     843             : 
     844             :     int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode);
     845             : 
     846             :     int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel,
     847             :                              UParseError *parseError, UErrorCode &errorCode);
     848             : 
     849             :     int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel,
     850             :                                      UParseError *parseError, UErrorCode &errorCode);
     851             : 
     852             :     /**
     853             :      * Validates and parses an argument name or argument number string.
     854             :      * This internal method assumes that the input substring is a "pattern identifier".
     855             :      * @return &gt;=0 if the name is a valid number,
     856             :      *         ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
     857             :      *         ARG_NAME_NOT_VALID (-2) if it is neither.
     858             :      * @see validateArgumentName(const UnicodeString &)
     859             :      */
     860             :     static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit);
     861             : 
     862           0 :     int32_t parseArgNumber(int32_t start, int32_t limit) {
     863           0 :         return parseArgNumber(msg, start, limit);  // delegates to the static overload, using this object's msg
     864             :     }
     865             : 
     866             :     /**
     867             :      * Parses a number from the specified message substring.
     868             :      * @param start start index into the message string
     869             :      * @param limit limit index into the message string, must be start<limit
     870             :      * @param allowInfinity TRUE if U+221E is allowed (for ChoiceFormat)
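     871             :      * @param parseError Receives details about an error within the pattern. Can be NULL.
     872             :      * @param errorCode Standard ICU error code; its input value must pass the U_SUCCESS() test.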
     873             :      */
     874             :     void parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
     875             :                      UParseError *parseError, UErrorCode &errorCode);
     876             : 
     877             :     // Java has package-private appendReducedApostrophes() here.
     878             :     // In C++, this is declared in the MessageImpl class.
     879             : 
     880             :     int32_t skipWhiteSpace(int32_t index);
     881             : 
     882             :     int32_t skipIdentifier(int32_t index);
     883             : 
     884             :     /**
     885             :      * Skips a sequence of characters that could occur in a double value.
     886             :      * Does not fully parse or validate the value.
     887             :      */
     888             :     int32_t skipDouble(int32_t index);
     889             : 
     890             :     static UBool isArgTypeChar(UChar32 c);
     891             : 
     892             :     UBool isChoice(int32_t index);
     893             : 
     894             :     UBool isPlural(int32_t index);
     895             : 
     896             :     UBool isSelect(int32_t index);
     897             : 
     898             :     UBool isOrdinal(int32_t index);
     899             : 
     900             :     /**
     901             :      * @return TRUE if we are inside a MessageFormat (sub-)pattern,
     902             :      *         as opposed to inside a top-level choice/plural/select pattern.
     903             :      */
     904             :     UBool inMessageFormatPattern(int32_t nestingLevel);
     905             : 
     906             :     /**
     907             :      * @return TRUE if we are in a MessageFormat sub-pattern
     908             :      *         of a top-level ChoiceFormat pattern.
     909             :      */
     910             :     UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType);
     911             : 
     912             :     void addPart(UMessagePatternPartType type, int32_t index, int32_t length,
     913             :                  int32_t value, UErrorCode &errorCode);
     914             : 
     915             :     void addLimitPart(int32_t start,
     916             :                       UMessagePatternPartType type, int32_t index, int32_t length,
     917             :                       int32_t value, UErrorCode &errorCode);
     918             : 
     919             :     void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode);
     920             : 
     921             :     void setParseError(UParseError *parseError, int32_t index);
     922             : 
     923             :     UBool init(UErrorCode &errorCode);
     924             :     UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode);
     925             : 
     926             :     UMessagePatternApostropheMode aposMode;  // returned by getApostropheMode()
     927             :     UnicodeString msg;  // returned by getPatternString(); Part indexes refer into it
     928             :     // Java equivalent: ArrayList<Part> parts=new ArrayList<Part>();
     929             :     MessagePatternPartsList *partsList;
     930             :     Part *parts;  // indexed (unchecked) by getPart()
     931             :     int32_t partsLength;  // returned by countParts()
     932             :     // Java equivalent: ArrayList<Double> numericValues;
     933             :     MessagePatternDoubleList *numericValuesList;
     934             :     double *numericValues;
     935             :     int32_t numericValuesLength;
     936             :     UBool hasArgNames;  // returned by hasNamedArguments()
     937             :     UBool hasArgNumbers;  // returned by hasNumberedArguments()
     938             :     UBool needsAutoQuoting;
     939             : };
     940             : 
     941             : U_NAMESPACE_END
     942             : 
     943             : #endif  // !UCONFIG_NO_FORMATTING
     944             : 
     945             : #endif  // __MESSAGEPATTERN_H__

Generated by: LCOV version 1.13