Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : *******************************************************************************
5 : * Copyright (C) 2015, International Business Machines
6 : * Corporation and others. All Rights Reserved.
7 : *******************************************************************************
8 : * affixpatternparser.h
9 : *
10 : * created on: 2015jan06
11 : * created by: Travis Keep
12 : */
13 :
14 : #ifndef __AFFIX_PATTERN_PARSER_H__
15 : #define __AFFIX_PATTERN_PARSER_H__
16 :
17 : #include "unicode/utypes.h"
18 :
19 : #if !UCONFIG_NO_FORMATTING
20 :
21 : #include "unicode/unistr.h"
22 : #include "unicode/uobject.h"
23 : #include "pluralaffix.h"
24 :
25 : U_NAMESPACE_BEGIN
26 :
27 : class PluralRules;
28 : class FixedPrecision;
29 : class DecimalFormatSymbols;
30 :
31 : /**
32 : * A representation of the various forms of a particular currency according
33 : * to some locale and usage context.
34 : *
35 : * Includes the symbol, ISO code form, and long form(s) of the currency name
36 : * for each plural variation.
37 : */
38 0 : class U_I18N_API CurrencyAffixInfo : public UMemory {
39 : public:
40 : /**
41 : * Symbol is \u00a4; ISO form is \u00a4\u00a4;
42 : * long form is \u00a4\u00a4\u00a4.
43 : */
44 : CurrencyAffixInfo();
45 : /** Inline accessors. Each setter also sets fIsDefault to FALSE. */
46 0 : const UnicodeString &getSymbol() const { return fSymbol; }
47 0 : const UnicodeString &getISO() const { return fISO; }
48 0 : const PluralAffix &getLong() const { return fLong; }
49 0 : void setSymbol(const UnicodeString &symbol) {
50 0 : fSymbol = symbol;
51 0 : fIsDefault = FALSE;
52 0 : }
53 0 : void setISO(const UnicodeString &iso) {
54 0 : fISO = iso;
55 0 : fIsDefault = FALSE;
56 0 : }
57 : UBool // TRUE when symbol, ISO form, long forms, and the default flag all match
58 0 : equals(const CurrencyAffixInfo &other) const {
59 0 : return (fSymbol == other.fSymbol)
60 0 : && (fISO == other.fISO)
61 0 : && (fLong.equals(other.fLong))
62 0 : && (fIsDefault == other.fIsDefault);
63 : }
64 :
65 : /**
66 : * Initializes this instance.
67 : *
68 : * @param locale the locale for the currency forms.
69 : * @param rules The plural rules for the locale.
70 : * @param currency the null terminated, 3 character ISO code of the
71 : * currency. If NULL, resets this instance as if it were just created.
72 : * In this case, the first 2 parameters may be NULL as well.
73 : * @param status any error returned here.
74 : */
75 : void set(
76 : const char *locale, const PluralRules *rules,
77 : const UChar *currency, UErrorCode &status);
78 :
79 : /**
80 : * Returns true if this instance is the default, that is, it has no real
81 : * currency: for instance, it was never initialized with set()
82 : * or was reset with set(NULL, NULL, NULL, status).
83 : */
84 0 : UBool isDefault() const { return fIsDefault; }
85 :
86 : /**
87 : * Adjusts the precision used for a particular currency.
88 : * @param currency the null terminated, 3 character ISO code of the
89 : * currency.
90 : * @param usage the usage of the currency
91 : * @param precision min/max fraction digits and rounding increment
92 : * adjusted.
93 : * @param status any error reported here.
94 : */
95 : static void adjustPrecision(
96 : const UChar *currency, const UCurrencyUsage usage,
97 : FixedPrecision &precision, UErrorCode &status);
98 :
99 : private:
100 : /**
101 : * The symbol form of the currency.
102 : */
103 : UnicodeString fSymbol;
104 :
105 : /**
106 : * The ISO form of the currency, usually three letter abbreviation.
107 : */
108 : UnicodeString fISO;
109 :
110 : /**
111 : * The long forms of the currency keyed by plural variation.
112 : */
113 : PluralAffix fLong;
114 : /** Flag returned by isDefault(); setSymbol() and setISO() set it to FALSE. */
115 : UBool fIsDefault;
116 :
117 : };
118 :
119 : class AffixPatternIterator;
120 :
121 : /**
122 : * A locale agnostic representation of an affix pattern.
123 : */
124 0 : class U_I18N_API AffixPattern : public UMemory {
125 : public:
126 :
127 : /**
128 : * The token types that can appear in an affix pattern.
129 : */
130 : enum ETokenType {
131 : kLiteral,
132 : kPercent,
133 : kPerMill,
134 : kCurrency,
135 : kNegative,
136 : kPositive
137 : };
138 :
139 : /**
140 : * An empty affix pattern.
141 : */
142 0 : AffixPattern()
143 0 : : tokens(), literals(), hasCurrencyToken(FALSE),
144 0 : hasPercentToken(FALSE), hasPermillToken(FALSE), char32Count(0) {
145 0 : }
146 :
147 : /**
148 : * Adds a string literal to this affix pattern.
149 : * NOTE(review): presumably the len UChars starting at index start of the buffer; confirm in the implementation. */
150 : void addLiteral(const UChar *, int32_t start, int32_t len);
151 :
152 : /**
153 : * Adds a token to this affix pattern. t must not be kLiteral as
154 : * the addLiteral() method adds literals.
155 : * @param t the token type to add
156 : */
157 : void add(ETokenType t);
158 :
159 : /**
160 : * Adds a currency token with specific count to this affix pattern.
161 : * @param count the token count. Used to distinguish between
162 : * one, two, or three currency symbols. Note that adding a currency
163 : * token with count=2 (Use ISO code) is different than adding two
164 : * currency tokens each with count=1 (two currency symbols).
165 : */
166 : void addCurrency(uint8_t count);
167 :
168 : /**
169 : * Makes this instance be an empty affix pattern.
170 : */
171 : void remove();
172 :
173 : /**
174 : * Provides an iterator over the tokens in this instance.
175 : * @param result this is initialized to point just before the
176 : * first token of this instance. Caller must call nextToken()
177 : * on the iterator once it is set up to have it actually point
178 : * to the first token. This first call to nextToken() will return
179 : * FALSE if the AffixPattern being iterated over is empty.
180 : * @return result
181 : */
182 : AffixPatternIterator &iterator(AffixPatternIterator &result) const;
183 :
184 : /**
185 : * Returns TRUE if this instance has currency tokens in it.
186 : */
187 0 : UBool usesCurrency() const {
188 0 : return hasCurrencyToken;
189 : }
190 : /** Returns the value of the hasPercentToken flag. */
191 0 : UBool usesPercent() const {
192 0 : return hasPercentToken;
193 : }
194 : /** Returns the value of the hasPermillToken flag. */
195 0 : UBool usesPermill() const {
196 0 : return hasPermillToken;
197 : }
198 :
199 : /**
200 : * Returns the number of code points a string of this instance
201 : * would have if none of the special tokens were escaped.
202 : * Used to compute the padding size.
203 : */
204 0 : int32_t countChar32() const {
205 0 : return char32Count;
206 : }
207 :
208 : /**
209 : * Appends other to this instance mutating this instance in place.
210 : * @param other The pattern appended to the end of this one.
211 : * @return a reference to this instance for chaining.
212 : */
213 : AffixPattern &append(const AffixPattern &other);
214 :
215 : /**
216 : * Converts this AffixPattern back into a user string.
217 : * It is the inverse of parseUserAffixString.
218 : */
219 : UnicodeString &toUserString(UnicodeString &appendTo) const;
220 :
221 : /**
222 : * Converts this AffixPattern back into a string.
223 : * It is the inverse of parseAffixString.
224 : */
225 : UnicodeString &toString(UnicodeString &appendTo) const;
226 :
227 : /**
228 : * Parses an affix pattern string appending it to an AffixPattern.
229 : * Parses affix pattern strings produced from using
230 : * DecimalFormatPatternParser to parse a format pattern. Affix patterns
231 : * include the positive prefix and suffix and the negative prefix
232 : * and suffix. This method expects affix pattern strings to be in the
233 : * same format that DecimalFormatPatternParser produces. Namely special
234 : * characters in the affix that correspond to a field type must be
235 : * prefixed with an apostrophe ('). These special character sequences
236 : * include minus (-), percent (%), permille (U+2030), plus (+),
237 : * short currency (U+00a4), medium currency (U+00a4 * 2),
238 : * long currency (U+00a4 * 3), and apostrophe (')
239 : * (apostrophe does not correspond to a field type but has to be escaped
240 : * because it itself is the escape character).
241 : * Since the expansion of these special character
242 : * sequences is locale dependent, these sequences are not expanded in
243 : * an AffixPattern instance.
244 : * If these special characters are not prefixed with an apostrophe in
245 : * the affix pattern string, then they are treated verbatim just as
246 : * any other character. If an apostrophe prefixes a non special
247 : * character in the affix pattern, the apostrophe is simply ignored.
248 : *
249 : * @param affixStr the string from DecimalFormatPatternParser
250 : * @param appendTo parsed result appended here.
251 : * @param status any error parsing returned here.
252 : */
253 : static AffixPattern &parseAffixString(
254 : const UnicodeString &affixStr,
255 : AffixPattern &appendTo,
256 : UErrorCode &status);
257 :
258 : /**
259 : * Parses an affix pattern string appending it to an AffixPattern.
260 : * Parses affix pattern strings as the user would supply them.
261 : * In this function, quoting makes special characters like normal
262 : * characters whereas in parseAffixString, quoting makes special
263 : * characters special.
264 : *
265 : * @param affixStr the string from the user
266 : * @param appendTo parsed result appended here.
267 : * @param status any error parsing returned here.
268 : */
269 : static AffixPattern &parseUserAffixString(
270 : const UnicodeString &affixStr,
271 : AffixPattern &appendTo,
272 : UErrorCode &status);
273 : /** Returns TRUE if tokens, literals, the three token flags, and char32Count all match. */
274 0 : UBool equals(const AffixPattern &other) const {
275 0 : return (tokens == other.tokens)
276 0 : && (literals == other.literals)
277 0 : && (hasCurrencyToken == other.hasCurrencyToken)
278 0 : && (hasPercentToken == other.hasPercentToken)
279 0 : && (hasPermillToken == other.hasPermillToken)
280 0 : && (char32Count == other.char32Count);
281 : }
282 :
283 : private:
284 : /*
285 : * Tokens stored here. Each UChar generally stands for one token.
286 : * Each token is of form 'etttttttllllllll'. llllllll is the length of
287 : * the token and ranges from 0-255. ttttttt is the token type and ranges
288 : * from 0-127. If e is set it means this is an extendo token (described
289 : * next). To accommodate token lengths above 255, each normal
290 : * token (e=0) can be followed by 0 or more extendo tokens (e=1) with
291 : * the same type. Right now only kLiteral tokens have extendo tokens.
292 : * Each extendo token provides the next 8 higher bits for the length.
293 : * If a kLiteral token is followed by 2 extendo tokens, then the
294 : * llllllll of the next extendo token contains bits 8-15 of the length
295 : * and the last extendo token contains bits 16-23 of the length.
296 : */
297 : UnicodeString tokens;
298 :
299 : /*
300 : * The characters of the kLiteral tokens are concatenated together here.
301 : * The first characters go with the first kLiteral token, the next
302 : * characters go with the next kLiteral token etc.
303 : */
304 : UnicodeString literals;
305 : UBool hasCurrencyToken;
306 : UBool hasPercentToken;
307 : UBool hasPermillToken;
308 : int32_t char32Count;
309 : void add(ETokenType t, uint8_t count); // private overload of add() that also takes a count
310 :
311 : };
312 :
313 : /**
314 : * An iterator over the tokens in an AffixPattern instance.
315 : */
316 : class U_I18N_API AffixPatternIterator : public UMemory {
317 : public:
318 :
319 : /**
320 : * Using an iterator without first calling iterator() on an AffixPattern
321 : * instance to initialize the iterator results in
322 : * undefined behavior.
323 : */
324 0 : AffixPatternIterator() : nextLiteralIndex(0), lastLiteralLength(0), nextTokenIndex(0), tokens(NULL), literals(NULL) { }
325 : /**
326 : * Advances this iterator to the next token. Returns FALSE when there
327 : * are no more tokens. Calling the other methods after nextToken()
328 : * returns FALSE results in undefined behavior.
329 : */
330 : UBool nextToken();
331 :
332 : /**
333 : * Returns the type of token.
334 : */
335 : AffixPattern::ETokenType getTokenType() const;
336 :
337 : /**
338 : * For literal tokens, returns the literal string. Calling this for
339 : * other token types results in undefined behavior.
340 : * @param result replaced with a read-only alias to the literal string.
341 : * @return result
342 : */
343 : UnicodeString &getLiteral(UnicodeString &result) const;
344 :
345 : /**
346 : * Returns the token length. Usually 1, but for currency tokens may
347 : * be 2 for ISO code and 3 for long form.
348 : */
349 : int32_t getTokenLength() const;
350 : private:
351 : int32_t nextLiteralIndex;
352 : int32_t lastLiteralLength;
353 : int32_t nextTokenIndex;
354 : const UnicodeString *tokens; // NULL after default construction
355 : const UnicodeString *literals; // NULL after default construction
356 : friend class AffixPattern; // AffixPattern::iterator() initializes instances of this class
357 : AffixPatternIterator(const AffixPatternIterator &); // declared private; presumably to forbid copying - confirm no definition exists
358 : AffixPatternIterator &operator=(const AffixPatternIterator &); // declared private; presumably to forbid assignment - confirm
359 : };
360 :
361 : /**
362 : * A locale aware class that converts locale independent AffixPattern
363 : * instances into locale dependent PluralAffix instances.
364 : */
365 0 : class U_I18N_API AffixPatternParser : public UMemory {
366 : public:
367 : AffixPatternParser();
368 : AffixPatternParser(const DecimalFormatSymbols &symbols);
369 : void setDecimalFormatSymbols(const DecimalFormatSymbols &symbols);
370 :
371 : /**
372 : * Parses affixPattern appending the result to appendTo.
373 : * @param affixPattern The affix pattern.
374 : * @param currencyAffixInfo contains the currency forms.
375 : * @param appendTo The result of parsing affixPattern is appended here.
376 : * @param status any error returned here.
377 : * @return appendTo.
378 : */
379 : PluralAffix &parse(
380 : const AffixPattern &affixPattern,
381 : const CurrencyAffixInfo &currencyAffixInfo,
382 : PluralAffix &appendTo,
383 : UErrorCode &status) const;
384 : /** Returns TRUE if the percent, permill, negative, and positive strings all match. */
385 0 : UBool equals(const AffixPatternParser &other) const {
386 0 : return (fPercent == other.fPercent)
387 0 : && (fPermill == other.fPermill)
388 0 : && (fNegative == other.fNegative)
389 0 : && (fPositive == other.fPositive);
390 : }
391 : /* The four strings below are what equals() compares. NOTE(review): presumably the symbol text for the percent, per-mill, negative, and positive tokens - confirm in the implementation. */
392 : private:
393 : UnicodeString fPercent;
394 : UnicodeString fPermill;
395 : UnicodeString fNegative;
396 : UnicodeString fPositive;
397 : };
398 :
399 :
400 : U_NAMESPACE_END
401 : #endif /* #if !UCONFIG_NO_FORMATTING */
402 : #endif // __AFFIX_PATTERN_PARSER_H__
|