Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : ******************************************************************************
5 : * Copyright (C) 2014-2016, International Business Machines
6 : * Corporation and others. All Rights Reserved.
7 : ******************************************************************************
8 : * simpleformatter.cpp
9 : */
10 :
11 : #include "unicode/utypes.h"
12 : #include "unicode/simpleformatter.h"
13 : #include "unicode/unistr.h"
14 : #include "uassert.h"
15 :
16 : U_NAMESPACE_BEGIN
17 :
18 : namespace {
19 :
20 : /**
21 : * Argument numbers must be smaller than this limit.
22 : * Text segment lengths are offset by this much.
23 : * This is currently the only unused char value in compiled patterns,
24 : * except it is the maximum value of the first unit (max arg +1).
25 : */
26 : const int32_t ARG_NUM_LIMIT = 0x100;
27 : /**
28 : * Initial and maximum char/UChar value set for a text segment.
29 : * Segment length char values are from ARG_NUM_LIMIT+1 to this value here.
30 : * Normally 0xffff, but can be as small as ARG_NUM_LIMIT+1 for testing.
31 : */
32 : const UChar SEGMENT_LENGTH_PLACEHOLDER_CHAR = 0xffff;
33 : /**
34 : * Maximum length of a text segment. Longer segments are split into shorter ones.
35 : */
36 : const int32_t MAX_SEGMENT_LENGTH = SEGMENT_LENGTH_PLACEHOLDER_CHAR - ARG_NUM_LIMIT;
37 :
38 : enum {
39 : APOS = 0x27,
40 : DIGIT_ZERO = 0x30,
41 : DIGIT_ONE = 0x31,
42 : DIGIT_NINE = 0x39,
43 : OPEN_BRACE = 0x7b,
44 : CLOSE_BRACE = 0x7d
45 : };
46 :
47 0 : inline UBool isInvalidArray(const void *array, int32_t length) {
48 0 : return (length < 0 || (array == NULL && length != 0));
49 : }
50 :
51 : } // namespace
52 :
53 0 : SimpleFormatter &SimpleFormatter::operator=(const SimpleFormatter& other) {
54 0 : if (this == &other) {
55 0 : return *this;
56 : }
57 0 : compiledPattern = other.compiledPattern;
58 0 : return *this;
59 : }
60 :
61 0 : SimpleFormatter::~SimpleFormatter() {}
62 :
63 0 : UBool SimpleFormatter::applyPatternMinMaxArguments(
64 : const UnicodeString &pattern,
65 : int32_t min, int32_t max,
66 : UErrorCode &errorCode) {
67 0 : if (U_FAILURE(errorCode)) {
68 0 : return FALSE;
69 : }
70 : // Parse consistent with MessagePattern, but
71 : // - support only simple numbered arguments
72 : // - build a simple binary structure into the result string
73 0 : const UChar *patternBuffer = pattern.getBuffer();
74 0 : int32_t patternLength = pattern.length();
75 : // Reserve the first char for the number of arguments.
76 0 : compiledPattern.setTo((UChar)0);
77 0 : int32_t textLength = 0;
78 0 : int32_t maxArg = -1;
79 0 : UBool inQuote = FALSE;
80 0 : for (int32_t i = 0; i < patternLength;) {
81 0 : UChar c = patternBuffer[i++];
82 0 : if (c == APOS) {
83 0 : if (i < patternLength && (c = patternBuffer[i]) == APOS) {
84 : // double apostrophe, skip the second one
85 0 : ++i;
86 0 : } else if (inQuote) {
87 : // skip the quote-ending apostrophe
88 0 : inQuote = FALSE;
89 0 : continue;
90 0 : } else if (c == OPEN_BRACE || c == CLOSE_BRACE) {
91 : // Skip the quote-starting apostrophe, find the end of the quoted literal text.
92 0 : ++i;
93 0 : inQuote = TRUE;
94 : } else {
95 : // The apostrophe is part of literal text.
96 0 : c = APOS;
97 : }
98 0 : } else if (!inQuote && c == OPEN_BRACE) {
99 0 : if (textLength > 0) {
100 0 : compiledPattern.setCharAt(compiledPattern.length() - textLength - 1,
101 0 : (UChar)(ARG_NUM_LIMIT + textLength));
102 0 : textLength = 0;
103 : }
104 : int32_t argNumber;
105 0 : if ((i + 1) < patternLength &&
106 0 : 0 <= (argNumber = patternBuffer[i] - DIGIT_ZERO) && argNumber <= 9 &&
107 0 : patternBuffer[i + 1] == CLOSE_BRACE) {
108 0 : i += 2;
109 : } else {
110 : // Multi-digit argument number (no leading zero) or syntax error.
111 : // MessagePattern permits PatternProps.skipWhiteSpace(pattern, index)
112 : // around the number, but this class does not.
113 0 : argNumber = -1;
114 0 : if (i < patternLength && DIGIT_ONE <= (c = patternBuffer[i++]) && c <= DIGIT_NINE) {
115 0 : argNumber = c - DIGIT_ZERO;
116 0 : while (i < patternLength &&
117 0 : DIGIT_ZERO <= (c = patternBuffer[i++]) && c <= DIGIT_NINE) {
118 0 : argNumber = argNumber * 10 + (c - DIGIT_ZERO);
119 0 : if (argNumber >= ARG_NUM_LIMIT) {
120 0 : break;
121 : }
122 : }
123 : }
124 0 : if (argNumber < 0 || c != CLOSE_BRACE) {
125 0 : errorCode = U_ILLEGAL_ARGUMENT_ERROR;
126 0 : return FALSE;
127 : }
128 : }
129 0 : if (argNumber > maxArg) {
130 0 : maxArg = argNumber;
131 : }
132 0 : compiledPattern.append((UChar)argNumber);
133 0 : continue;
134 : } // else: c is part of literal text
135 : // Append c and track the literal-text segment length.
136 0 : if (textLength == 0) {
137 : // Reserve a char for the length of a new text segment, preset the maximum length.
138 0 : compiledPattern.append(SEGMENT_LENGTH_PLACEHOLDER_CHAR);
139 : }
140 0 : compiledPattern.append(c);
141 0 : if (++textLength == MAX_SEGMENT_LENGTH) {
142 0 : textLength = 0;
143 : }
144 : }
145 0 : if (textLength > 0) {
146 0 : compiledPattern.setCharAt(compiledPattern.length() - textLength - 1,
147 0 : (UChar)(ARG_NUM_LIMIT + textLength));
148 : }
149 0 : int32_t argCount = maxArg + 1;
150 0 : if (argCount < min || max < argCount) {
151 0 : errorCode = U_ILLEGAL_ARGUMENT_ERROR;
152 0 : return FALSE;
153 : }
154 0 : compiledPattern.setCharAt(0, (UChar)argCount);
155 0 : return TRUE;
156 : }
157 :
158 0 : UnicodeString& SimpleFormatter::format(
159 : const UnicodeString &value0,
160 : UnicodeString &appendTo, UErrorCode &errorCode) const {
161 0 : const UnicodeString *values[] = { &value0 };
162 0 : return formatAndAppend(values, 1, appendTo, NULL, 0, errorCode);
163 : }
164 :
165 0 : UnicodeString& SimpleFormatter::format(
166 : const UnicodeString &value0,
167 : const UnicodeString &value1,
168 : UnicodeString &appendTo, UErrorCode &errorCode) const {
169 0 : const UnicodeString *values[] = { &value0, &value1 };
170 0 : return formatAndAppend(values, 2, appendTo, NULL, 0, errorCode);
171 : }
172 :
173 0 : UnicodeString& SimpleFormatter::format(
174 : const UnicodeString &value0,
175 : const UnicodeString &value1,
176 : const UnicodeString &value2,
177 : UnicodeString &appendTo, UErrorCode &errorCode) const {
178 0 : const UnicodeString *values[] = { &value0, &value1, &value2 };
179 0 : return formatAndAppend(values, 3, appendTo, NULL, 0, errorCode);
180 : }
181 :
182 0 : UnicodeString& SimpleFormatter::formatAndAppend(
183 : const UnicodeString *const *values, int32_t valuesLength,
184 : UnicodeString &appendTo,
185 : int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const {
186 0 : if (U_FAILURE(errorCode)) {
187 0 : return appendTo;
188 : }
189 0 : if (isInvalidArray(values, valuesLength) || isInvalidArray(offsets, offsetsLength) ||
190 0 : valuesLength < getArgumentLimit()) {
191 0 : errorCode = U_ILLEGAL_ARGUMENT_ERROR;
192 0 : return appendTo;
193 : }
194 : return format(compiledPattern.getBuffer(), compiledPattern.length(), values,
195 : appendTo, NULL, TRUE,
196 0 : offsets, offsetsLength, errorCode);
197 : }
198 :
199 0 : UnicodeString &SimpleFormatter::formatAndReplace(
200 : const UnicodeString *const *values, int32_t valuesLength,
201 : UnicodeString &result,
202 : int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const {
203 0 : if (U_FAILURE(errorCode)) {
204 0 : return result;
205 : }
206 0 : if (isInvalidArray(values, valuesLength) || isInvalidArray(offsets, offsetsLength)) {
207 0 : errorCode = U_ILLEGAL_ARGUMENT_ERROR;
208 0 : return result;
209 : }
210 0 : const UChar *cp = compiledPattern.getBuffer();
211 0 : int32_t cpLength = compiledPattern.length();
212 0 : if (valuesLength < getArgumentLimit(cp, cpLength)) {
213 0 : errorCode = U_ILLEGAL_ARGUMENT_ERROR;
214 0 : return result;
215 : }
216 :
217 : // If the pattern starts with an argument whose value is the same object
218 : // as the result, then we keep the result contents and append to it.
219 : // Otherwise we replace its contents.
220 0 : int32_t firstArg = -1;
221 : // If any non-initial argument value is the same object as the result,
222 : // then we first copy its contents and use that instead while formatting.
223 0 : UnicodeString resultCopy;
224 0 : if (getArgumentLimit(cp, cpLength) > 0) {
225 0 : for (int32_t i = 1; i < cpLength;) {
226 0 : int32_t n = cp[i++];
227 0 : if (n < ARG_NUM_LIMIT) {
228 0 : if (values[n] == &result) {
229 0 : if (i == 2) {
230 0 : firstArg = n;
231 0 : } else if (resultCopy.isEmpty() && !result.isEmpty()) {
232 0 : resultCopy = result;
233 : }
234 : }
235 : } else {
236 0 : i += n - ARG_NUM_LIMIT;
237 : }
238 : }
239 : }
240 0 : if (firstArg < 0) {
241 0 : result.remove();
242 : }
243 : return format(cp, cpLength, values,
244 : result, &resultCopy, FALSE,
245 0 : offsets, offsetsLength, errorCode);
246 : }
247 :
248 0 : UnicodeString SimpleFormatter::getTextWithNoArguments(
249 : const UChar *compiledPattern, int32_t compiledPatternLength) {
250 0 : int32_t capacity = compiledPatternLength - 1 -
251 0 : getArgumentLimit(compiledPattern, compiledPatternLength);
252 0 : UnicodeString sb(capacity, 0, 0); // Java: StringBuilder
253 0 : for (int32_t i = 1; i < compiledPatternLength;) {
254 0 : int32_t segmentLength = compiledPattern[i++] - ARG_NUM_LIMIT;
255 0 : if (segmentLength > 0) {
256 0 : sb.append(compiledPattern + i, segmentLength);
257 0 : i += segmentLength;
258 : }
259 : }
260 0 : return sb;
261 : }
262 :
263 0 : UnicodeString &SimpleFormatter::format(
264 : const UChar *compiledPattern, int32_t compiledPatternLength,
265 : const UnicodeString *const *values,
266 : UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue,
267 : int32_t *offsets, int32_t offsetsLength,
268 : UErrorCode &errorCode) {
269 0 : if (U_FAILURE(errorCode)) {
270 0 : return result;
271 : }
272 0 : for (int32_t i = 0; i < offsetsLength; i++) {
273 0 : offsets[i] = -1;
274 : }
275 0 : for (int32_t i = 1; i < compiledPatternLength;) {
276 0 : int32_t n = compiledPattern[i++];
277 0 : if (n < ARG_NUM_LIMIT) {
278 0 : const UnicodeString *value = values[n];
279 0 : if (value == NULL) {
280 0 : errorCode = U_ILLEGAL_ARGUMENT_ERROR;
281 0 : return result;
282 : }
283 0 : if (value == &result) {
284 0 : if (forbidResultAsValue) {
285 0 : errorCode = U_ILLEGAL_ARGUMENT_ERROR;
286 0 : return result;
287 : }
288 0 : if (i == 2) {
289 : // We are appending to result which is also the first value object.
290 0 : if (n < offsetsLength) {
291 0 : offsets[n] = 0;
292 : }
293 : } else {
294 0 : if (n < offsetsLength) {
295 0 : offsets[n] = result.length();
296 : }
297 0 : result.append(*resultCopy);
298 : }
299 : } else {
300 0 : if (n < offsetsLength) {
301 0 : offsets[n] = result.length();
302 : }
303 0 : result.append(*value);
304 : }
305 : } else {
306 0 : int32_t length = n - ARG_NUM_LIMIT;
307 0 : result.append(compiledPattern + i, length);
308 0 : i += length;
309 : }
310 : }
311 0 : return result;
312 : }
313 :
314 : U_NAMESPACE_END
|