Line data Source code
1 : // © 2016 and later: Unicode, Inc. and others.
2 : // License & terms of use: http://www.unicode.org/copyright.html
3 : /*
4 : **********************************************************************
5 : * Copyright (C) 1998-2016, International Business Machines
6 : * Corporation and others. All Rights Reserved.
7 : **********************************************************************
8 : *
9 : * File unistr.h
10 : *
11 : * Modification History:
12 : *
13 : * Date Name Description
14 : * 09/25/98 stephen Creation.
15 : * 11/11/98 stephen Changed per 11/9 code review.
16 : * 04/20/99 stephen Overhauled per 4/16 code review.
17 : * 11/18/99 aliu Made to inherit from Replaceable. Added method
18 : * handleReplaceBetween(); other methods unchanged.
19 : * 06/25/01 grhoten Remove dependency on iostream.
20 : ******************************************************************************
21 : */
22 :
23 : #ifndef UNISTR_H
24 : #define UNISTR_H
25 :
26 : /**
27 : * \file
28 : * \brief C++ API: Unicode String
29 : */
30 :
31 : #include <cstddef>
32 : #include "unicode/utypes.h"
33 : #include "unicode/char16ptr.h"
34 : #include "unicode/rep.h"
35 : #include "unicode/std_string.h"
36 : #include "unicode/stringpiece.h"
37 : #include "unicode/bytestream.h"
38 :
39 : struct UConverter; // unicode/ucnv.h
40 :
41 : #ifndef U_COMPARE_CODE_POINT_ORDER
42 : /* see also ustring.h and unorm.h */
43 : /**
44 : * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
45 : * Compare strings in code point order instead of code unit order.
46 : * @stable ICU 2.2
47 : */
48 : #define U_COMPARE_CODE_POINT_ORDER 0x8000
49 : #endif
50 :
51 : #ifndef USTRING_H
52 : /**
53 : * \ingroup ustring_ustrlen
54 : */
55 : U_STABLE int32_t U_EXPORT2
56 : u_strlen(const UChar *s);
57 : #endif
58 :
59 : U_NAMESPACE_BEGIN
60 :
61 : #if !UCONFIG_NO_BREAK_ITERATION
62 : class BreakIterator; // unicode/brkiter.h
63 : #endif
64 : class Edits;
65 :
66 : U_NAMESPACE_END
67 :
68 : // Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper.
69 : /**
70 : * Internal string case mapping function type, needed by UnicodeString.
71 : * All error checking must be done (presumably by the caller, before the mapper is invoked -- TODO confirm).
72 : * src and dest must not overlap. The iter parameter exists only when !UCONFIG_NO_BREAK_ITERATION.
73 : * @internal
74 : */
75 : typedef int32_t U_CALLCONV
76 : UStringCaseMapper(int32_t caseLocale, uint32_t options,
77 : #if !UCONFIG_NO_BREAK_ITERATION
78 : icu::BreakIterator *iter,
79 : #endif
80 : char16_t *dest, int32_t destCapacity,
81 : const char16_t *src, int32_t srcLength,
82 : icu::Edits *edits,
83 : UErrorCode &errorCode);
84 :
85 : U_NAMESPACE_BEGIN
86 :
87 : class Locale; // unicode/locid.h
88 : class StringCharacterIterator;
89 : class UnicodeStringAppendable; // unicode/appendable.h
90 :
91 : /* The <iostream> include has been moved to unicode/ustream.h */
92 :
93 : /**
94 : * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
95 : * which constructs a Unicode string from an invariant-character char * string.
96 : * About invariant characters see utypes.h.
97 : * This constructor has no runtime dependency on conversion code and is
98 : * therefore recommended over ones taking a charset name string
99 : * (where the empty string "" indicates invariant-character conversion).
100 : *
101 : * @stable ICU 3.2
102 : */
103 : #define US_INV icu::UnicodeString::kInvariant
104 :
105 : /**
106 : * Unicode String literals in C++.
107 : *
108 : * Note: these macros are not recommended for new code.
109 : * Prior to the availability of C++11 and u"unicode string literals",
110 : * these macros were provided for portability and efficiency when
111 : * initializing UnicodeStrings from literals.
112 : *
113 : * They work only for strings that contain "invariant characters", i.e.,
114 : * only Latin letters, digits, and some punctuation.
115 : * See utypes.h for details.
116 : *
117 : * The string parameter must be a C string literal, because the macro pastes a u prefix onto it (u ## cs);
118 : * when U_CHAR16_IS_TYPEDEF is set, the result is additionally cast to const char16_t*. The length of the string, not including the terminating
119 : * <code>NUL</code>, must be specified as a constant.
120 : * @stable ICU 2.0
121 : */
122 : #if !U_CHAR16_IS_TYPEDEF
123 : # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, u ## cs, _length)
124 : #else
125 : # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const char16_t*)u ## cs, _length)
126 : #endif
127 :
128 : /**
129 : * Unicode String literals in C++.
130 : * Depending on the platform properties, different UnicodeString
131 : * constructors should be used to create a UnicodeString object from
132 : * a string literal.
133 : * The macros are defined for improved performance.
134 : * They work only for strings that contain "invariant characters", i.e.,
135 : * only Latin letters, digits, and some punctuation.
136 : * See utypes.h for details.
137 : *
138 : * The string parameter must be a C string literal; it is passed on to UNICODE_STRING with a length argument of -1.
139 : * @stable ICU 2.0
140 : */
141 : #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
142 :
143 : /**
144 : * \def UNISTR_FROM_CHAR_EXPLICIT
145 : * This can be defined to be empty or "explicit" before this header is included; if left undefined, it becomes "explicit" when one of the U_*_IMPLEMENTATION macros tested below is defined, and empty otherwise.
146 : * If explicit, then the UnicodeString(char16_t) and UnicodeString(UChar32)
147 : * constructors are marked as explicit, preventing their inadvertent use.
148 : * @stable ICU 49
149 : */
150 : #ifndef UNISTR_FROM_CHAR_EXPLICIT
151 : # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
152 : // Auto-"explicit" in ICU library code.
153 : # define UNISTR_FROM_CHAR_EXPLICIT explicit
154 : # else
155 : // Empty by default for source code compatibility.
156 : # define UNISTR_FROM_CHAR_EXPLICIT
157 : # endif
158 : #endif
159 :
160 : /**
161 : * \def UNISTR_FROM_STRING_EXPLICIT
162 : * This can be defined to be empty or "explicit" before this header is included; if left undefined, it becomes "explicit" when one of the U_*_IMPLEMENTATION macros tested below is defined, and empty otherwise.
163 : * If explicit, then the UnicodeString(const char *) and UnicodeString(const char16_t *)
164 : * constructors are marked as explicit, preventing their inadvertent use.
165 : *
166 : * In particular, this helps prevent accidentally depending on ICU conversion code
167 : * by passing a string literal into an API with a const UnicodeString & parameter.
168 : * @stable ICU 49
169 : */
170 : #ifndef UNISTR_FROM_STRING_EXPLICIT
171 : # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
172 : // Auto-"explicit" in ICU library code.
173 : # define UNISTR_FROM_STRING_EXPLICIT explicit
174 : # else
175 : // Empty by default for source code compatibility.
176 : # define UNISTR_FROM_STRING_EXPLICIT
177 : # endif
178 : #endif
179 :
180 : /**
181 : * \def UNISTR_OBJECT_SIZE
182 : * Desired sizeof(UnicodeString) in bytes. Defaults to 64 unless it is already defined when this header is included.
183 : * It should be a multiple of sizeof(pointer) to avoid unusable space for padding.
184 : * It is also useful for the object size to be a multiple of 16 bytes,
185 : * which is a common granularity for heap allocation.
186 : *
187 : * Any space inside the object beyond sizeof(vtable pointer) + 2
188 : * is available for storing short strings inside the object.
189 : * The bigger the object, the longer a string that can be stored inside the object,
190 : * without additional heap allocation.
191 : *
192 : * Depending on a platform's pointer size, pointer alignment requirements,
193 : * and struct padding, the compiler will usually round up sizeof(UnicodeString)
194 : * to 4 * sizeof(pointer) (or 3 * sizeof(pointer) for P128 data models),
195 : * to hold the fields for heap-allocated strings.
196 : * Such a minimum size also ensures that the object is easily large enough
197 : * to hold at least 2 char16_ts, for one supplementary code point (U16_MAX_LENGTH).
198 : *
199 : * sizeof(UnicodeString) >= 48 should work for all known platforms.
200 : *
201 : * For example, on a 64-bit machine where sizeof(vtable pointer) is 8,
202 : * sizeof(UnicodeString) = 64 would leave space for
203 : * (64 - sizeof(vtable pointer) - 2) / U_SIZEOF_UCHAR = (64 - 8 - 2) / 2 = 27
204 : * char16_ts stored inside the object.
205 : *
206 : * The minimum object size on a 64-bit machine would be
207 : * 4 * sizeof(pointer) = 4 * 8 = 32 bytes,
208 : * and the internal buffer would hold up to 11 char16_ts in that case.
209 : *
210 : * @see U16_MAX_LENGTH
211 : * @stable ICU 56
212 : */
213 : #ifndef UNISTR_OBJECT_SIZE
214 : # define UNISTR_OBJECT_SIZE 64
215 : #endif
216 :
217 : /**
218 : * UnicodeString is a string class that stores Unicode characters directly and provides
219 : * functionality similar to that of the Java String and StringBuffer/StringBuilder classes.
220 : * It is a concrete implementation of the abstract class Replaceable (for transliteration).
221 : *
222 : * A UnicodeString may also "alias" an external array of characters
223 : * (that is, point to it, rather than own the array)
224 : * whose lifetime must then at least match the lifetime of the aliasing object.
225 : * This aliasing may be preserved when returning a UnicodeString by value,
226 : * depending on the compiler and the function implementation,
227 : * via Return Value Optimization (RVO) or the move assignment operator.
228 : * (However, the copy assignment operator does not preserve aliasing.)
229 : * For details see the description of storage models at the end of the class API docs
230 : * and in the User Guide chapter linked from there.
231 : *
232 : * The UnicodeString class is not suitable for subclassing.
233 : *
234 : * <p>For an overview of Unicode strings in C and C++ see the
235 : * <a href="http://userguide.icu-project.org/strings#TOC-Strings-in-C-C-">User Guide Strings chapter</a>.</p>
236 : *
237 : * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
238 : * A Unicode character may be stored with either one code unit
239 : * (the most common case) or with a matched pair of special code units
240 : * ("surrogates"). The data type for code units is char16_t.
241 : * For single-character handling, a Unicode character code <em>point</em> is a value
242 : * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
243 : *
244 : * <p>Indexes and offsets into and lengths of strings always count code units, not code points.
245 : * This is the same as with multi-byte char* strings in traditional string handling.
246 : * Operations on partial strings typically do not test for code point boundaries.
247 : * If necessary, the user needs to take care of such boundaries by testing for the code unit
248 : * values or by using functions like
249 : * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
250 : * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>
251 : *
252 : * UnicodeString methods are more lenient with regard to input parameter values
253 : * than other ICU APIs. In particular:
254 : * - If indexes are out of bounds for a UnicodeString object
255 : * (<0 or >length()) then they are "pinned" to the nearest boundary.
256 : * - If primitive string pointer values (e.g., const char16_t * or char *)
257 : * for input strings are NULL, then those input string parameters are treated
258 : * as if they pointed to an empty string.
259 : * However, this is <em>not</em> the case for char * parameters for charset names
260 : * or other IDs.
261 : * - Most UnicodeString methods do not take a UErrorCode parameter because
262 : * there are usually very few opportunities for failure other than a shortage
263 : * of memory, error codes in low-level C++ string methods would be inconvenient,
264 : * and the error code as the last parameter (ICU convention) would prevent
265 : * the use of default parameter values.
266 : * Instead, such methods set the UnicodeString into a "bogus" state
267 : * (see isBogus()) if an error occurs.
268 : *
269 : * In string comparisons, two UnicodeString objects that are both "bogus"
270 : * compare equal (to be transitive and prevent endless loops in sorting),
271 : * and a "bogus" string compares less than any non-"bogus" one.
272 : *
273 : * Const UnicodeString methods are thread-safe. Multiple threads can use
274 : * const methods on the same UnicodeString object simultaneously,
275 : * but non-const methods must not be called concurrently (in multiple threads)
276 : * with any other (const or non-const) methods.
277 : *
278 : * Similarly, const UnicodeString & parameters are thread-safe.
279 : * One object may be passed in as such a parameter concurrently in multiple threads.
280 : * This includes the const UnicodeString & parameters for
281 : * copy construction, assignment, and cloning.
282 : *
283 : * <p>UnicodeString uses several storage methods.
284 : * String contents can be stored inside the UnicodeString object itself,
285 : * in an allocated and shared buffer, or in an outside buffer that is "aliased".
286 : * Most of this is done transparently, but careful aliasing in particular provides
287 : * significant performance improvements.
288 : * Also, the internal buffer is accessible via special functions.
289 : * For details see the
290 : * <a href="http://userguide.icu-project.org/strings#TOC-Maximizing-Performance-with-the-UnicodeString-Storage-Model">User Guide Strings chapter</a>.</p>
291 : *
292 : * @see utf.h
293 : * @see CharacterIterator
294 : * @stable ICU 2.0
295 : */
296 : class U_COMMON_API UnicodeString : public Replaceable
297 : {
298 : public:
299 :
300 : /**
301 : * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
302 : * which constructs a Unicode string from an invariant-character char * string.
303 : * Use the macro US_INV instead of the full qualification for this value.
304 : *
305 : * @see US_INV
306 : * @stable ICU 3.2
307 : */
308 : enum EInvariant {
309 : /** Sole enumerator; the US_INV macro expands to this value.
310 : * @see EInvariant
311 : * @stable ICU 3.2
312 : */
313 : kInvariant
314 : };
315 :
316 : //========================================
317 : // Read-only operations
318 : //========================================
319 :
320 : /* Comparison - bitwise only - for international comparison use collation */
321 :
322 : /**
323 : * Equality operator. Performs only bitwise comparison.
324 : * @param text The UnicodeString to compare to this one.
325 : * @return TRUE if <TT>text</TT> contains the same characters as this one,
326 : * FALSE otherwise.
327 : * @stable ICU 2.0
328 : */
329 : inline UBool operator== (const UnicodeString& text) const;
330 :
331 : /**
332 : * Inequality operator. Performs only bitwise comparison.
333 : * @param text The UnicodeString to compare to this one.
334 : * @return FALSE if <TT>text</TT> contains the same characters as this one,
335 : * TRUE otherwise.
336 : * @stable ICU 2.0
337 : */
338 : inline UBool operator!= (const UnicodeString& text) const;
339 :
340 : /**
341 : * Greater than operator. Performs only bitwise comparison.
342 : * @param text The UnicodeString to compare to this one.
343 : * @return TRUE if the characters in this are bitwise
344 : * greater than the characters in <code>text</code>, FALSE otherwise
345 : * @stable ICU 2.0
346 : */
347 : inline UBool operator> (const UnicodeString& text) const;
348 :
349 : /**
350 : * Less than operator. Performs only bitwise comparison.
351 : * @param text The UnicodeString to compare to this one.
352 : * @return TRUE if the characters in this are bitwise
353 : * less than the characters in <code>text</code>, FALSE otherwise
354 : * @stable ICU 2.0
355 : */
356 : inline UBool operator< (const UnicodeString& text) const;
357 :
358 : /**
359 : * Greater than or equal operator. Performs only bitwise comparison.
360 : * @param text The UnicodeString to compare to this one.
361 : * @return TRUE if the characters in this are bitwise
362 : * greater than or equal to the characters in <code>text</code>, FALSE otherwise
363 : * @stable ICU 2.0
364 : */
365 : inline UBool operator>= (const UnicodeString& text) const;
366 :
367 : /**
368 : * Less than or equal operator. Performs only bitwise comparison.
369 : * @param text The UnicodeString to compare to this one.
370 : * @return TRUE if the characters in this are bitwise
371 : * less than or equal to the characters in <code>text</code>, FALSE otherwise
372 : * @stable ICU 2.0
373 : */
374 : inline UBool operator<= (const UnicodeString& text) const;
375 :
376 : /**
377 : * Compare the characters bitwise in this UnicodeString to
378 : * the characters in <code>text</code>.
379 : * @param text The UnicodeString to compare to this one.
380 : * @return The result of bitwise character comparison: 0 if this
381 : * contains the same characters as <code>text</code>, -1 if the characters in
382 : * this are bitwise less than the characters in <code>text</code>, +1 if the
383 : * characters in this are bitwise greater than the characters
384 : * in <code>text</code>.
385 : * @stable ICU 2.0
386 : */
387 : inline int8_t compare(const UnicodeString& text) const;
388 :
389 : /**
390 : * Compare the characters bitwise in the range
391 : * [<TT>start</TT>, <TT>start + length</TT>) with the characters
392 : * in the <b>entire string</b> <TT>text</TT>.
393 : * (The parameters "start" and "length" are not applied to the other text "text".)
394 : * @param start the offset at which the compare operation begins
395 : * @param length the number of characters of text to compare.
396 : * @param text the other text to be compared against this string.
397 : * @return The result of bitwise character comparison: 0 if this
398 : * contains the same characters as <code>text</code>, -1 if the characters in
399 : * this are bitwise less than the characters in <code>text</code>, +1 if the
400 : * characters in this are bitwise greater than the characters
401 : * in <code>text</code>.
402 : * @stable ICU 2.0
403 : */
404 : inline int8_t compare(int32_t start,
405 : int32_t length,
406 : const UnicodeString& text) const;
407 :
408 : /**
409 : * Compare the characters bitwise in the range
410 : * [<TT>start</TT>, <TT>start + length</TT>) with the characters
411 : * in <TT>srcText</TT> in the range
412 : * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
413 : * @param start the offset at which the compare operation begins
414 : * @param length the number of characters in this to compare.
415 : * @param srcText the text to be compared
416 : * @param srcStart the offset into <TT>srcText</TT> to start comparison
417 : * @param srcLength the number of characters in <TT>srcText</TT> to compare
418 : * @return The result of bitwise character comparison: 0 if this
419 : * contains the same characters as <code>srcText</code>, -1 if the characters in
420 : * this are bitwise less than the characters in <code>srcText</code>, +1 if the
421 : * characters in this are bitwise greater than the characters
422 : * in <code>srcText</code>.
423 : * @stable ICU 2.0
424 : */
425 : inline int8_t compare(int32_t start,
426 : int32_t length,
427 : const UnicodeString& srcText,
428 : int32_t srcStart,
429 : int32_t srcLength) const;
430 :
431 : /**
432 : * Compare the characters bitwise in this UnicodeString with the first
433 : * <TT>srcLength</TT> characters in <TT>srcChars</TT>.
434 : * @param srcChars The characters to compare to this UnicodeString.
435 : * @param srcLength the number of characters in <TT>srcChars</TT> to compare
436 : * @return The result of bitwise character comparison: 0 if this
437 : * contains the same characters as <code>srcChars</code>, -1 if the characters in
438 : * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
439 : * characters in this are bitwise greater than the characters
440 : * in <code>srcChars</code>.
441 : * @stable ICU 2.0
442 : */
443 : inline int8_t compare(ConstChar16Ptr srcChars,
444 : int32_t srcLength) const;
445 :
446 : /**
447 : * Compare the characters bitwise in the range
448 : * [<TT>start</TT>, <TT>start + length</TT>) with the first
449 : * <TT>length</TT> characters in <TT>srcChars</TT>.
450 : * @param start the offset at which the compare operation begins
451 : * @param length the number of characters to compare.
452 : * @param srcChars the characters to be compared
453 : * @return The result of bitwise character comparison: 0 if this
454 : * contains the same characters as <code>srcChars</code>, -1 if the characters in
455 : * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
456 : * characters in this are bitwise greater than the characters
457 : * in <code>srcChars</code>.
458 : * @stable ICU 2.0
459 : */
460 : inline int8_t compare(int32_t start,
461 : int32_t length,
462 : const char16_t *srcChars) const;
463 :
464 : /**
465 : * Compare the characters bitwise in the range
466 : * [<TT>start</TT>, <TT>start + length</TT>) with the characters
467 : * in <TT>srcChars</TT> in the range
468 : * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
469 : * @param start the offset at which the compare operation begins
470 : * @param length the number of characters in this to compare
471 : * @param srcChars the characters to be compared
472 : * @param srcStart the offset into <TT>srcChars</TT> to start comparison
473 : * @param srcLength the number of characters in <TT>srcChars</TT> to compare
474 : * @return The result of bitwise character comparison: 0 if this
475 : * contains the same characters as <code>srcChars</code>, -1 if the characters in
476 : * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
477 : * characters in this are bitwise greater than the characters
478 : * in <code>srcChars</code>.
479 : * @stable ICU 2.0
480 : */
481 : inline int8_t compare(int32_t start,
482 : int32_t length,
483 : const char16_t *srcChars,
484 : int32_t srcStart,
485 : int32_t srcLength) const;
486 :
487 : /**
488 : * Compare the characters bitwise in the range
489 : * [<TT>start</TT>, <TT>limit</TT>) with the characters
490 : * in <TT>srcText</TT> in the range
491 : * [<TT>srcStart</TT>, <TT>srcLimit</TT>).
492 : * @param start the offset at which the compare operation begins
493 : * @param limit the offset immediately following the compare operation
494 : * @param srcText the text to be compared
495 : * @param srcStart the offset into <TT>srcText</TT> to start comparison
496 : * @param srcLimit the offset into <TT>srcText</TT> to limit comparison
497 : * @return The result of bitwise character comparison: 0 if this
498 : * contains the same characters as <code>srcText</code>, -1 if the characters in
499 : * this are bitwise less than the characters in <code>srcText</code>, +1 if the
500 : * characters in this are bitwise greater than the characters
501 : * in <code>srcText</code>.
502 : * @stable ICU 2.0
503 : */
504 : inline int8_t compareBetween(int32_t start,
505 : int32_t limit,
506 : const UnicodeString& srcText,
507 : int32_t srcStart,
508 : int32_t srcLimit) const;
509 :
510 : /**
511 : * Compare two Unicode strings in code point order.
512 : * The result may be different from the results of compare(), operator<, etc.
513 : * if supplementary characters are present:
514 : *
515 : * In UTF-16, supplementary characters (with code points U+10000 and above) are
516 : * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
517 : * which means that they compare as less than some other BMP characters like U+feff.
518 : * This function compares Unicode strings in code point order.
519 : * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
520 : *
521 : * @param text Another string to compare this one to.
522 : * @return a negative/zero/positive integer corresponding to whether
523 : * this string is less than/equal to/greater than the second one
524 : * in code point order
525 : * @stable ICU 2.0
526 : */
527 : inline int8_t compareCodePointOrder(const UnicodeString& text) const;
528 :
529 : /**
530 : * Compare two Unicode strings in code point order.
531 : * The result may be different from the results of compare(), operator<, etc.
532 : * if supplementary characters are present:
533 : *
534 : * In UTF-16, supplementary characters (with code points U+10000 and above) are
535 : * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
536 : * which means that they compare as less than some other BMP characters like U+feff.
537 : * This function compares Unicode strings in code point order.
538 : * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
539 : *
540 : * @param start The start offset in this string at which the compare operation begins.
541 : * @param length The number of code units from this string to compare.
542 : * @param srcText Another string to compare this one to.
543 : * @return a negative/zero/positive integer corresponding to whether
544 : * this string is less than/equal to/greater than the second one
545 : * in code point order
546 : * @stable ICU 2.0
547 : */
548 : inline int8_t compareCodePointOrder(int32_t start,
549 : int32_t length,
550 : const UnicodeString& srcText) const;
551 :
552 : /**
553 : * Compare two Unicode strings in code point order.
554 : * The result may be different from the results of compare(), operator<, etc.
555 : * if supplementary characters are present:
556 : *
557 : * In UTF-16, supplementary characters (with code points U+10000 and above) are
558 : * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
559 : * which means that they compare as less than some other BMP characters like U+feff.
560 : * This function compares Unicode strings in code point order.
561 : * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
562 : *
563 : * @param start The start offset in this string at which the compare operation begins.
564 : * @param length The number of code units from this string to compare.
565 : * @param srcText Another string to compare this one to.
566 : * @param srcStart The start offset in that string at which the compare operation begins.
567 : * @param srcLength The number of code units from that string to compare.
568 : * @return a negative/zero/positive integer corresponding to whether
569 : * this string is less than/equal to/greater than the second one
570 : * in code point order
571 : * @stable ICU 2.0
572 : */
573 : inline int8_t compareCodePointOrder(int32_t start,
574 : int32_t length,
575 : const UnicodeString& srcText,
576 : int32_t srcStart,
577 : int32_t srcLength) const;
578 :
579 : /**
580 : * Compare two Unicode strings in code point order.
581 : * The result may be different from the results of compare(), operator<, etc.
582 : * if supplementary characters are present:
583 : *
584 : * In UTF-16, supplementary characters (with code points U+10000 and above) are
585 : * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
586 : * which means that they compare as less than some other BMP characters like U+feff.
587 : * This function compares Unicode strings in code point order.
588 : * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
589 : *
590 : * @param srcChars A pointer to another string to compare this one to.
591 : * @param srcLength The number of code units from that string to compare.
592 : * @return a negative/zero/positive integer corresponding to whether
593 : * this string is less than/equal to/greater than the second one
594 : * in code point order
595 : * @stable ICU 2.0
596 : */
597 : inline int8_t compareCodePointOrder(ConstChar16Ptr srcChars,
598 : int32_t srcLength) const;
599 :
600 : /**
601 : * Compare two Unicode strings in code point order.
602 : * The result may be different from the results of compare(), operator<, etc.
603 : * if supplementary characters are present:
604 : *
605 : * In UTF-16, supplementary characters (with code points U+10000 and above) are
606 : * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
607 : * which means that they compare as less than some other BMP characters like U+feff.
608 : * This function compares Unicode strings in code point order.
609 : * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
610 : *
611 : * @param start The start offset in this string at which the compare operation begins.
612 : * @param length The number of code units from this string to compare.
613 : * @param srcChars A pointer to another string to compare this one to.
614 : * @return a negative/zero/positive integer corresponding to whether
615 : * this string is less than/equal to/greater than the second one
616 : * in code point order
617 : * @stable ICU 2.0
618 : */
619 : inline int8_t compareCodePointOrder(int32_t start,
620 : int32_t length,
621 : const char16_t *srcChars) const;
622 :
623 : /**
624 : * Compare two Unicode strings in code point order.
625 : * The result may be different from the results of compare(), operator<, etc.
626 : * if supplementary characters are present:
627 : *
628 : * In UTF-16, supplementary characters (with code points U+10000 and above) are
629 : * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
630 : * which means that they compare as less than some other BMP characters like U+feff.
631 : * This function compares Unicode strings in code point order.
632 : * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
633 : *
634 : * @param start The start offset in this string at which the compare operation begins.
635 : * @param length The number of code units from this string to compare.
636 : * @param srcChars A pointer to another string to compare this one to.
637 : * @param srcStart The start offset in that string at which the compare operation begins.
638 : * @param srcLength The number of code units from that string to compare.
639 : * @return a negative/zero/positive integer corresponding to whether
640 : * this string is less than/equal to/greater than the second one
641 : * in code point order
642 : * @stable ICU 2.0
643 : */
644 : inline int8_t compareCodePointOrder(int32_t start,
645 : int32_t length,
646 : const char16_t *srcChars,
647 : int32_t srcStart,
648 : int32_t srcLength) const;
649 :
650 : /**
651 : * Compare two Unicode strings in code point order.
652 : * The result may be different from the results of compare(), operator<, etc.
653 : * if supplementary characters are present:
654 : *
655 : * In UTF-16, supplementary characters (with code points U+10000 and above) are
656 : * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
657 : * which means that they compare as less than some other BMP characters like U+feff.
658 : * This function compares Unicode strings in code point order.
659 : * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
660 : *
661 : * @param start The start offset in this string at which the compare operation begins.
662 : * @param limit The offset after the last code unit from this string to compare.
663 : * @param srcText Another string to compare this one to.
664 : * @param srcStart The start offset in that string at which the compare operation begins.
665 : * @param srcLimit The offset after the last code unit from that string to compare.
666 : * @return a negative/zero/positive integer corresponding to whether
667 : * this string is less than/equal to/greater than the second one
668 : * in code point order
669 : * @stable ICU 2.0
670 : */
671 : inline int8_t compareCodePointOrderBetween(int32_t start,
672 : int32_t limit,
673 : const UnicodeString& srcText,
674 : int32_t srcStart,
675 : int32_t srcLimit) const;
676 :
677 : /**
678 : * Compare two strings case-insensitively using full case folding.
679 : * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
680 : *
681 : * @param text Another string to compare this one to.
682 : * @param options A bit set of options:
683 : * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
684 : * Comparison in code unit order with default case folding.
685 : *
686 : * - U_COMPARE_CODE_POINT_ORDER
687 : * Set to choose code point order instead of code unit order
688 : * (see u_strCompare for details).
689 : *
690 : * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
691 : *
692 : * @return A negative, zero, or positive integer indicating the comparison result.
693 : * @stable ICU 2.0
694 : */
695 : inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
696 :
697 : /**
698 : * Compare two strings case-insensitively using full case folding.
699 : * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
700 : *
701 : * @param start The start offset in this string at which the compare operation begins.
702 : * @param length The number of code units from this string to compare.
703 : * @param srcText Another string to compare this one to.
704 : * @param options A bit set of options:
705 : * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
706 : * Comparison in code unit order with default case folding.
707 : *
708 : * - U_COMPARE_CODE_POINT_ORDER
709 : * Set to choose code point order instead of code unit order
710 : * (see u_strCompare for details).
711 : *
712 : * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
713 : *
714 : * @return A negative, zero, or positive integer indicating the comparison result.
715 : * @stable ICU 2.0
716 : */
717 : inline int8_t caseCompare(int32_t start,
718 : int32_t length,
719 : const UnicodeString& srcText,
720 : uint32_t options) const;
721 :
722 : /**
723 : * Compare two strings case-insensitively using full case folding.
724 : * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
725 : *
726 : * @param start The start offset in this string at which the compare operation begins.
727 : * @param length The number of code units from this string to compare.
728 : * @param srcText Another string to compare this one to.
729 : * @param srcStart The start offset in that string at which the compare operation begins.
730 : * @param srcLength The number of code units from that string to compare.
731 : * @param options A bit set of options:
732 : * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
733 : * Comparison in code unit order with default case folding.
734 : *
735 : * - U_COMPARE_CODE_POINT_ORDER
736 : * Set to choose code point order instead of code unit order
737 : * (see u_strCompare for details).
738 : *
739 : * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
740 : *
741 : * @return A negative, zero, or positive integer indicating the comparison result.
742 : * @stable ICU 2.0
743 : */
744 : inline int8_t caseCompare(int32_t start,
745 : int32_t length,
746 : const UnicodeString& srcText,
747 : int32_t srcStart,
748 : int32_t srcLength,
749 : uint32_t options) const;
750 :
751 : /**
752 : * Compare two strings case-insensitively using full case folding.
753 : * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
754 : *
755 : * @param srcChars A pointer to another string to compare this one to.
756 : * @param srcLength The number of code units from that string to compare.
757 : * @param options A bit set of options:
758 : * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
759 : * Comparison in code unit order with default case folding.
760 : *
761 : * - U_COMPARE_CODE_POINT_ORDER
762 : * Set to choose code point order instead of code unit order
763 : * (see u_strCompare for details).
764 : *
765 : * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
766 : *
767 : * @return A negative, zero, or positive integer indicating the comparison result.
768 : * @stable ICU 2.0
769 : */
770 : inline int8_t caseCompare(ConstChar16Ptr srcChars,
771 : int32_t srcLength,
772 : uint32_t options) const;
773 :
774 : /**
775 : * Compare two strings case-insensitively using full case folding.
776 : * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
777 : *
778 : * @param start The start offset in this string at which the compare operation begins.
779 : * @param length The number of code units from this string to compare.
780 : * @param srcChars A pointer to another string to compare this one to.
781 : * @param options A bit set of options:
782 : * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
783 : * Comparison in code unit order with default case folding.
784 : *
785 : * - U_COMPARE_CODE_POINT_ORDER
786 : * Set to choose code point order instead of code unit order
787 : * (see u_strCompare for details).
788 : *
789 : * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
790 : *
791 : * @return A negative, zero, or positive integer indicating the comparison result.
792 : * @stable ICU 2.0
793 : */
794 : inline int8_t caseCompare(int32_t start,
795 : int32_t length,
796 : const char16_t *srcChars,
797 : uint32_t options) const;
798 :
799 : /**
800 : * Compare two strings case-insensitively using full case folding.
801 : * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
802 : *
803 : * @param start The start offset in this string at which the compare operation begins.
804 : * @param length The number of code units from this string to compare.
805 : * @param srcChars A pointer to another string to compare this one to.
806 : * @param srcStart The start offset in that string at which the compare operation begins.
807 : * @param srcLength The number of code units from that string to compare.
808 : * @param options A bit set of options:
809 : * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
810 : * Comparison in code unit order with default case folding.
811 : *
812 : * - U_COMPARE_CODE_POINT_ORDER
813 : * Set to choose code point order instead of code unit order
814 : * (see u_strCompare for details).
815 : *
816 : * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
817 : *
818 : * @return A negative, zero, or positive integer indicating the comparison result.
819 : * @stable ICU 2.0
820 : */
821 : inline int8_t caseCompare(int32_t start,
822 : int32_t length,
823 : const char16_t *srcChars,
824 : int32_t srcStart,
825 : int32_t srcLength,
826 : uint32_t options) const;
827 :
828 : /**
829 : * Compare two strings case-insensitively using full case folding.
830 : * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
831 : *
832 : * @param start The start offset in this string at which the compare operation begins.
833 : * @param limit The offset after the last code unit from this string to compare.
834 : * @param srcText Another string to compare this one to.
835 : * @param srcStart The start offset in that string at which the compare operation begins.
836 : * @param srcLimit The offset after the last code unit from that string to compare.
837 : * @param options A bit set of options:
838 : * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
839 : * Comparison in code unit order with default case folding.
840 : *
841 : * - U_COMPARE_CODE_POINT_ORDER
842 : * Set to choose code point order instead of code unit order
843 : * (see u_strCompare for details).
844 : *
845 : * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
846 : *
847 : * @return A negative, zero, or positive integer indicating the comparison result.
848 : * @stable ICU 2.0
849 : */
850 : inline int8_t caseCompareBetween(int32_t start,
851 : int32_t limit,
852 : const UnicodeString& srcText,
853 : int32_t srcStart,
854 : int32_t srcLimit,
855 : uint32_t options) const;
856 :
857 : /**
858 : * Determine if this starts with the characters in <TT>text</TT>
859 : * @param text The text to match.
860 : * @return TRUE if this starts with the characters in <TT>text</TT>,
861 : * FALSE otherwise
862 : * @stable ICU 2.0
863 : */
864 : inline UBool startsWith(const UnicodeString& text) const;
865 :
866 : /**
867 : * Determine if this starts with the characters in <TT>srcText</TT>
868 : * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
869 : * @param srcText The text to match.
870 : * @param srcStart the offset into <TT>srcText</TT> to start matching
871 : * @param srcLength the number of characters in <TT>srcText</TT> to match
872 : * @return TRUE if this starts with the characters in <TT>srcText</TT>,
873 : * FALSE otherwise
874 : * @stable ICU 2.0
875 : */
876 : inline UBool startsWith(const UnicodeString& srcText,
877 : int32_t srcStart,
878 : int32_t srcLength) const;
879 :
880 : /**
881 : * Determine if this starts with the characters in <TT>srcChars</TT>
882 : * @param srcChars The characters to match.
883 : * @param srcLength the number of characters in <TT>srcChars</TT>
884 : * @return TRUE if this starts with the characters in <TT>srcChars</TT>,
885 : * FALSE otherwise
886 : * @stable ICU 2.0
887 : */
888 : inline UBool startsWith(ConstChar16Ptr srcChars,
889 : int32_t srcLength) const;
890 :
891 : /**
892 : * Determine if this starts with the characters in <TT>srcChars</TT>
893 : * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
894 : * @param srcChars The characters to match.
895 : * @param srcStart the offset into <TT>srcChars</TT> to start matching
896 : * @param srcLength the number of characters in <TT>srcChars</TT> to match
897 : * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise
898 : * @stable ICU 2.0
899 : */
900 : inline UBool startsWith(const char16_t *srcChars,
901 : int32_t srcStart,
902 : int32_t srcLength) const;
903 :
904 : /**
905 : * Determine if this ends with the characters in <TT>text</TT>
906 : * @param text The text to match.
907 : * @return TRUE if this ends with the characters in <TT>text</TT>,
908 : * FALSE otherwise
909 : * @stable ICU 2.0
910 : */
911 : inline UBool endsWith(const UnicodeString& text) const;
912 :
913 : /**
914 : * Determine if this ends with the characters in <TT>srcText</TT>
915 : * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
916 : * @param srcText The text to match.
917 : * @param srcStart the offset into <TT>srcText</TT> to start matching
918 : * @param srcLength the number of characters in <TT>srcText</TT> to match
919 : * @return TRUE if this ends with the characters in <TT>srcText</TT>,
920 : * FALSE otherwise
921 : * @stable ICU 2.0
922 : */
923 : inline UBool endsWith(const UnicodeString& srcText,
924 : int32_t srcStart,
925 : int32_t srcLength) const;
926 :
927 : /**
928 : * Determine if this ends with the characters in <TT>srcChars</TT>
929 : * @param srcChars The characters to match.
930 : * @param srcLength the number of characters in <TT>srcChars</TT>
931 : * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
932 : * FALSE otherwise
933 : * @stable ICU 2.0
934 : */
935 : inline UBool endsWith(ConstChar16Ptr srcChars,
936 : int32_t srcLength) const;
937 :
938 : /**
939 : * Determine if this ends with the characters in <TT>srcChars</TT>
940 : * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
941 : * @param srcChars The characters to match.
942 : * @param srcStart the offset into <TT>srcChars</TT> to start matching
943 : * @param srcLength the number of characters in <TT>srcChars</TT> to match
944 : * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
945 : * FALSE otherwise
946 : * @stable ICU 2.0
947 : */
948 : inline UBool endsWith(const char16_t *srcChars,
949 : int32_t srcStart,
950 : int32_t srcLength) const;
951 :
952 :
953 : /* Searching - bitwise only */
954 :
955 : /**
956 : * Locate in this the first occurrence of the characters in <TT>text</TT>,
957 : * using bitwise comparison.
958 : * @param text The text to search for.
959 : * @return The offset into this of the start of <TT>text</TT>,
960 : * or -1 if not found.
961 : * @stable ICU 2.0
962 : */
963 : inline int32_t indexOf(const UnicodeString& text) const;
964 :
965 : /**
966 : * Locate in this the first occurrence of the characters in <TT>text</TT>
967 : * starting at offset <TT>start</TT>, using bitwise comparison.
968 : * @param text The text to search for.
969 : * @param start The offset at which searching will start.
970 : * @return The offset into this of the start of <TT>text</TT>,
971 : * or -1 if not found.
972 : * @stable ICU 2.0
973 : */
974 : inline int32_t indexOf(const UnicodeString& text,
975 : int32_t start) const;
976 :
977 : /**
978 : * Locate in this the first occurrence in the range
979 : * [<TT>start</TT>, <TT>start + length</TT>) of the characters
980 : * in <TT>text</TT>, using bitwise comparison.
981 : * @param text The text to search for.
982 : * @param start The offset at which searching will start.
983 : * @param length The number of characters to search
984 : * @return The offset into this of the start of <TT>text</TT>,
985 : * or -1 if not found.
986 : * @stable ICU 2.0
987 : */
988 : inline int32_t indexOf(const UnicodeString& text,
989 : int32_t start,
990 : int32_t length) const;
991 :
992 : /**
993 : * Locate in this the first occurrence in the range
994 : * [<TT>start</TT>, <TT>start + length</TT>) of the characters
995 : * in <TT>srcText</TT> in the range
996 : * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
997 : * using bitwise comparison.
998 : * @param srcText The text to search for.
999 : * @param srcStart the offset into <TT>srcText</TT> at which
1000 : * to start matching
1001 : * @param srcLength the number of characters in <TT>srcText</TT> to match
1002 : * @param start the offset into this at which to start matching
1003 : * @param length the number of characters in this to search
1004 : * @return The offset into this of the start of <TT>srcText</TT>,
1005 : * or -1 if not found.
1006 : * @stable ICU 2.0
1007 : */
1008 : inline int32_t indexOf(const UnicodeString& srcText,
1009 : int32_t srcStart,
1010 : int32_t srcLength,
1011 : int32_t start,
1012 : int32_t length) const;
1013 :
1014 : /**
1015 : * Locate in this the first occurrence of the characters in
1016 : * <TT>srcChars</TT>
1017 : * starting at offset <TT>start</TT>, using bitwise comparison.
1018 : * @param srcChars The text to search for.
1019 : * @param srcLength the number of characters in <TT>srcChars</TT> to match
1020 : * @param start the offset into this at which to start matching
1021 : * @return The offset into this of the start of <TT>srcChars</TT>,
1022 : * or -1 if not found.
1023 : * @stable ICU 2.0
1024 : */
1025 : inline int32_t indexOf(const char16_t *srcChars,
1026 : int32_t srcLength,
1027 : int32_t start) const;
1028 :
1029 : /**
1030 : * Locate in this the first occurrence in the range
1031 : * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1032 : * in <TT>srcChars</TT>, using bitwise comparison.
1033 : * @param srcChars The text to search for.
1034 : * @param srcLength the number of characters in <TT>srcChars</TT>
1035 : * @param start The offset at which searching will start.
1036 : * @param length The number of characters to search
1037 : * @return The offset into this of the start of <TT>srcChars</TT>,
1038 : * or -1 if not found.
1039 : * @stable ICU 2.0
1040 : */
1041 : inline int32_t indexOf(ConstChar16Ptr srcChars,
1042 : int32_t srcLength,
1043 : int32_t start,
1044 : int32_t length) const;
1045 :
1046 : /**
1047 : * Locate in this the first occurrence in the range
1048 : * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1049 : * in <TT>srcChars</TT> in the range
1050 : * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1051 : * using bitwise comparison.
1052 : * @param srcChars The text to search for.
1053 : * @param srcStart the offset into <TT>srcChars</TT> at which
1054 : * to start matching
1055 : * @param srcLength the number of characters in <TT>srcChars</TT> to match
1056 : * @param start the offset into this at which to start matching
1057 : * @param length the number of characters in this to search
1058 : * @return The offset into this of the start of <TT>srcChars</TT>,
1059 : * or -1 if not found.
1060 : * @stable ICU 2.0
1061 : */
1062 : int32_t indexOf(const char16_t *srcChars,
1063 : int32_t srcStart,
1064 : int32_t srcLength,
1065 : int32_t start,
1066 : int32_t length) const;
1067 :
1068 : /**
1069 : * Locate in this the first occurrence of the BMP code point <code>c</code>,
1070 : * using bitwise comparison.
1071 : * @param c The code unit to search for.
1072 : * @return The offset into this of <TT>c</TT>, or -1 if not found.
1073 : * @stable ICU 2.0
1074 : */
1075 : inline int32_t indexOf(char16_t c) const;
1076 :
1077 : /**
1078 : * Locate in this the first occurrence of the code point <TT>c</TT>,
1079 : * using bitwise comparison.
1080 : *
1081 : * @param c The code point to search for.
1082 : * @return The offset into this of <TT>c</TT>, or -1 if not found.
1083 : * @stable ICU 2.0
1084 : */
1085 : inline int32_t indexOf(UChar32 c) const;
1086 :
1087 : /**
1088 : * Locate in this the first occurrence of the BMP code point <code>c</code>,
1089 : * starting at offset <TT>start</TT>, using bitwise comparison.
1090 : * @param c The code unit to search for.
1091 : * @param start The offset at which searching will start.
1092 : * @return The offset into this of <TT>c</TT>, or -1 if not found.
1093 : * @stable ICU 2.0
1094 : */
1095 : inline int32_t indexOf(char16_t c,
1096 : int32_t start) const;
1097 :
1098 : /**
1099 : * Locate in this the first occurrence of the code point <TT>c</TT>
1100 : * starting at offset <TT>start</TT>, using bitwise comparison.
1101 : *
1102 : * @param c The code point to search for.
1103 : * @param start The offset at which searching will start.
1104 : * @return The offset into this of <TT>c</TT>, or -1 if not found.
1105 : * @stable ICU 2.0
1106 : */
1107 : inline int32_t indexOf(UChar32 c,
1108 : int32_t start) const;
1109 :
1110 : /**
1111 : * Locate in this the first occurrence of the BMP code point <code>c</code>
1112 : * in the range [<TT>start</TT>, <TT>start + length</TT>),
1113 : * using bitwise comparison.
1114 : * @param c The code unit to search for.
1115 : * @param start the offset into this at which to start matching
1116 : * @param length the number of characters in this to search
1117 : * @return The offset into this of <TT>c</TT>, or -1 if not found.
1118 : * @stable ICU 2.0
1119 : */
1120 : inline int32_t indexOf(char16_t c,
1121 : int32_t start,
1122 : int32_t length) const;
1123 :
1124 : /**
1125 : * Locate in this the first occurrence of the code point <TT>c</TT>
1126 : * in the range [<TT>start</TT>, <TT>start + length</TT>),
1127 : * using bitwise comparison.
1128 : *
1129 : * @param c The code point to search for.
1130 : * @param start the offset into this at which to start matching
1131 : * @param length the number of characters in this to search
1132 : * @return The offset into this of <TT>c</TT>, or -1 if not found.
1133 : * @stable ICU 2.0
1134 : */
1135 : inline int32_t indexOf(UChar32 c,
1136 : int32_t start,
1137 : int32_t length) const;
1138 :
1139 : /**
1140 : * Locate in this the last occurrence of the characters in <TT>text</TT>,
1141 : * using bitwise comparison.
1142 : * @param text The text to search for.
1143 : * @return The offset into this of the start of <TT>text</TT>,
1144 : * or -1 if not found.
1145 : * @stable ICU 2.0
1146 : */
1147 : inline int32_t lastIndexOf(const UnicodeString& text) const;
1148 :
1149 : /**
1150 : * Locate in this the last occurrence of the characters in <TT>text</TT>
1151 : * starting at offset <TT>start</TT>, using bitwise comparison.
1152 : * @param text The text to search for.
1153 : * @param start The offset at which searching will start.
1154 : * @return The offset into this of the start of <TT>text</TT>,
1155 : * or -1 if not found.
1156 : * @stable ICU 2.0
1157 : */
1158 : inline int32_t lastIndexOf(const UnicodeString& text,
1159 : int32_t start) const;
1160 :
1161 : /**
1162 : * Locate in this the last occurrence in the range
1163 : * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1164 : * in <TT>text</TT>, using bitwise comparison.
1165 : * @param text The text to search for.
1166 : * @param start The offset at which searching will start.
1167 : * @param length The number of characters to search
1168 : * @return The offset into this of the start of <TT>text</TT>,
1169 : * or -1 if not found.
1170 : * @stable ICU 2.0
1171 : */
1172 : inline int32_t lastIndexOf(const UnicodeString& text,
1173 : int32_t start,
1174 : int32_t length) const;
1175 :
1176 : /**
1177 : * Locate in this the last occurrence in the range
1178 : * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1179 : * in <TT>srcText</TT> in the range
1180 : * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1181 : * using bitwise comparison.
1182 : * @param srcText The text to search for.
1183 : * @param srcStart the offset into <TT>srcText</TT> at which
1184 : * to start matching
1185 : * @param srcLength the number of characters in <TT>srcText</TT> to match
1186 : * @param start the offset into this at which to start matching
1187 : * @param length the number of characters in this to search
1188 : * @return The offset into this of the start of <TT>srcText</TT>,
1189 : * or -1 if not found.
1190 : * @stable ICU 2.0
1191 : */
1192 : inline int32_t lastIndexOf(const UnicodeString& srcText,
1193 : int32_t srcStart,
1194 : int32_t srcLength,
1195 : int32_t start,
1196 : int32_t length) const;
1197 :
1198 : /**
1199 : * Locate in this the last occurrence of the characters in <TT>srcChars</TT>
1200 : * starting at offset <TT>start</TT>, using bitwise comparison.
1201 : * @param srcChars The text to search for.
1202 : * @param srcLength the number of characters in <TT>srcChars</TT> to match
1203 : * @param start the offset into this at which to start matching
1204 : * @return The offset into this of the start of <TT>srcChars</TT>,
1205 : * or -1 if not found.
1206 : * @stable ICU 2.0
1207 : */
1208 : inline int32_t lastIndexOf(const char16_t *srcChars,
1209 : int32_t srcLength,
1210 : int32_t start) const;
1211 :
1212 : /**
1213 : * Locate in this the last occurrence in the range
1214 : * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1215 : * in <TT>srcChars</TT>, using bitwise comparison.
1216 : * @param srcChars The text to search for.
1217 : * @param srcLength the number of characters in <TT>srcChars</TT>
1218 : * @param start The offset at which searching will start.
1219 : * @param length The number of characters to search
1220 : * @return The offset into this of the start of <TT>srcChars</TT>,
1221 : * or -1 if not found.
1222 : * @stable ICU 2.0
1223 : */
1224 : inline int32_t lastIndexOf(ConstChar16Ptr srcChars,
1225 : int32_t srcLength,
1226 : int32_t start,
1227 : int32_t length) const;
1228 :
1229 : /**
1230 : * Locate in this the last occurrence in the range
1231 : * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1232 : * in <TT>srcChars</TT> in the range
1233 : * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1234 : * using bitwise comparison.
1235 : * @param srcChars The text to search for.
1236 : * @param srcStart the offset into <TT>srcChars</TT> at which
1237 : * to start matching
1238 : * @param srcLength the number of characters in <TT>srcChars</TT> to match
1239 : * @param start the offset into this at which to start matching
1240 : * @param length the number of characters in this to search
1241 : * @return The offset into this of the start of <TT>srcChars</TT>,
1242 : * or -1 if not found.
1243 : * @stable ICU 2.0
1244 : */
1245 : int32_t lastIndexOf(const char16_t *srcChars,
1246 : int32_t srcStart,
1247 : int32_t srcLength,
1248 : int32_t start,
1249 : int32_t length) const;
1250 :
1251 : /**
1252 : * Locate in this the last occurrence of the BMP code point <code>c</code>,
1253 : * using bitwise comparison.
1254 : * @param c The code unit to search for.
1255 : * @return The offset into this of <TT>c</TT>, or -1 if not found.
1256 : * @stable ICU 2.0
1257 : */
1258 : inline int32_t lastIndexOf(char16_t c) const;
1259 :
1260 : /**
1261 : * Locate in this the last occurrence of the code point <TT>c</TT>,
1262 : * using bitwise comparison.
1263 : *
1264 : * @param c The code point to search for.
1265 : * @return The offset into this of <TT>c</TT>, or -1 if not found.
1266 : * @stable ICU 2.0
1267 : */
1268 : inline int32_t lastIndexOf(UChar32 c) const;
1269 :
1270 : /**
1271 : * Locate in this the last occurrence of the BMP code point <code>c</code>
1272 : * starting at offset <TT>start</TT>, using bitwise comparison.
1273 : * @param c The code unit to search for.
1274 : * @param start The offset at which searching will start.
1275 : * @return The offset into this of <TT>c</TT>, or -1 if not found.
1276 : * @stable ICU 2.0
1277 : */
1278 : inline int32_t lastIndexOf(char16_t c,
1279 : int32_t start) const;
1280 :
1281 : /**
1282 : * Locate in this the last occurrence of the code point <TT>c</TT>
1283 : * starting at offset <TT>start</TT>, using bitwise comparison.
1284 : *
1285 : * @param c The code point to search for.
1286 : * @param start The offset at which searching will start.
1287 : * @return The offset into this of <TT>c</TT>, or -1 if not found.
1288 : * @stable ICU 2.0
1289 : */
1290 : inline int32_t lastIndexOf(UChar32 c,
1291 : int32_t start) const;
1292 :
1293 : /**
1294 : * Locate in this the last occurrence of the BMP code point <code>c</code>
1295 : * in the range [<TT>start</TT>, <TT>start + length</TT>),
1296 : * using bitwise comparison.
1297 : * @param c The code unit to search for.
1298 : * @param start the offset into this at which to start matching
1299 : * @param length the number of characters in this to search
1300 : * @return The offset into this of <TT>c</TT>, or -1 if not found.
1301 : * @stable ICU 2.0
1302 : */
1303 : inline int32_t lastIndexOf(char16_t c,
1304 : int32_t start,
1305 : int32_t length) const;
1306 :
1307 : /**
1308 : * Locate in this the last occurrence of the code point <TT>c</TT>
1309 : * in the range [<TT>start</TT>, <TT>start + length</TT>),
1310 : * using bitwise comparison.
1311 : *
1312 : * @param c The code point to search for.
1313 : * @param start the offset into this at which to start matching
1314 : * @param length the number of characters in this to search
1315 : * @return The offset into this of <TT>c</TT>, or -1 if not found.
1316 : * @stable ICU 2.0
1317 : */
1318 : inline int32_t lastIndexOf(UChar32 c,
1319 : int32_t start,
1320 : int32_t length) const;
1321 :
1322 :
1323 : /* Character access */
1324 :
1325 : /**
1326 : * Return the code unit at offset <tt>offset</tt>.
1327 : * If the offset is not valid (0..length()-1) then U+ffff is returned.
1328 : * @param offset a valid offset into the text
1329 : * @return the code unit at offset <tt>offset</tt>
1330 : * or 0xffff if the offset is not valid for this string
1331 : * @stable ICU 2.0
1332 : */
1333 : inline char16_t charAt(int32_t offset) const;
1334 :
1335 : /**
1336 : * Return the code unit at offset <tt>offset</tt>.
1337 : * If the offset is not valid (0..length()-1) then U+ffff is returned.
1338 : * @param offset a valid offset into the text
1339 : * @return the code unit at offset <tt>offset</tt>, or 0xffff if the offset is not valid for this string
1340 : * @stable ICU 2.0
1341 : */
1342 : inline char16_t operator[] (int32_t offset) const;
1343 :
1344 : /**
1345 : * Return the code point that contains the code unit
1346 : * at offset <tt>offset</tt>.
1347 : * If the offset is not valid (0..length()-1) then U+ffff is returned.
1348 : * @param offset a valid offset into the text
1349 : * that indicates the text offset of any of the code units
1350 : * that will be assembled into a code point (21-bit value) and returned
1351 : * @return the code point of text at <tt>offset</tt>
1352 : * or 0xffff if the offset is not valid for this string
1353 : * @stable ICU 2.0
1354 : */
1355 : UChar32 char32At(int32_t offset) const;
1356 :
1357 : /**
1358 : * Adjust a random-access offset so that
1359 : * it points to the beginning of a Unicode character.
1360 : * The offset that is passed in points to
1361 : * any code unit of a code point,
1362 : * while the returned offset will point to the first code unit
1363 : * of the same code point.
1364 : * In UTF-16, if the input offset points to a second surrogate
1365 : * of a surrogate pair, then the returned offset will point
1366 : * to the first surrogate.
1367 : * @param offset a valid offset into one code point of the text
1368 : * @return offset of the first code unit of the same code point
1369 : * @see U16_SET_CP_START
1370 : * @stable ICU 2.0
1371 : */
1372 : int32_t getChar32Start(int32_t offset) const;
1373 :
1374 : /**
1375 : * Adjust a random-access offset so that
1376 : * it points behind a Unicode character.
1377 : * The offset that is passed in points behind
1378 : * any code unit of a code point,
1379 : * while the returned offset will point behind the last code unit
1380 : * of the same code point.
1381 : * In UTF-16, if the input offset points behind the first surrogate
1382 : * (i.e., to the second surrogate)
1383 : * of a surrogate pair, then the returned offset will point
1384 : * behind the second surrogate (i.e., to the first code unit after the pair).
1385 : * @param offset a valid offset after any code unit of a code point of the text
1386 : * @return offset of the first code unit after the same code point
1387 : * @see U16_SET_CP_LIMIT
1388 : * @stable ICU 2.0
1389 : */
1390 : int32_t getChar32Limit(int32_t offset) const;
1391 :
1392 : /**
1393 : * Move the code unit index along the string by delta code points.
1394 : * Interpret the input index as a code unit-based offset into the string,
1395 : * move the index forward or backward by delta code points, and
1396 : * return the resulting index.
1397 : * The input index should point to the first code unit of a code point,
1398 : * if there is more than one.
1399 : *
1400 : * Both input and output indexes are code unit-based as for all
1401 : * string indexes/offsets in ICU (and other libraries, like MBCS char*).
1402 : * If delta<0 then the index is moved backward (toward the start of the string).
1403 : * If delta>0 then the index is moved forward (toward the end of the string).
1404 : *
1405 : * This behaves like CharacterIterator::move32(delta, kCurrent).
1406 : *
1407 : * Behavior for out-of-bounds indexes:
1408 : * <code>moveIndex32</code> pins the input index to 0..length(), i.e.,
1409 : * if the input index<0 then it is pinned to 0;
1410 : * if it is index>length() then it is pinned to length().
1411 : * Afterwards, the index is moved by <code>delta</code> code points
1412 : * forward or backward,
1413 : * but no further backward than to 0 and no further forward than to length().
1414 : * The resulting index return value will be in between 0 and length(), inclusively.
1415 : *
1416 : * Examples:
1417 : * <pre>
1418 : * // s has code points 'a' U+10000 'b' U+10ffff U+2029
1419 : * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();
1420 : *
1421 : * // initial index: position of U+10000
1422 : * int32_t index=1;
1423 : *
1424 : * // the following examples will all result in index==4, position of U+10ffff
1425 : *
1426 : * // skip 2 code points from some position in the string
1427 : * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
1428 : *
1429 : * // go to the 3rd code point from the start of s (0-based)
1430 : * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
1431 : *
1432 : * // go to the next-to-last code point of s
1433 : * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
1434 : * </pre>
1435 : *
1436 : * @param index input code unit index
1437 : * @param delta (signed) code point count to move the index forward or backward
1438 : * in the string
1439 : * @return the resulting code unit index
1440 : * @stable ICU 2.0
1441 : */
1442 : int32_t moveIndex32(int32_t index, int32_t delta) const;
1443 :
1444 : /* Substring extraction */
1445 :
1446 : /**
1447 : * Copy the characters in the range
1448 : * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
1449 : * beginning at <tt>dstStart</tt>.
1450 : * If the string aliases to <code>dst</code> itself as an external buffer,
1451 : * then extract() will not copy the contents.
1452 : *
1453 : * @param start offset of first character which will be copied into the array
1454 : * @param length the number of characters to extract
1455 : * @param dst array in which to copy characters. The length of <tt>dst</tt>
1456 : * must be at least (<tt>dstStart + length</tt>).
1457 : * @param dstStart the offset in <TT>dst</TT> where the first character
1458 : * will be extracted
1459 : * @stable ICU 2.0
1460 : */
1461 : inline void extract(int32_t start,
1462 : int32_t length,
1463 : Char16Ptr dst,
1464 : int32_t dstStart = 0) const;
1465 :
1466 : /**
1467 : * Copy the contents of the string into dest.
1468 : * This is a convenience function that
1469 : * checks if there is enough space in dest,
1470 : * extracts the entire string if possible,
1471 : * and NUL-terminates dest if possible.
1472 : *
1473 : * If the string fits into dest but cannot be NUL-terminated
1474 : * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
1475 : * If the string itself does not fit into dest
1476 : * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
1477 : *
1478 : * If the string aliases to <code>dest</code> itself as an external buffer,
1479 : * then extract() will not copy the contents.
1480 : *
1481 : * @param dest Destination string buffer.
1482 : * @param destCapacity Number of char16_ts available at dest.
1483 : * @param errorCode ICU error code.
1484 : * @return length()
1485 : * @stable ICU 2.0
1486 : */
1487 : int32_t
1488 : extract(Char16Ptr dest, int32_t destCapacity,
1489 : UErrorCode &errorCode) const;
1490 :
1491 : /**
1492 : * Copy the characters in the range
1493 : * [<tt>start</tt>, <tt>start + length</tt>) into the UnicodeString
1494 : * <tt>target</tt>.
1495 : * @param start offset of first character which will be copied
1496 : * @param length the number of characters to extract
1497 : * @param target UnicodeString into which to copy characters.
1498 : * (No return value: the characters are copied into <TT>target</TT>.)
1499 : * @stable ICU 2.0
1500 : */
1501 : inline void extract(int32_t start,
1502 : int32_t length,
1503 : UnicodeString& target) const;
1504 :
1505 : /**
1506 : * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
1507 : * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.
1508 : * @param start offset of first character which will be copied into the array
1509 : * @param limit offset immediately following the last character to be copied
1510 : * @param dst array in which to copy characters. The length of <tt>dst</tt>
1511 : * must be at least (<tt>dstStart + (limit - start)</tt>).
1512 : * @param dstStart the offset in <TT>dst</TT> where the first character
1513 : * will be extracted
1514 : * @stable ICU 2.0
1515 : */
1516 : inline void extractBetween(int32_t start,
1517 : int32_t limit,
1518 : char16_t *dst,
1519 : int32_t dstStart = 0) const;
1520 :
1521 : /**
1522 : * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
1523 : * into the UnicodeString <tt>target</tt>. Replaceable API.
1524 : * @param start offset of first character which will be copied
1525 : * @param limit offset immediately following the last character to be copied
1526 : * @param target UnicodeString into which to copy characters.
1527 : * (No return value: the characters are copied into <TT>target</TT>.)
1528 : * @stable ICU 2.0
1529 : */
1530 : virtual void extractBetween(int32_t start,
1531 : int32_t limit,
1532 : UnicodeString& target) const;
1533 :
1534 : /**
1535 : * Copy the characters in the range
1536 : * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters.
1537 : * All characters must be invariant (see utypes.h).
1538 : * Use US_INV as the last, signature-distinguishing parameter.
1539 : *
1540 : * This function does not write any more than <code>targetCapacity</code>
1541 : * characters but returns the length of the entire output string
1542 : * so that one can allocate a larger buffer and call the function again
1543 : * if necessary.
1544 : * The output string is NUL-terminated if possible.
1545 : *
1546 : * @param start offset of first character which will be copied
1547 : * @param startLength the number of characters to extract
1548 : * @param target the target buffer for extraction, can be NULL
1549 : * if targetCapacity is 0
1550 : * @param targetCapacity the length of the target buffer
1551 : * @param inv Signature-distinguishing parameter, use US_INV.
1552 : * @return the output string length, not including the terminating NUL
1553 : * @stable ICU 3.2
1554 : */
1555 : int32_t extract(int32_t start,
1556 : int32_t startLength,
1557 : char *target,
1558 : int32_t targetCapacity,
1559 : enum EInvariant inv) const;
1560 :
1561 : #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
1562 :
1563 : /**
1564 : * Copy the characters in the range
1565 : * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters
1566 : * in the platform's default codepage.
1567 : * This function does not write any more than <code>targetLength</code>
1568 : * characters but returns the length of the entire output string
1569 : * so that one can allocate a larger buffer and call the function again
1570 : * if necessary.
1571 : * The output string is NUL-terminated if possible.
1572 : *
1573 : * @param start offset of first character which will be copied
1574 : * @param startLength the number of characters to extract
1575 : * @param target the target buffer for extraction
1576 : * @param targetLength the length of the target buffer
1577 : * If <TT>target</TT> is NULL, then the number of bytes required for
1578 : * <TT>target</TT> is returned.
1579 : * @return the output string length, not including the terminating NUL
1580 : * @stable ICU 2.0
1581 : */
1582 : int32_t extract(int32_t start,
1583 : int32_t startLength,
1584 : char *target,
1585 : uint32_t targetLength) const;
1586 :
1587 : #endif
1588 :
1589 : #if !UCONFIG_NO_CONVERSION
1590 :
1591 : /**
1592 : * Copy the characters in the range
1593 : * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters
1594 : * in a specified codepage.
1595 : * The output string is NUL-terminated.
1596 : *
1597 : * Recommendation: For invariant-character strings use
1598 : * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
1599 : * because it avoids object code dependencies of UnicodeString on
1600 : * the conversion code.
1601 : *
1602 : * @param start offset of first character which will be copied
1603 : * @param startLength the number of characters to extract
1604 : * @param target the target buffer for extraction
1605 : * @param codepage the desired codepage for the characters. 0 has
1606 : * the special meaning of the default codepage
1607 : * If <code>codepage</code> is an empty string (<code>""</code>),
1608 : * then a simple conversion is performed on the codepage-invariant
1609 : * subset ("invariant characters") of the platform encoding. See utypes.h.
1610 : * If <TT>target</TT> is NULL, then the number of bytes required for
1611 : * <TT>target</TT> is returned. It is assumed that the target is big enough
1612 : * to fit all of the characters.
1613 : * @return the output string length, not including the terminating NUL
1614 : * @stable ICU 2.0
1615 : */
1616 : inline int32_t extract(int32_t start,
1617 : int32_t startLength,
1618 : char *target,
1619 : const char *codepage = 0) const;
1620 :
1621 : /**
1622 : * Copy the characters in the range
1623 : * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters
1624 : * in a specified codepage.
1625 : * This function does not write any more than <code>targetLength</code>
1626 : * characters but returns the length of the entire output string
1627 : * so that one can allocate a larger buffer and call the function again
1628 : * if necessary.
1629 : * The output string is NUL-terminated if possible.
1630 : *
1631 : * Recommendation: For invariant-character strings use
1632 : * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
1633 : * because it avoids object code dependencies of UnicodeString on
1634 : * the conversion code.
1635 : *
1636 : * @param start offset of first character which will be copied
1637 : * @param startLength the number of characters to extract
1638 : * @param target the target buffer for extraction
1639 : * @param targetLength the length of the target buffer
1640 : * @param codepage the desired codepage for the characters. 0 has
1641 : * the special meaning of the default codepage
1642 : * If <code>codepage</code> is an empty string (<code>""</code>),
1643 : * then a simple conversion is performed on the codepage-invariant
1644 : * subset ("invariant characters") of the platform encoding. See utypes.h.
1645 : * If <TT>target</TT> is NULL, then the number of bytes required for
1646 : * <TT>target</TT> is returned.
1647 : * @return the output string length, not including the terminating NUL
1648 : * @stable ICU 2.0
1649 : */
1650 : int32_t extract(int32_t start,
1651 : int32_t startLength,
1652 : char *target,
1653 : uint32_t targetLength,
1654 : const char *codepage) const;
1655 :
1656 : /**
1657 : * Convert the UnicodeString into a codepage string using an existing UConverter.
1658 : * The output string is NUL-terminated if possible.
1659 : *
1660 : * This function avoids the overhead of opening and closing a converter if
1661 : * multiple strings are extracted.
1662 : *
1663 : * @param dest destination string buffer, can be NULL if destCapacity==0
1664 : * @param destCapacity the number of chars available at dest
1665 : * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
1666 : * or NULL for the default converter
1667 : * @param errorCode normal ICU error code
1668 : * @return the length of the output string, not counting the terminating NUL;
1669 : * if the length is greater than destCapacity, then the string will not fit
1670 : * and a buffer of the indicated length would need to be passed in
1671 : * @stable ICU 2.0
1672 : */
1673 : int32_t extract(char *dest, int32_t destCapacity,
1674 : UConverter *cnv,
1675 : UErrorCode &errorCode) const;
1676 :
1677 : #endif
1678 :
1679 : /**
1680 : * Create a temporary substring for the specified range.
1681 : * Unlike the substring constructor and setTo() functions,
1682 : * the object returned here will be a read-only alias (using getBuffer())
1683 : * rather than copying the text.
1684 : * As a result, this substring operation is much faster but requires
1685 : * that the original string not be modified or deleted during the lifetime
1686 : * of the returned substring object.
1687 : * @param start offset of the first character visible in the substring
1688 : * @param length length of the substring
1689 : * @return a read-only alias UnicodeString object for the substring
1690 : * @stable ICU 4.4
1691 : */
1692 : UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
1693 :
1694 : /**
1695 : * Create a temporary substring for the specified range.
1696 : * Same as tempSubString(start, length) except that the substring range
1697 : * is specified as a (start, limit) pair (with an exclusive limit index)
1698 : * rather than a (start, length) pair.
1699 : * @param start offset of the first character visible in the substring
1700 : * @param limit offset immediately following the last character visible in the substring
1701 : * @return a read-only alias UnicodeString object for the substring
1702 : * @stable ICU 4.4
1703 : */
1704 : inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
1705 :
1706 : /**
1707 : * Convert the UnicodeString to UTF-8 and write the result
1708 : * to a ByteSink. This is called by toUTF8String().
1709 : * Unpaired surrogates are replaced with U+FFFD.
1710 : * Calls u_strToUTF8WithSub().
1711 : *
1712 : * @param sink A ByteSink to which the UTF-8 version of the string is written.
1713 : * sink.Flush() is called at the end.
1714 : * @stable ICU 4.2
1715 : * @see toUTF8String
1716 : */
1717 : void toUTF8(ByteSink &sink) const;
1718 :
1719 : /**
1720 : * Convert the UnicodeString to UTF-8 and append the result
1721 : * to a standard string.
1722 : * Unpaired surrogates are replaced with U+FFFD.
1723 : * Calls toUTF8() with a StringByteSink constructed from the address of result.
1724 : *
1725 : * @param result A standard string (or a compatible object)
1726 : * to which the UTF-8 version of the string is appended.
1727 : * @return A reference to result, i.e., the same object that was passed in.
1728 : * @stable ICU 4.2
1729 : * @see toUTF8
1730 : */
1731 : template<typename StringClass>
1732 : StringClass &toUTF8String(StringClass &result) const {
1733 : StringByteSink<StringClass> sbs(&result); // sink object handed to toUTF8(); it is given a pointer to the caller's string
1734 : toUTF8(sbs); // performs the actual UTF-8 conversion, writing to the sink
1735 : return result; // return the caller's string so that calls can be chained
1736 : }
1737 :
1738 : /**
1739 : * Convert the UnicodeString to UTF-32.
1740 : * Unpaired surrogates are replaced with U+FFFD.
1741 : * Calls u_strToUTF32WithSub().
1742 : *
1743 : * @param utf32 destination string buffer, can be NULL if capacity==0
1744 : * @param capacity the number of UChar32s available at utf32
1745 : * @param errorCode Standard ICU error code. Its input value must
1746 : * pass the U_SUCCESS() test, or else the function returns
1747 : * immediately. Check for U_FAILURE() on output or use with
1748 : * function chaining. (See User Guide for details.)
1749 : * @return The length of the UTF-32 string.
1750 : * @see fromUTF32
1751 : * @stable ICU 4.2
1752 : */
1753 : int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
1754 :
1755 : /* Length operations */
1756 :
1757 : /**
1758 : * Return the length of the UnicodeString object.
1759 : * The length is the number of char16_t code units that are in the UnicodeString.
1760 : * If you want the number of code points, please use countChar32().
1761 : * @return the length of the UnicodeString object
1762 : * @see countChar32
1763 : * @stable ICU 2.0
1764 : */
1765 : inline int32_t length(void) const;
1766 :
1767 : /**
1768 : * Count Unicode code points in the length char16_t code units of the string.
1769 : * A code point may occupy either one or two char16_t code units.
1770 : * Counting code points involves reading all code units.
1771 : *
1772 : * This function is basically the inverse of moveIndex32().
1773 : *
1774 : * @param start the index of the first code unit to check
1775 : * @param length the number of char16_t code units to check
1776 : * @return the number of code points in the specified code units
1777 : * @see length
1778 : * @stable ICU 2.0
1779 : */
1780 : int32_t
1781 : countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
1782 :
1783 : /**
1784 : * Check if the length char16_t code units of the string
1785 : * contain more Unicode code points than a certain number.
1786 : * This is more efficient than counting all code points in this part of the string
1787 : * and comparing that number with a threshold.
1788 : * This function may not need to scan the string at all if the length
1789 : * falls within a certain range, and
1790 : * never needs to count more than 'number+1' code points.
1791 : * Logically equivalent to (countChar32(start, length)>number).
1792 : * A Unicode code point may occupy either one or two char16_t code units.
1793 : *
1794 : * @param start the index of the first code unit to check (0 for the entire string)
1795 : * @param length the number of char16_t code units to check
1796 : * (use INT32_MAX for the entire string; remember that start/length
1797 : * values are pinned)
1798 : * @param number The number of code points in the (sub)string is compared against
1799 : * the 'number' parameter.
1800 : * @return Boolean value for whether the string contains more Unicode code points
1801 : * than 'number'. Same as (u_countChar32(s, length)>number).
1802 : * @see countChar32
1803 : * @see u_strHasMoreChar32Than
1804 : * @stable ICU 2.4
1805 : */
1806 : UBool
1807 : hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
1808 :
1809 : /**
1810 : * Determine if this string is empty.
1811 : * @return TRUE if this string contains 0 characters, FALSE otherwise.
1812 : * @stable ICU 2.0
1813 : */
1814 : inline UBool isEmpty(void) const;
1815 :
1816 : /**
1817 : * Return the capacity of the internal buffer of the UnicodeString object.
1818 : * This is useful together with the getBuffer functions.
1819 : * See there for details.
1820 : *
1821 : * @return the number of char16_ts available in the internal buffer
1822 : * @see getBuffer
1823 : * @stable ICU 2.0
1824 : */
1825 : inline int32_t getCapacity(void) const;
1826 :
1827 : /* Other operations */
1828 :
1829 : /**
1830 : * Generate a hash code for this object.
1831 : * @return The hash code of this UnicodeString.
1832 : * @stable ICU 2.0
1833 : */
1834 : inline int32_t hashCode(void) const;
1835 :
1836 : /**
1837 : * Determine if this object contains a valid string.
1838 : * A bogus string has no value. It is different from an empty string,
1839 : * although in both cases isEmpty() returns TRUE and length() returns 0.
1840 : * setToBogus() and isBogus() can be used to indicate that no string value is available.
1841 : * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and
1842 : * length() returns 0.
1843 : *
1844 : * @return TRUE if the string is bogus/invalid, FALSE otherwise
1845 : * @see setToBogus()
1846 : * @stable ICU 2.0
1847 : */
1848 : inline UBool isBogus(void) const;
1849 :
1850 :
1851 : //========================================
1852 : // Write operations
1853 : //========================================
1854 :
1855 : /* Assignment operations */
1856 :
1857 : /**
1858 : * Assignment operator. Replace the characters in this UnicodeString
1859 : * with the characters from <TT>srcText</TT>.
1860 : *
1861 : * Starting with ICU 2.4, the assignment operator and the copy constructor
1862 : * allocate a new buffer and copy the buffer contents even for readonly aliases.
1863 : * By contrast, the fastCopyFrom() function implements the old,
1864 : * more efficient but less safe behavior
1865 : * of making this string also a readonly alias to the same buffer.
1866 : *
1867 : * If the source object has an "open" buffer from getBuffer(minCapacity),
1868 : * then the copy is an empty string.
1869 : *
1870 : * @param srcText The text containing the characters to replace
1871 : * @return a reference to this
1872 : * @stable ICU 2.0
1873 : * @see fastCopyFrom
1874 : */
1875 : UnicodeString &operator=(const UnicodeString &srcText);
1876 :
1877 : /**
1878 : * Almost the same as the assignment operator.
1879 : * Replace the characters in this UnicodeString
1880 : * with the characters from <code>srcText</code>.
1881 : *
1882 : * This function works the same as the assignment operator
1883 : * for all strings except for ones that are readonly aliases.
1884 : *
1885 : * Starting with ICU 2.4, the assignment operator and the copy constructor
1886 : * allocate a new buffer and copy the buffer contents even for readonly aliases.
1887 : * This function implements the old, more efficient but less safe behavior
1888 : * of making this string also a readonly alias to the same buffer.
1889 : *
1890 : * The fastCopyFrom function must be used only if it is known that the lifetime of
1891 : * this UnicodeString does not exceed the lifetime of the aliased buffer
1892 : * including its contents, for example for strings from resource bundles
1893 : * or aliases to string constants.
1894 : *
1895 : * If the source object has an "open" buffer from getBuffer(minCapacity),
1896 : * then the copy is an empty string.
1897 : *
1898 : * @param src The text containing the characters to replace.
1899 : * @return a reference to this
1900 : * @stable ICU 2.4
1901 : */
1902 : UnicodeString &fastCopyFrom(const UnicodeString &src);
1903 :
1904 : #if U_HAVE_RVALUE_REFERENCES
1905 : /**
1906 : * Move assignment operator; forwards to moveFrom(src), which might leave src in bogus state.
1907 : * This string will have the same contents and state that the source string had.
1908 : * The behavior is undefined if *this and src are the same object.
1909 : * @param src source string; might be left in bogus state
1910 : * @return *this (the value returned by moveFrom(src))
1911 : * @stable ICU 56
1912 : */
1913 0 : UnicodeString &operator=(UnicodeString &&src) U_NOEXCEPT {
1914 0 : return moveFrom(src); // src is a named (lvalue) expression here, so it is passed on as a UnicodeString &
1915 : }
1916 : #endif
1917 : // do not use #ifndef U_HIDE_DRAFT_API for moveFrom, needed by non-draft API
1918 : /**
1919 : * Move assignment, might leave src in bogus state.
1920 : * This string will have the same contents and state that the source string had.
1921 : * The behavior is undefined if *this and src are the same object.
1922 : *
1923 : * Can be called explicitly, does not need C++11 support.
1924 : * @param src source string
1925 : * @return *this
1926 : * @draft ICU 56
1927 : */
1928 : UnicodeString &moveFrom(UnicodeString &src) U_NOEXCEPT;
1929 :
1930 : /**
1931 : * Swap strings.
1932 : * @param other other string
1933 : * @stable ICU 56
1934 : */
1935 : void swap(UnicodeString &other) U_NOEXCEPT;
1936 :
1937 : /**
1938 : * Non-member UnicodeString swap function; forwards to the member function s1.swap(s2).
1939 : * @param s1 will get s2's contents and state
1940 : * @param s2 will get s1's contents and state
1941 : * @stable ICU 56
1942 : */
1943 : friend U_COMMON_API inline void U_EXPORT2
1944 : swap(UnicodeString &s1, UnicodeString &s2) U_NOEXCEPT {
1945 : s1.swap(s2); // delegate to the member swap()
1946 : }
1947 :
1948 : /**
1949 : * Assignment operator. Replace the characters in this UnicodeString
1950 : * with the code unit <TT>ch</TT>.
1951 : * @param ch the code unit to replace
1952 : * @return a reference to this
1953 : * @stable ICU 2.0
1954 : */
1955 : inline UnicodeString& operator= (char16_t ch);
1956 :
1957 : /**
1958 : * Assignment operator. Replace the characters in this UnicodeString
1959 : * with the code point <TT>ch</TT>.
1960 : * @param ch the code point to replace
1961 : * @return a reference to this
1962 : * @stable ICU 2.0
1963 : */
1964 : inline UnicodeString& operator= (UChar32 ch);
1965 :
1966 : /**
1967 : * Set the text in the UnicodeString object to the characters
1968 : * in <TT>srcText</TT> in the range
1969 : * [<TT>srcStart</TT>, <TT>srcText.length()</TT>).
1970 : * <TT>srcText</TT> is not modified.
1971 : * @param srcText the source for the new characters
1972 : * @param srcStart the offset into <TT>srcText</TT> where new characters
1973 : * will be obtained
1974 : * @return a reference to this
1975 : * @stable ICU 2.2
1976 : */
1977 : inline UnicodeString& setTo(const UnicodeString& srcText,
1978 : int32_t srcStart);
1979 :
1980 : /**
1981 : * Set the text in the UnicodeString object to the characters
1982 : * in <TT>srcText</TT> in the range
1983 : * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
1984 : * <TT>srcText</TT> is not modified.
1985 : * @param srcText the source for the new characters
1986 : * @param srcStart the offset into <TT>srcText</TT> where new characters
1987 : * will be obtained
1988 : * @param srcLength the number of characters in <TT>srcText</TT> in the
1989 : * replace string.
1990 : * @return a reference to this
1991 : * @stable ICU 2.0
1992 : */
1993 : inline UnicodeString& setTo(const UnicodeString& srcText,
1994 : int32_t srcStart,
1995 : int32_t srcLength);
1996 :
1997 : /**
1998 : * Set the text in the UnicodeString object to the characters in
1999 : * <TT>srcText</TT>.
2000 : * <TT>srcText</TT> is not modified.
2001 : * @param srcText the source for the new characters
2002 : * @return a reference to this
2003 : * @stable ICU 2.0
2004 : */
2005 : inline UnicodeString& setTo(const UnicodeString& srcText);
2006 :
2007 : /**
2008 : * Set the characters in the UnicodeString object to the characters
2009 : * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
2010 : * @param srcChars the source for the new characters
2011 : * @param srcLength the number of Unicode characters in srcChars.
2012 : * @return a reference to this
2013 : * @stable ICU 2.0
2014 : */
2015 : inline UnicodeString& setTo(const char16_t *srcChars,
2016 : int32_t srcLength);
2017 :
2018 : /**
2019 : * Set the characters in the UnicodeString object to the code unit
2020 : * <TT>srcChar</TT>.
2021 : * @param srcChar the code unit which becomes the UnicodeString's character
2022 : * content
2023 : * @return a reference to this
2024 : * @stable ICU 2.0
2025 : */
2026 : UnicodeString& setTo(char16_t srcChar);
2027 :
2028 : /**
2029 : * Set the characters in the UnicodeString object to the code point
2030 : * <TT>srcChar</TT>.
2031 : * @param srcChar the code point which becomes the UnicodeString's character
2032 : * content
2033 : * @return a reference to this
2034 : * @stable ICU 2.0
2035 : */
2036 : UnicodeString& setTo(UChar32 srcChar);
2037 :
2038 : /**
2039 : * Aliasing setTo() function, analogous to the readonly-aliasing char16_t* constructor.
2040 : * The text will be used for the UnicodeString object, but
2041 : * it will not be released when the UnicodeString is destroyed.
2042 : * This has copy-on-write semantics:
2043 : * When the string is modified, then the buffer is first copied into
2044 : * newly allocated memory.
2045 : * The aliased buffer is never modified.
2046 : *
2047 : * In an assignment to another UnicodeString, when using the copy constructor
2048 : * or the assignment operator, the text will be copied.
2049 : * When using fastCopyFrom(), the text will be aliased again,
2050 : * so that both strings then alias the same readonly-text.
2051 : *
2052 : * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
2053 : * This must be true if <code>textLength==-1</code>.
2054 : * @param text The characters to alias for the UnicodeString.
2055 : * @param textLength The number of Unicode characters in <code>text</code> to alias.
2056 : * If -1, then this function will determine the length
2057 : * by calling <code>u_strlen()</code>.
2058 : * @return a reference to this
2059 : * @stable ICU 2.0
2060 : */
2061 : UnicodeString &setTo(UBool isTerminated,
2062 : ConstChar16Ptr text,
2063 : int32_t textLength);
2064 :
2065 : /**
2066 : * Aliasing setTo() function, analogous to the writable-aliasing char16_t* constructor.
2067 : * The text will be used for the UnicodeString object, but
2068 : * it will not be released when the UnicodeString is destroyed.
2069 : * This has write-through semantics:
2070 : * For as long as the capacity of the buffer is sufficient, write operations
2071 : * will directly affect the buffer. When more capacity is necessary, then
2072 : * a new buffer will be allocated and the contents copied as with regularly
2073 : * constructed strings.
2074 : * In an assignment to another UnicodeString, the buffer will be copied.
2075 : * The extract(Char16Ptr dst) function detects whether the dst pointer is the same
2076 : * as the string buffer itself and will in this case not copy the contents.
2077 : *
2078 : * @param buffer The characters to alias for the UnicodeString.
2079 : * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
2080 : * @param buffCapacity The size of <code>buffer</code> in char16_ts.
2081 : * @return a reference to this
2082 : * @stable ICU 2.0
2083 : */
2084 : UnicodeString &setTo(char16_t *buffer,
2085 : int32_t buffLength,
2086 : int32_t buffCapacity);
2087 :
2088 : /**
2089 : * Make this UnicodeString object invalid.
2090 : * The string will test TRUE with isBogus().
2091 : *
2092 : * A bogus string has no value. It is different from an empty string.
2093 : * It can be used to indicate that no string value is available.
2094 : * getBuffer() and getTerminatedBuffer() return NULL, and
2095 : * length() returns 0.
2096 : *
2097 : * This utility function is used throughout the UnicodeString
2098 : * implementation to indicate that a UnicodeString operation failed,
2099 : * and may be used in other functions,
2100 : * especially but not exclusively when such functions do not
2101 : * take a UErrorCode for simplicity.
2102 : *
2103 : * The following methods, and no others, will clear a string object's bogus flag:
2104 : * - remove()
2105 : * - remove(0, INT32_MAX)
2106 : * - truncate(0)
2107 : * - operator=() (assignment operator)
2108 : * - setTo(...)
2109 : *
2110 : * The simplest way to turn a bogus string into an empty one
2111 : * is to use the remove() function.
2112 : * Examples for other functions that are equivalent to "set to empty string":
2113 : * \code
2114 : * if(s.isBogus()) {
2115 : * s.remove(); // set to an empty string (remove all), or
2116 : * s.remove(0, INT32_MAX); // set to an empty string (remove all), or
2117 : * s.truncate(0); // set to an empty string (complete truncation), or
2118 : * s=UnicodeString(); // assign an empty string, or
2119 : * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
2120 : * static const char16_t nul=0;
2121 : * s.setTo(&nul, 0); // set to an empty C Unicode string
2122 : * }
2123 : * \endcode
2124 : *
2125 : * @see isBogus()
2126 : * @stable ICU 2.0
2127 : */
2128 : void setToBogus();
2129 :
2130 : /**
2131 : * Set the character at the specified offset to the specified character.
2132 : * @param offset A valid offset into the text of the character to set
2133 : * @param ch The new character
2134 : * @return A reference to this
2135 : * @stable ICU 2.0
2136 : */
2137 : UnicodeString& setCharAt(int32_t offset,
2138 : char16_t ch);
2139 :
2140 :
2141 : /* Append operations */
2142 :
2143 : /**
2144 : * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString
2145 : * object.
2146 : * @param ch the code unit to be appended
2147 : * @return a reference to this
2148 : * @stable ICU 2.0
2149 : */
2150 : inline UnicodeString& operator+= (char16_t ch);
2151 :
2152 : /**
2153 : * Append operator. Append the code point <TT>ch</TT> to the UnicodeString
2154 : * object.
2155 : * @param ch the code point to be appended
2156 : * @return a reference to this
2157 : * @stable ICU 2.0
2158 : */
2159 : inline UnicodeString& operator+= (UChar32 ch);
2160 :
2161 : /**
2162 : * Append operator. Append the characters in <TT>srcText</TT> to the
2163 : * UnicodeString object. <TT>srcText</TT> is not modified.
2164 : * @param srcText the source for the new characters
2165 : * @return a reference to this
2166 : * @stable ICU 2.0
2167 : */
2168 : inline UnicodeString& operator+= (const UnicodeString& srcText);
2169 :
2170 : /**
2171 : * Append the characters
2172 : * in <TT>srcText</TT> in the range
2173 : * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the
2174 : * end of the UnicodeString object. <TT>srcText</TT>
2175 : * is not modified.
2176 : * @param srcText the source for the new characters
2177 : * @param srcStart the offset into <TT>srcText</TT> where new characters
2178 : * will be obtained
2179 : * @param srcLength the number of characters in <TT>srcText</TT> in
2180 : * the append string
2181 : * @return a reference to this
2182 : * @stable ICU 2.0
2183 : */
2184 : inline UnicodeString& append(const UnicodeString& srcText,
2185 : int32_t srcStart,
2186 : int32_t srcLength);
2187 :
2188 : /**
2189 : * Append the characters in <TT>srcText</TT> to the UnicodeString object.
2190 : * <TT>srcText</TT> is not modified.
2191 : * @param srcText the source for the new characters
2192 : * @return a reference to this
2193 : * @stable ICU 2.0
2194 : */
2195 : inline UnicodeString& append(const UnicodeString& srcText);
2196 :
2197 : /**
2198 : * Append the characters in <TT>srcChars</TT> in the range
2199 : * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString
2200 : * object, at its end.
2201 : * <TT>srcChars</TT> is not modified.
2202 : * @param srcChars the source for the new characters
2203 : * @param srcStart the offset into <TT>srcChars</TT> where new characters
2204 : * will be obtained
2205 : * @param srcLength the number of characters in <TT>srcChars</TT> in
2206 : * the append string; can be -1 if <TT>srcChars</TT> is NUL-terminated
2207 : * @return a reference to this
2208 : * @stable ICU 2.0
2209 : */
2210 : inline UnicodeString& append(const char16_t *srcChars,
2211 : int32_t srcStart,
2212 : int32_t srcLength);
2213 :
2214 : /**
2215 : * Append the characters in <TT>srcChars</TT> to the UnicodeString object.
2216 : * <TT>srcChars</TT> is not modified.
2217 : * @param srcChars the source for the new characters
2218 : * @param srcLength the number of Unicode characters in <TT>srcChars</TT>;
2219 : * can be -1 if <TT>srcChars</TT> is NUL-terminated
2220 : * @return a reference to this
2221 : * @stable ICU 2.0
2222 : */
2223 : inline UnicodeString& append(ConstChar16Ptr srcChars,
2224 : int32_t srcLength);
2225 :
2226 : /**
2227 : * Append the code unit <TT>srcChar</TT> to the UnicodeString object.
2228 : * @param srcChar the code unit to append
2229 : * @return a reference to this
2230 : * @stable ICU 2.0
2231 : */
2232 : inline UnicodeString& append(char16_t srcChar);
2233 :
2234 : /**
2235 : * Append the code point <TT>srcChar</TT> to the UnicodeString object.
2236 : * @param srcChar the code point to append
2237 : * @return a reference to this
2238 : * @stable ICU 2.0
2239 : */
2240 : UnicodeString& append(UChar32 srcChar);
2241 :
2242 :
2243 : /* Insert operations */
2244 :
2245 : /**
2246 : * Insert the characters in <TT>srcText</TT> in the range
2247 : * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
2248 : * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
2249 : * @param start the offset where the insertion begins
2250 : * @param srcText the source for the new characters
2251 : * @param srcStart the offset into <TT>srcText</TT> where new characters
2252 : * will be obtained
2253 : * @param srcLength the number of characters in <TT>srcText</TT> in
2254 : * the insert string
2255 : * @return a reference to this
2256 : * @stable ICU 2.0
2257 : */
2258 : inline UnicodeString& insert(int32_t start,
2259 : const UnicodeString& srcText,
2260 : int32_t srcStart,
2261 : int32_t srcLength);
2262 :
2263 : /**
2264 : * Insert the characters in <TT>srcText</TT> into the UnicodeString object
2265 : * at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
2266 : * @param start the offset where the insertion begins
2267 : * @param srcText the source for the new characters
2268 : * @return a reference to this
2269 : * @stable ICU 2.0
2270 : */
2271 : inline UnicodeString& insert(int32_t start,
2272 : const UnicodeString& srcText);
2273 :
2274 : /**
2275 : * Insert the characters in <TT>srcChars</TT> in the range
2276 : * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
2277 : * object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
2278 : * @param start the offset at which the insertion begins
2279 : * @param srcChars the source for the new characters
2280 : * @param srcStart the offset into <TT>srcChars</TT> where new characters
2281 : * will be obtained
2282 : * @param srcLength the number of characters in <TT>srcChars</TT>
2283 : * in the insert string
2284 : * @return a reference to this
2285 : * @stable ICU 2.0
2286 : */
2287 : inline UnicodeString& insert(int32_t start,
2288 : const char16_t *srcChars,
2289 : int32_t srcStart,
2290 : int32_t srcLength);
2291 :
2292 : /**
2293 : * Insert the characters in <TT>srcChars</TT> into the UnicodeString object
2294 : * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
2295 : * @param start the offset where the insertion begins
2296 : * @param srcChars the source for the new characters
2297 : * @param srcLength the number of Unicode characters in srcChars.
2298 : * @return a reference to this
2299 : * @stable ICU 2.0
2300 : */
2301 : inline UnicodeString& insert(int32_t start,
2302 : ConstChar16Ptr srcChars,
2303 : int32_t srcLength);
2304 :
2305 : /**
2306 : * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at
2307 : * offset <TT>start</TT>.
2308 : * @param start the offset at which the insertion occurs
2309 : * @param srcChar the code unit to insert
2310 : * @return a reference to this
2311 : * @stable ICU 2.0
2312 : */
2313 : inline UnicodeString& insert(int32_t start,
2314 : char16_t srcChar);
2315 :
2316 : /**
2317 : * Insert the code point <TT>srcChar</TT> into the UnicodeString object at
2318 : * offset <TT>start</TT>.
2319 : * @param start the offset at which the insertion occurs
2320 : * @param srcChar the code point to insert
2321 : * @return a reference to this
2322 : * @stable ICU 2.0
2323 : */
2324 : inline UnicodeString& insert(int32_t start,
2325 : UChar32 srcChar);
2326 :
2327 :
2328 : /* Replace operations */
2329 :
2330 : /**
2331 : * Replace the characters in the range
2332 : * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2333 : * <TT>srcText</TT> in the range
2334 : * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
2335 : * <TT>srcText</TT> is not modified.
2336 : * @param start the offset at which the replace operation begins
2337 : * @param length the number of characters to replace. The character at
2338 : * <TT>start + length</TT> is not modified.
2339 : * @param srcText the source for the new characters
2340 : * @param srcStart the offset into <TT>srcText</TT> where new characters
2341 : * will be obtained
2342 : * @param srcLength the number of characters in <TT>srcText</TT> in
2343 : * the replace string
2344 : * @return a reference to this
2345 : * @stable ICU 2.0
2346 : */
2347 : UnicodeString& replace(int32_t start,
2348 : int32_t length,
2349 : const UnicodeString& srcText,
2350 : int32_t srcStart,
2351 : int32_t srcLength);
2352 :
2353 : /**
2354 : * Replace the characters in the range
2355 : * [<TT>start</TT>, <TT>start + length</TT>)
2356 : * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is
2357 : * not modified.
2358 : * @param start the offset at which the replace operation begins
2359 : * @param length the number of characters to replace. The character at
2360 : * <TT>start + length</TT> is not modified.
2361 : * @param srcText the source for the new characters
2362 : * @return a reference to this
2363 : * @stable ICU 2.0
2364 : */
2365 : UnicodeString& replace(int32_t start,
2366 : int32_t length,
2367 : const UnicodeString& srcText);
2368 :
2369 : /**
2370 : * Replace the characters in the range
2371 : * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2372 : * <TT>srcChars</TT> in the range
2373 : * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>
2374 : * is not modified.
2375 : * @param start the offset at which the replace operation begins
2376 : * @param length the number of characters to replace. The character at
2377 : * <TT>start + length</TT> is not modified.
2378 : * @param srcChars the source for the new characters
2379 : * @param srcStart the offset into <TT>srcChars</TT> where new characters
2380 : * will be obtained
2381 : * @param srcLength the number of characters in <TT>srcChars</TT>
2382 : * in the replace string
2383 : * @return a reference to this
2384 : * @stable ICU 2.0
2385 : */
2386 : UnicodeString& replace(int32_t start,
2387 : int32_t length,
2388 : const char16_t *srcChars,
2389 : int32_t srcStart,
2390 : int32_t srcLength);
2391 :
2392 : /**
2393 : * Replace the characters in the range
2394 : * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2395 : * <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
2396 : * @param start the offset at which the replace operation begins
2397 : * @param length number of characters to replace. The character at
2398 : * <TT>start + length</TT> is not modified.
2399 : * @param srcChars the source for the new characters
2400 : * @param srcLength the number of Unicode characters in srcChars
2401 : * @return a reference to this
2402 : * @stable ICU 2.0
2403 : */
2404 : inline UnicodeString& replace(int32_t start,
2405 : int32_t length,
2406 : ConstChar16Ptr srcChars,
2407 : int32_t srcLength);
2408 :
2409 : /**
2410 : * Replace the characters in the range
2411 : * [<TT>start</TT>, <TT>start + length</TT>) with the code unit
2412 : * <TT>srcChar</TT>.
2413 : * @param start the offset at which the replace operation begins
2414 : * @param length the number of characters to replace. The character at
2415 : * <TT>start + length</TT> is not modified.
2416 : * @param srcChar the new code unit
2417 : * @return a reference to this
2418 : * @stable ICU 2.0
2419 : */
2420 : inline UnicodeString& replace(int32_t start,
2421 : int32_t length,
2422 : char16_t srcChar);
2423 :
2424 : /**
2425 : * Replace the characters in the range
2426 : * [<TT>start</TT>, <TT>start + length</TT>) with the code point
2427 : * <TT>srcChar</TT>.
2428 : * @param start the offset at which the replace operation begins
2429 : * @param length the number of characters to replace. The character at
2430 : * <TT>start + length</TT> is not modified.
2431 : * @param srcChar the new code point
2432 : * @return a reference to this
2433 : * @stable ICU 2.0
2434 : */
2435 : UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
2436 :
2437 : /**
2438 : * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
2439 : * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.
2440 : * @param start the offset at which the replace operation begins
2441 : * @param limit the offset immediately following the replace range
2442 : * @param srcText the source for the new characters
2443 : * @return a reference to this
2444 : * @stable ICU 2.0
2445 : */
2446 : inline UnicodeString& replaceBetween(int32_t start,
2447 : int32_t limit,
2448 : const UnicodeString& srcText);
2449 :
2450 : /**
2451 : * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
2452 : * with the characters in <TT>srcText</TT> in the range
2453 : * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
2454 : * @param start the offset at which the replace operation begins
2455 : * @param limit the offset immediately following the replace range
2456 : * @param srcText the source for the new characters
2457 : * @param srcStart the offset into <TT>srcText</TT> where new characters
2458 : * will be obtained
2459 : * @param srcLimit the offset immediately following the range to copy
2460 : * in <TT>srcText</TT>
2461 : * @return a reference to this
2462 : * @stable ICU 2.0
2463 : */
2464 : inline UnicodeString& replaceBetween(int32_t start,
2465 : int32_t limit,
2466 : const UnicodeString& srcText,
2467 : int32_t srcStart,
2468 : int32_t srcLimit);
2469 :
2470 : /**
2471 : * Replace a substring of this object with the given text.
2472 : * @param start the beginning index, inclusive; <code>0 <= start
2473 : * <= limit</code>.
2474 : * @param limit the ending index, exclusive; <code>start <= limit
2475 : * <= length()</code>.
2476 : * @param text the text to replace characters <code>start</code>
2477 : * to <code>limit - 1</code>
2478 : * @stable ICU 2.0
2479 : */
2480 : virtual void handleReplaceBetween(int32_t start,
2481 : int32_t limit,
2482 : const UnicodeString& text);
2483 :
2484 : /**
2485 : * Replaceable API
2486 : * @return TRUE if it has MetaData
2487 : * @stable ICU 2.4
2488 : */
2489 : virtual UBool hasMetaData() const;
2490 :
2491 : /**
2492 : * Copy a substring of this object, retaining attribute (out-of-band)
2493 : * information. This method is used to duplicate or reorder substrings.
2494 : * The destination index must not overlap the source range.
2495 : *
2496 : * @param start the beginning index, inclusive; <code>0 <= start <=
2497 : * limit</code>.
2498 : * @param limit the ending index, exclusive; <code>start <= limit <=
2499 : * length()</code>.
2500 : * @param dest the destination index. The characters from
2501 : * <code>start..limit-1</code> will be copied to <code>dest</code>.
2502 : * Implementations of this method may assume that <code>dest <= start ||
2503 : * dest >= limit</code>.
2504 : * @stable ICU 2.0
2505 : */
2506 : virtual void copy(int32_t start, int32_t limit, int32_t dest);
2507 :
2508 : /* Search and replace operations */
2509 :
2510 : /**
2511 : * Replace all occurrences of characters in oldText with the characters
2512 : * in newText
2513 : * @param oldText the text containing the search text
2514 : * @param newText the text containing the replacement text
2515 : * @return a reference to this
2516 : * @stable ICU 2.0
2517 : */
2518 : inline UnicodeString& findAndReplace(const UnicodeString& oldText,
2519 : const UnicodeString& newText);
2520 :
2521 : /**
2522 : * Replace all occurrences of characters in oldText with characters
2523 : * in newText
2524 : * in the range [<TT>start</TT>, <TT>start + length</TT>).
2525 : * @param start the start of the range in which replace will be performed
2526 : * @param length the length of the range in which replace will be performed
2527 : * @param oldText the text containing the search text
2528 : * @param newText the text containing the replacement text
2529 : * @return a reference to this
2530 : * @stable ICU 2.0
2531 : */
2532 : inline UnicodeString& findAndReplace(int32_t start,
2533 : int32_t length,
2534 : const UnicodeString& oldText,
2535 : const UnicodeString& newText);
2536 :
2537 : /**
2538 : * Replace all occurrences of characters in oldText in the range
2539 : * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters
2540 : * in newText in the range
2541 : * [<TT>newStart</TT>, <TT>newStart + newLength</TT>)
2542 : * in the range [<TT>start</TT>, <TT>start + length</TT>).
2543 : * @param start the start of the range in which replace will be performed
2544 : * @param length the length of the range in which replace will be performed
2545 : * @param oldText the text containing the search text
2546 : * @param oldStart the start of the search range in <TT>oldText</TT>
2547 : * @param oldLength the length of the search range in <TT>oldText</TT>
2548 : * @param newText the text containing the replacement text
2549 : * @param newStart the start of the replacement range in <TT>newText</TT>
2550 : * @param newLength the length of the replacement range in <TT>newText</TT>
2551 : * @return a reference to this
2552 : * @stable ICU 2.0
2553 : */
2554 : UnicodeString& findAndReplace(int32_t start,
2555 : int32_t length,
2556 : const UnicodeString& oldText,
2557 : int32_t oldStart,
2558 : int32_t oldLength,
2559 : const UnicodeString& newText,
2560 : int32_t newStart,
2561 : int32_t newLength);
2562 :
2563 :
2564 : /* Remove operations */
2565 :
2566 : /**
2567 : * Remove all characters from the UnicodeString object.
2568 : * @return a reference to this
2569 : * @stable ICU 2.0
2570 : */
2571 : inline UnicodeString& remove(void);
2572 :
2573 : /**
2574 : * Remove the characters in the range
2575 : * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.
2576 : * @param start the offset of the first character to remove
2577 : * @param length the number of characters to remove
2578 : * @return a reference to this
2579 : * @stable ICU 2.0
2580 : */
2581 : inline UnicodeString& remove(int32_t start,
2582 : int32_t length = (int32_t)INT32_MAX);
2583 :
2584 : /**
2585 : * Remove the characters in the range
2586 : * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.
2587 : * @param start the offset of the first character to remove
2588 : * @param limit the offset immediately following the range to remove
2589 : * @return a reference to this
2590 : * @stable ICU 2.0
2591 : */
2592 : inline UnicodeString& removeBetween(int32_t start,
2593 : int32_t limit = (int32_t)INT32_MAX);
2594 :
2595 : /**
2596 : * Retain only the characters in the range
2597 : * [<code>start</code>, <code>limit</code>) from the UnicodeString object.
2598 : * Removes characters before <code>start</code> and at and after <code>limit</code>.
2599 : * @param start the offset of the first character to retain
2600 : * @param limit the offset immediately following the range to retain
2601 : * @return a reference to this
2602 : * @stable ICU 4.4
2603 : */
2604 : inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
2605 :
2606 : /* Length operations */
2607 :
2608 : /**
2609 : * Pad the start of this UnicodeString with the character <TT>padChar</TT>.
2610 : * If the length of this UnicodeString is less than targetLength,
2611 : * targetLength - length() copies of padChar will be added to the
2612 : * beginning of this UnicodeString.
2613 : * @param targetLength the desired length of the string
2614 : * @param padChar the character to use for padding. Defaults to
2615 : * space (U+0020)
2616 : * @return TRUE if the text was padded, FALSE otherwise.
2617 : * @stable ICU 2.0
2618 : */
2619 : UBool padLeading(int32_t targetLength,
2620 : char16_t padChar = 0x0020);
2621 :
2622 : /**
2623 : * Pad the end of this UnicodeString with the character <TT>padChar</TT>.
2624 : * If the length of this UnicodeString is less than targetLength,
2625 : * targetLength - length() copies of padChar will be added to the
2626 : * end of this UnicodeString.
2627 : * @param targetLength the desired length of the string
2628 : * @param padChar the character to use for padding. Defaults to
2629 : * space (U+0020)
2630 : * @return TRUE if the text was padded, FALSE otherwise.
2631 : * @stable ICU 2.0
2632 : */
2633 : UBool padTrailing(int32_t targetLength,
2634 : char16_t padChar = 0x0020);
2635 :
2636 : /**
2637 : * Truncate this UnicodeString to the <TT>targetLength</TT>.
2638 : * @param targetLength the desired length of this UnicodeString.
2639 : * @return TRUE if the text was truncated, FALSE otherwise
2640 : * @stable ICU 2.0
2641 : */
2642 : inline UBool truncate(int32_t targetLength);
2643 :
2644 : /**
2645 : * Trims leading and trailing whitespace from this UnicodeString.
2646 : * @return a reference to this
2647 : * @stable ICU 2.0
2648 : */
2649 : UnicodeString& trim(void);
2650 :
2651 :
2652 : /* Miscellaneous operations */
2653 :
2654 : /**
2655 : * Reverse this UnicodeString in place.
2656 : * @return a reference to this
2657 : * @stable ICU 2.0
2658 : */
2659 : inline UnicodeString& reverse(void);
2660 :
2661 : /**
2662 : * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in
2663 : * this UnicodeString.
2664 : * @param start the start of the range to reverse
2665 : * @param length the number of characters to reverse
2666 : * @return a reference to this
2667 : * @stable ICU 2.0
2668 : */
2669 : inline UnicodeString& reverse(int32_t start,
2670 : int32_t length);
2671 :
2672 : /**
2673 : * Convert the characters in this to UPPER CASE following the conventions of
2674 : * the default locale.
2675 : * @return A reference to this.
2676 : * @stable ICU 2.0
2677 : */
2678 : UnicodeString& toUpper(void);
2679 :
2680 : /**
2681 : * Convert the characters in this to UPPER CASE following the conventions of
2682 : * a specific locale.
2683 : * @param locale The locale containing the conventions to use.
2684 : * @return A reference to this.
2685 : * @stable ICU 2.0
2686 : */
2687 : UnicodeString& toUpper(const Locale& locale);
2688 :
2689 : /**
2690 : * Convert the characters in this to lower case following the conventions of
2691 : * the default locale.
2692 : * @return A reference to this.
2693 : * @stable ICU 2.0
2694 : */
2695 : UnicodeString& toLower(void);
2696 :
2697 : /**
2698 : * Convert the characters in this to lower case following the conventions of
2699 : * a specific locale.
2700 : * @param locale The locale containing the conventions to use.
2701 : * @return A reference to this.
2702 : * @stable ICU 2.0
2703 : */
2704 : UnicodeString& toLower(const Locale& locale);
2705 :
2706 : #if !UCONFIG_NO_BREAK_ITERATION
2707 :
2708 : /**
2709 : * Titlecase this string, convenience function using the default locale.
2710 : *
2711 : * Casing is locale-dependent and context-sensitive.
2712 : * Titlecasing uses a break iterator to find the first characters of words
2713 : * that are to be titlecased. It titlecases those characters and lowercases
2714 : * all others.
2715 : *
2716 : * The titlecase break iterator can be provided to customize for arbitrary
2717 : * styles, using rules and dictionaries beyond the standard iterators.
2718 : * It may be more efficient to always provide an iterator to avoid
2719 : * opening and closing one for each string.
2720 : * The standard titlecase iterator for the root locale implements the
2721 : * algorithm of Unicode TR 21.
2722 : *
2723 : * This function uses only the setText(), first() and next() methods of the
2724 : * provided break iterator.
2725 : *
2726 : * @param titleIter A break iterator to find the first characters of words
2727 : * that are to be titlecased.
2728 : * If none is provided (0), then a standard titlecase
2729 : * break iterator is opened.
2730 : * Otherwise the provided iterator is set to the string's text.
2731 : * @return A reference to this.
2732 : * @stable ICU 2.1
2733 : */
2734 : UnicodeString &toTitle(BreakIterator *titleIter);
2735 :
2736 : /**
2737 : * Titlecase this string.
2738 : *
2739 : * Casing is locale-dependent and context-sensitive.
2740 : * Titlecasing uses a break iterator to find the first characters of words
2741 : * that are to be titlecased. It titlecases those characters and lowercases
2742 : * all others.
2743 : *
2744 : * The titlecase break iterator can be provided to customize for arbitrary
2745 : * styles, using rules and dictionaries beyond the standard iterators.
2746 : * It may be more efficient to always provide an iterator to avoid
2747 : * opening and closing one for each string.
2748 : * The standard titlecase iterator for the root locale implements the
2749 : * algorithm of Unicode TR 21.
2750 : *
2751 : * This function uses only the setText(), first() and next() methods of the
2752 : * provided break iterator.
2753 : *
2754 : * @param titleIter A break iterator to find the first characters of words
2755 : * that are to be titlecased.
2756 : * If none is provided (0), then a standard titlecase
2757 : * break iterator is opened.
2758 : * Otherwise the provided iterator is set to the string's text.
2759 : * @param locale The locale to consider.
2760 : * @return A reference to this.
2761 : * @stable ICU 2.1
2762 : */
2763 : UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
2764 :
2765 : /**
2766 : * Titlecase this string, with options.
2767 : *
2768 : * Casing is locale-dependent and context-sensitive.
2769 : * Titlecasing uses a break iterator to find the first characters of words
2770 : * that are to be titlecased. It titlecases those characters and lowercases
2771 : * all others. (This can be modified with options.)
2772 : *
2773 : * The titlecase break iterator can be provided to customize for arbitrary
2774 : * styles, using rules and dictionaries beyond the standard iterators.
2775 : * It may be more efficient to always provide an iterator to avoid
2776 : * opening and closing one for each string.
2777 : * The standard titlecase iterator for the root locale implements the
2778 : * algorithm of Unicode TR 21.
2779 : *
2780 : * This function uses only the setText(), first() and next() methods of the
2781 : * provided break iterator.
2782 : *
2783 : * @param titleIter A break iterator to find the first characters of words
2784 : * that are to be titlecased.
2785 : * If none is provided (0), then a standard titlecase
2786 : * break iterator is opened.
2787 : * Otherwise the provided iterator is set to the string's text.
2788 : * @param locale The locale to consider.
2789 : * @param options Options bit set, see ucasemap_open().
2790 : * @return A reference to this.
2791 : * @see U_TITLECASE_NO_LOWERCASE
2792 : * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
2793 : * @see ucasemap_open
2794 : * @stable ICU 3.8
2795 : */
2796 : UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
2797 :
2798 : #endif
2799 :
2800 : /**
2801 : * Case-folds the characters in this string.
2802 : *
2803 : * Case-folding is locale-independent and not context-sensitive,
2804 : * but there is an option for whether to include or exclude mappings for dotted I
2805 : * and dotless i that are marked with 'T' in CaseFolding.txt.
2806 : *
2807 : * The result may be longer or shorter than the original.
2808 : *
2809 : * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
2810 : * @return A reference to this.
2811 : * @stable ICU 2.0
2812 : */
2813 : UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
2814 :
2815 : //========================================
2816 : // Access to the internal buffer
2817 : //========================================
2818 :
2819 : /**
2820 : * Get a read/write pointer to the internal buffer.
2821 : * The buffer is guaranteed to be large enough for at least minCapacity char16_ts,
2822 : * writable, and is still owned by the UnicodeString object.
2823 : * Calls to getBuffer(minCapacity) must not be nested, and
2824 : * must be matched with calls to releaseBuffer(newLength).
2825 : * If the string buffer was read-only or shared,
2826 : * then it will be reallocated and copied.
2827 : *
2828 : * An attempted nested call will return 0, and will not further modify the
2829 : * state of the UnicodeString object.
2830 : * It also returns 0 if the string is bogus.
2831 : *
2832 : * The actual capacity of the string buffer may be larger than minCapacity.
2833 : * getCapacity() returns the actual capacity.
2834 : * For many operations, the full capacity should be used to avoid reallocations.
2835 : *
2836 : * While the buffer is "open" between getBuffer(minCapacity)
2837 : * and releaseBuffer(newLength), the following applies:
2838 : * - The string length is set to 0.
2839 : * - Any read API call on the UnicodeString object will behave like on a 0-length string.
2840 : * - Any write API call on the UnicodeString object is disallowed and will have no effect.
2841 : * - You can read from and write to the returned buffer.
2842 : * - The previous string contents will still be in the buffer;
2843 : * if you want to use it, then you need to call length() before getBuffer(minCapacity).
2844 : * If the length() was greater than minCapacity, then any contents after minCapacity
2845 : * may be lost.
2846 : * The buffer contents is not NUL-terminated by getBuffer().
2847 : * If length()<getCapacity() then you can terminate it by writing a NUL
2848 : * at index length().
2849 : * - You must call releaseBuffer(newLength) before and in order to
2850 : * return to normal UnicodeString operation.
2851 : *
2852 : * @param minCapacity the minimum number of char16_ts that are to be available
2853 : * in the buffer, starting at the returned pointer;
2854 : * default to the current string capacity if minCapacity==-1
2855 : * @return a writable pointer to the internal string buffer,
2856 : * or nullptr if an error occurs (nested calls, out of memory)
2857 : *
2858 : * @see releaseBuffer
2859 : * @see getTerminatedBuffer()
2860 : * @stable ICU 2.0
2861 : */
2862 : char16_t *getBuffer(int32_t minCapacity);
2863 :
2864 : /**
2865 : * Release a read/write buffer on a UnicodeString object with an
2866 : * "open" getBuffer(minCapacity).
2867 : * This function must be called in a matched pair with getBuffer(minCapacity).
2868 : * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
2869 : *
2870 : * It will set the string length to newLength, at most to the current capacity.
2871 : * If newLength==-1 then it will set the length according to the
2872 : * first NUL in the buffer, or to the capacity if there is no NUL.
2873 : *
2874 : * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
2875 : *
2876 : * @param newLength the new length of the UnicodeString object;
2877 : * defaults to the current capacity if newLength is greater than that;
2878 : * if newLength==-1, it defaults to u_strlen(buffer) but not more than
2879 : * the current capacity of the string
2880 : *
2881 : * @see getBuffer(int32_t minCapacity)
2882 : * @stable ICU 2.0
2883 : */
2884 : void releaseBuffer(int32_t newLength=-1);
2885 :
2886 : /**
2887 : * Get a read-only pointer to the internal buffer.
2888 : * This can be called at any time on a valid UnicodeString.
2889 : *
2890 : * It returns 0 if the string is bogus, or
2891 : * during an "open" getBuffer(minCapacity).
2892 : *
2893 : * It can be called as many times as desired.
2894 : * The pointer that it returns will remain valid until the UnicodeString object is modified,
2895 : * at which time the pointer is semantically invalidated and must not be used any more.
2896 : *
2897 : * The capacity of the buffer can be determined with getCapacity().
2898 : * The part after length() may or may not be initialized and valid,
2899 : * depending on the history of the UnicodeString object.
2900 : *
2901 : * The buffer contents is (probably) not NUL-terminated.
2902 : * You can check if it is with
2903 : * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.
2904 : * (See getTerminatedBuffer().)
2905 : *
2906 : * The buffer may reside in read-only memory. Its contents must not
2907 : * be modified.
2908 : *
2909 : * @return a read-only pointer to the internal string buffer,
2910 : * or nullptr if the string is empty or bogus
2911 : *
2912 : * @see getBuffer(int32_t minCapacity)
2913 : * @see getTerminatedBuffer()
2914 : * @stable ICU 2.0
2915 : */
2916 : inline const char16_t *getBuffer() const;
2917 :
2918 : /**
2919 : * Get a read-only pointer to the internal buffer,
2920 : * making sure that it is NUL-terminated.
2921 : * This can be called at any time on a valid UnicodeString.
2922 : *
2923 : * It returns 0 if the string is bogus, or
2924 : * during an "open" getBuffer(minCapacity), or if the buffer cannot
2925 : * be NUL-terminated (because memory allocation failed).
2926 : *
2927 : * It can be called as many times as desired.
2928 : * The pointer that it returns will remain valid until the UnicodeString object is modified,
2929 : * at which time the pointer is semantically invalidated and must not be used any more.
2930 : *
2931 : * The capacity of the buffer can be determined with getCapacity().
2932 : * The part after length()+1 may or may not be initialized and valid,
2933 : * depending on the history of the UnicodeString object.
2934 : *
2935 : * The buffer contents is guaranteed to be NUL-terminated.
2936 : * getTerminatedBuffer() may reallocate the buffer if a terminating NUL
2937 : * is written.
2938 : * For this reason, this function is not const, unlike getBuffer().
2939 : * Note that a UnicodeString may also contain NUL characters as part of its contents.
2940 : *
2941 : * The buffer may reside in read-only memory. Its contents must not
2942 : * be modified.
2943 : *
2944 : * @return a read-only pointer to the internal string buffer,
2945 : * or 0 if the string is empty or bogus
2946 : *
2947 : * @see getBuffer(int32_t minCapacity)
2948 : * @see getBuffer()
2949 : * @stable ICU 2.2
2950 : */
2951 : const char16_t *getTerminatedBuffer();
2952 :
2953 : //========================================
2954 : // Constructors
2955 : //========================================
2956 :
2957 : /** Construct an empty UnicodeString.
2958 : * @stable ICU 2.0
2959 : */
2960 : inline UnicodeString();
2961 :
2962 : /**
2963 : * Construct a UnicodeString with capacity to hold <TT>capacity</TT> char16_ts
2964 : * @param capacity the number of char16_ts this UnicodeString should hold
2965 : * before a resize is necessary; if count is greater than 0 and count
2966 : * code points c take up more space than capacity, then capacity is adjusted
2967 : * accordingly.
2968 : * @param c is used to initially fill the string
2969 : * @param count specifies how many code points c are to be written in the
2970 : * string
2971 : * @stable ICU 2.0
2972 : */
2973 : UnicodeString(int32_t capacity, UChar32 c, int32_t count);
2974 :
2975 : /**
2976 : * Single char16_t (code unit) constructor.
2977 : *
2978 : * It is recommended to mark this constructor "explicit" by
2979 : * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
2980 : * on the compiler command line or similar.
2981 : * @param ch the character to place in the UnicodeString
2982 : * @stable ICU 2.0
2983 : */
2984 : UNISTR_FROM_CHAR_EXPLICIT UnicodeString(char16_t ch);
2985 :
2986 : /**
2987 : * Single UChar32 (code point) constructor.
2988 : *
2989 : * It is recommended to mark this constructor "explicit" by
2990 : * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
2991 : * on the compiler command line or similar.
2992 : * @param ch the character to place in the UnicodeString
2993 : * @stable ICU 2.0
2994 : */
2995 : UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);
2996 :
2997 : /**
2998 : * char16_t* constructor.
2999 : *
3000 : * It is recommended to mark this constructor "explicit" by
3001 : * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
3002 : * on the compiler command line or similar.
3003 : * @param text The characters to place in the UnicodeString. <TT>text</TT>
3004 : * must be NUL (U+0000) terminated.
3005 : * @stable ICU 2.0
3006 : */
3007 : UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text);
3008 :
3009 : /*
3010 : * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
3011 : * it should always be available regardless of U_HIDE_DRAFT_API status
3012 : */
3013 : #if !U_CHAR16_IS_TYPEDEF
3014 : /**
3015 : * uint16_t * constructor.
3016 : * Delegates to UnicodeString(const char16_t *) by wrapping text in ConstChar16Ptr.
3017 : *
3018 : * It is recommended to mark this constructor "explicit" by
3019 : * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
3020 : * on the compiler command line or similar.
3021 : * @param text NUL-terminated UTF-16 string
3022 : * @draft ICU 59
3023 : */
3024 : UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) :
3025 : UnicodeString(ConstChar16Ptr(text)) {}
3026 : #endif
3027 :
3028 : /*
3029 : * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
3030 : * it should always be available regardless of U_HIDE_DRAFT_API status
3031 : */
3032 : #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
3033 : /**
3034 : * wchar_t * constructor.
3035 : * (Only defined if U_SIZEOF_WCHAR_T==2.)
3036 : * Delegates to UnicodeString(const char16_t *) by wrapping text in ConstChar16Ptr.
3037 : *
3038 : * It is recommended to mark this constructor "explicit" by
3039 : * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
3040 : * on the compiler command line or similar.
3041 : * @param text NUL-terminated UTF-16 string
3042 : * @draft ICU 59
3043 : */
3044 : UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) :
3045 : UnicodeString(ConstChar16Ptr(text)) {}
3046 : #endif
3047 :
3048 : /*
3049 : * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
3050 : * it should always be available regardless of U_HIDE_DRAFT_API status
3051 : */
3052 : /**
3053 : * nullptr_t constructor.
3054 : * Effectively the same as the default constructor, makes an empty string object.
3055 : *
3056 : * It is recommended to mark this constructor "explicit" by
3057 : * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
3058 : * on the compiler command line or similar.
3059 : * @param text nullptr
3060 : * @draft ICU 59
3061 : */
3062 : UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text);
3063 :
3064 : /**
3065 : * char16_t* constructor.
3066 : * @param text The characters to place in the UnicodeString.
3067 : * @param textLength The number of Unicode characters in <TT>text</TT>
3068 : * to copy.
3069 : * @stable ICU 2.0
3070 : */
3071 : UnicodeString(const char16_t *text,
3072 : int32_t textLength);
3073 :
3074 : /*
3075 : * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
3076 : * it should always be available regardless of U_HIDE_DRAFT_API status
3077 : */
3078 : #if !U_CHAR16_IS_TYPEDEF
3079 : /**
3080 : * uint16_t * constructor.
3081 : * Delegates to UnicodeString(const char16_t *, int32_t) by wrapping text in ConstChar16Ptr.
3082 : * @param text UTF-16 string
3083 : * @param length string length
3084 : * @draft ICU 59
3085 : */
3086 : UnicodeString(const uint16_t *text, int32_t length) :
3087 : UnicodeString(ConstChar16Ptr(text), length) {}
3088 : #endif
3089 :
3090 : /*
3091 : * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
3092 : * it should always be available regardless of U_HIDE_DRAFT_API status
3093 : */
3094 : #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
3095 : /**
3096 : * wchar_t * constructor.
3097 : * (Only defined if U_SIZEOF_WCHAR_T==2.)
3098 : * Delegates to UnicodeString(const char16_t *, int32_t) by wrapping text in ConstChar16Ptr.
3099 : * @param text UTF-16 string
3100 : * @param length string length
3101 : * @draft ICU 59
3102 : */
3103 : UnicodeString(const wchar_t *text, int32_t length) :
3104 : UnicodeString(ConstChar16Ptr(text), length) {}
3105 : #endif
3106 :
3107 : /*
3108 : * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
3109 : * it should always be available regardless of U_HIDE_DRAFT_API status
3110 : */
3111 : /**
3112 : * nullptr_t constructor.
3113 : * Effectively the same as the default constructor, makes an empty string object.
3114 : * @param text nullptr
3115 : * @param length ignored
3116 : * @draft ICU 59
3117 : */
3118 : inline UnicodeString(const std::nullptr_t text, int32_t length);
3119 :
3120 : /**
3121 : * Readonly-aliasing char16_t* constructor.
3122 : * The text will be used for the UnicodeString object, but
3123 : * it will not be released when the UnicodeString is destroyed.
3124 : * This has copy-on-write semantics:
3125 : * When the string is modified, then the buffer is first copied into
3126 : * newly allocated memory.
3127 : * The aliased buffer is never modified.
3128 : *
3129 : * In an assignment to another UnicodeString, when using the copy constructor
3130 : * or the assignment operator, the text will be copied.
3131 : * When using fastCopyFrom(), the text will be aliased again,
3132 : * so that both strings then alias the same readonly-text.
3133 : *
3134 : * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
3135 : * This must be true if <code>textLength==-1</code>.
3136 : * @param text The characters to alias for the UnicodeString.
3137 : * @param textLength The number of Unicode characters in <code>text</code> to alias.
3138 : * If -1, then this constructor will determine the length
3139 : * by calling <code>u_strlen()</code>.
3140 : * @stable ICU 2.0
3141 : */
3142 : UnicodeString(UBool isTerminated,
3143 : ConstChar16Ptr text,
3144 : int32_t textLength);
3145 :
3146 : /**
3147 : * Writable-aliasing char16_t* constructor.
3148 : * The text will be used for the UnicodeString object, but
3149 : * it will not be released when the UnicodeString is destroyed.
3150 : * This has write-through semantics:
3151 : * For as long as the capacity of the buffer is sufficient, write operations
3152 : * will directly affect the buffer. When more capacity is necessary, then
3153 : * a new buffer will be allocated and the contents copied as with regularly
3154 : * constructed strings.
3155 : * In an assignment to another UnicodeString, the buffer will be copied.
3156 : * The extract(Char16Ptr dst) function detects whether the dst pointer is the same
3157 : * as the string buffer itself and will in this case not copy the contents.
3158 : *
3159 : * @param buffer The characters to alias for the UnicodeString.
3160 : * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
3161 : * @param buffCapacity The size of <code>buffer</code> in char16_ts.
3162 : * @stable ICU 2.0
3163 : */
3164 : UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity);
3165 :
3166 : /*
3167 : * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
3168 : * it should always be available regardless of U_HIDE_DRAFT_API status
3169 : */
3170 : #if !U_CHAR16_IS_TYPEDEF
3171 : /**
3172 : * Writable-aliasing uint16_t * constructor.
3173 : * Delegates to UnicodeString(char16_t *, int32_t, int32_t).
3174 : * @param buffer writable buffer of/for UTF-16 text
3175 : * @param buffLength length of the current buffer contents
3176 : * @param buffCapacity buffer capacity
3177 : * @draft ICU 59
3178 : */
3179 : UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) :
3180 : UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
3181 : #endif
3182 :
3183 : /*
3184 : * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
3185 : * it should always be available regardless of U_HIDE_DRAFT_API status
3186 : */
3187 : #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
3188 : /**
3189 : * Writable-aliasing wchar_t * constructor.
3190 : * (Only defined if U_SIZEOF_WCHAR_T==2.)
3191 : * Delegates to UnicodeString(char16_t *, int32_t, int32_t).
3192 : * @param buffer writable buffer of/for UTF-16 text
3193 : * @param buffLength length of the current buffer contents
3194 : * @param buffCapacity buffer capacity
3195 : * @draft ICU 59
3196 : */
3197 : UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) :
3198 : UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
3199 : #endif
3200 :
3201 : /*
3202 : * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor,
3203 : * it should always be available regardless of U_HIDE_DRAFT_API status
3204 : */
3205 : /**
3206 : * Writable-aliasing nullptr_t constructor.
3207 : * Effectively the same as the default constructor, makes an empty string object.
3208 : * @param buffer nullptr
3209 : * @param buffLength ignored
3210 : * @param buffCapacity ignored
3211 : * @draft ICU 59
3212 : */
3213 : inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity);
3214 :
3215 : #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
3216 :
3217 : /**
3218 : * char* constructor.
3219 : * Uses the default converter (and thus depends on the ICU conversion code)
3220 : * unless U_CHARSET_IS_UTF8 is set to 1.
3221 : *
3222 : * For ASCII (really "invariant character") strings it is more efficient to use
3223 : * the constructor that takes a US_INV (for its enum EInvariant).
3224 : * For ASCII (invariant-character) string literals, see UNICODE_STRING and
3225 : * UNICODE_STRING_SIMPLE.
3226 : *
3227 : * It is recommended to mark this constructor "explicit" by
3228 : * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
3229 : * on the compiler command line or similar.
3230 : * @param codepageData an array of bytes, null-terminated,
3231 : * in the platform's default codepage.
3232 : * @stable ICU 2.0
3233 : * @see UNICODE_STRING
3234 : * @see UNICODE_STRING_SIMPLE
3235 : */
3236 : UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
3237 :
3238 : /**
3239 : * char* constructor.
3240 : * Uses the default converter (and thus depends on the ICU conversion code)
3241 : * unless U_CHARSET_IS_UTF8 is set to 1.
3242 : * @param codepageData an array of bytes in the platform's default codepage.
3243 : * @param dataLength The number of bytes in <TT>codepageData</TT>.
3244 : * @stable ICU 2.0
3245 : */
3246 : UnicodeString(const char *codepageData, int32_t dataLength);
3247 :
3248 : #endif
3249 :
3250 : #if !UCONFIG_NO_CONVERSION
3251 :
3252 : /**
3253 : * char* constructor.
3254 : * @param codepageData an array of bytes, null-terminated
3255 : * @param codepage the encoding of <TT>codepageData</TT>. The special
3256 : * value 0 for <TT>codepage</TT> indicates that the text is in the
3257 : * platform's default codepage.
3258 : *
3259 : * If <code>codepage</code> is an empty string (<code>""</code>),
3260 : * then a simple conversion is performed on the codepage-invariant
3261 : * subset ("invariant characters") of the platform encoding. See utypes.h.
3262 : * Recommendation: For invariant-character strings use the constructor
3263 : * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
3264 : * because it avoids object code dependencies of UnicodeString on
3265 : * the conversion code.
3266 : *
3267 : * @stable ICU 2.0
3268 : */
3269 : UnicodeString(const char *codepageData, const char *codepage);
3270 :
3271 : /**
3272 : * char* constructor.
3273 : * @param codepageData an array of bytes.
3274 : * @param dataLength The number of bytes in <TT>codepageData</TT>.
3275 : * @param codepage the encoding of <TT>codepageData</TT>. The special
3276 : * value 0 for <TT>codepage</TT> indicates that the text is in the
3277 : * platform's default codepage.
3278 : * If <code>codepage</code> is an empty string (<code>""</code>),
3279 : * then a simple conversion is performed on the codepage-invariant
3280 : * subset ("invariant characters") of the platform encoding. See utypes.h.
3281 : * Recommendation: For invariant-character strings use the constructor
3282 : * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
3283 : * because it avoids object code dependencies of UnicodeString on
3284 : * the conversion code.
3285 : *
3286 : * @stable ICU 2.0
3287 : */
3288 : UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
3289 :
3290 : /**
3291 : * char * / UConverter constructor.
3292 : * This constructor uses an existing UConverter object to
3293 : * convert the codepage string to Unicode and construct a UnicodeString
3294 : * from that.
3295 : *
3296 : * The converter is reset at first.
3297 : * If the error code indicates a failure before this constructor is called,
3298 : * or if an error occurs during conversion or construction,
3299 : * then the string will be bogus.
3300 : *
3301 : * This function avoids the overhead of opening and closing a converter if
3302 : * multiple strings are constructed.
3303 : *
3304 : * @param src input codepage string
3305 : * @param srcLength length of the input string, can be -1 for NUL-terminated strings
3306 : * @param cnv converter object (ucnv_resetToUnicode() will be called),
3307 : * can be NULL for the default converter
3308 : * @param errorCode normal ICU error code
3309 : * @stable ICU 2.0
3310 : */
3311 : UnicodeString(
3312 : const char *src, int32_t srcLength,
3313 : UConverter *cnv,
3314 : UErrorCode &errorCode);
3315 :
3316 : #endif
3317 :
3318 : /**
3319 : * Constructs a Unicode string from an invariant-character char * string.
3320 : * About invariant characters see utypes.h.
3321 : * This constructor has no runtime dependency on conversion code and is
3322 : * therefore recommended over ones taking a charset name string
3323 : * (where the empty string "" indicates invariant-character conversion).
3324 : *
3325 : * Use the macro US_INV as the third, signature-distinguishing parameter.
3326 : *
3327 : * For example:
3328 : * \code
3329 : * void fn(const char *s) {
3330 : * UnicodeString ustr(s, -1, US_INV);
3331 : * // use ustr ...
3332 : * }
3333 : * \endcode
3334 : *
3335 : * @param src String using only invariant characters.
3336 : * @param length Length of src, or -1 if NUL-terminated.
3337 : * @param inv Signature-distinguishing parameter, use US_INV.
3338 : *
3339 : * @see US_INV
3340 : * @stable ICU 3.2
3341 : */
3342 : UnicodeString(const char *src, int32_t length, enum EInvariant inv);
3343 :
3344 :
3345 : /**
3346 : * Copy constructor.
3347 : *
3348 : * Starting with ICU 2.4, the assignment operator and the copy constructor
3349 : * allocate a new buffer and copy the buffer contents even for readonly aliases.
3350 : * By contrast, the fastCopyFrom() function implements the old,
3351 : * more efficient but less safe behavior
3352 : * of making this string also a readonly alias to the same buffer.
3353 : *
3354 : * If the source object has an "open" buffer from getBuffer(minCapacity),
3355 : * then the copy is an empty string.
3356 : *
3357 : * @param that The UnicodeString object to copy.
3358 : * @stable ICU 2.0
3359 : * @see fastCopyFrom
3360 : */
3361 : UnicodeString(const UnicodeString& that);
3362 :
3363 : #if U_HAVE_RVALUE_REFERENCES
3364 : /**
3365 : * Move constructor, might leave src in bogus state.
3366 : * This string will have the same contents and state that the source string had.
3367 : * @param src source string
3368 : * @stable ICU 56
3369 : */
3370 : UnicodeString(UnicodeString &&src) U_NOEXCEPT;
3371 : #endif
3372 :
3373 : /**
3374 : * 'Substring' constructor from tail of source string.
3375 : * @param src The UnicodeString object to copy.
3376 : * @param srcStart The offset into <tt>src</tt> at which to start copying.
3377 : * @stable ICU 2.2
3378 : */
3379 : UnicodeString(const UnicodeString& src, int32_t srcStart);
3380 :
3381 : /**
3382 : * 'Substring' constructor from subrange of source string.
3383 : * @param src The UnicodeString object to copy.
3384 : * @param srcStart The offset into <tt>src</tt> at which to start copying.
3385 : * @param srcLength The number of characters from <tt>src</tt> to copy.
3386 : * @stable ICU 2.2
3387 : */
3388 : UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3389 :
3390 : /**
3391 : * Clone this object, an instance of a subclass of Replaceable.
3392 : * Clones can be used concurrently in multiple threads.
3393 : * If a subclass does not implement clone(), or if an error occurs,
3394 : * then NULL is returned.
3395 : * The clone functions in all subclasses return a pointer to a Replaceable
3396 : * because some compilers do not support covariant (same-as-this)
3397 : * return types; cast to the appropriate subclass if necessary.
3398 : * The caller must delete the clone.
3399 : *
3400 : * @return a clone of this object
3401 : *
3402 : * @see Replaceable::clone
3403 : * @see getDynamicClassID
3404 : * @stable ICU 2.6
3405 : */
3406 : virtual Replaceable *clone() const;
3407 :
3408 : /** Destructor.
3409 : * @stable ICU 2.0
3410 : */
3411 : virtual ~UnicodeString();
3412 :
3413 : /**
3414 : * Create a UnicodeString from a UTF-8 string.
3415 : * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
3416 : * Calls u_strFromUTF8WithSub().
3417 : *
3418 : * @param utf8 UTF-8 input string.
3419 : * Note that a StringPiece can be implicitly constructed
3420 : * from a std::string or a NUL-terminated const char * string.
3421 : * @return A UnicodeString with equivalent UTF-16 contents.
3422 : * @see toUTF8
3423 : * @see toUTF8String
3424 : * @stable ICU 4.2
3425 : */
3426 : static UnicodeString fromUTF8(StringPiece utf8);
3427 :
3428 : /**
3429 : * Create a UnicodeString from a UTF-32 string.
3430 : * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
3431 : * Calls u_strFromUTF32WithSub().
3432 : *
3433 : * @param utf32 UTF-32 input string. Must not be NULL.
3434 : * @param length Length of the input string, or -1 if NUL-terminated.
3435 : * @return A UnicodeString with equivalent UTF-16 contents.
3436 : * @see toUTF32
3437 : * @stable ICU 4.2
3438 : */
3439 : static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
3440 :
3441 : /* Miscellaneous operations */
3442 :
3443 : /**
3444 : * Unescape a string of characters and return a string containing
3445 : * the result. The following escape sequences are recognized:
3446 : *
3447 : * \\uhhhh 4 hex digits; h in [0-9A-Fa-f]
3448 : * \\Uhhhhhhhh 8 hex digits
3449 : * \\xhh 1-2 hex digits
3450 : * \\ooo 1-3 octal digits; o in [0-7]
3451 : * \\cX control-X; X is masked with 0x1F
3452 : *
3453 : * as well as the standard ANSI C escapes:
3454 : *
3455 : * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
3456 : * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
3457 : * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
3458 : *
3459 : * Anything else following a backslash is generically escaped. For
3460 : * example, "[a\\-z]" returns "[a-z]".
3461 : *
3462 : * If an escape sequence is ill-formed, this method returns an empty
3463 : * string. An example of an ill-formed sequence is "\\u" followed by
3464 : * fewer than 4 hex digits.
3465 : *
3466 : * This function is similar to u_unescape() but not identical to it.
3467 : * The latter takes a source char*, so it does escape recognition
3468 : * and also invariant conversion.
3469 : *
3470 : * @return a string with backslash escapes interpreted, or an
3471 : * empty string on error.
3472 : * @see UnicodeString#unescapeAt()
3473 : * @see u_unescape()
3474 : * @see u_unescapeAt()
3475 : * @stable ICU 2.0
3476 : */
3477 : UnicodeString unescape() const;
3478 :
3479 : /**
3480 : * Unescape a single escape sequence and return the represented
3481 : * character. See unescape() for a listing of the recognized escape
3482 : * sequences. The character at offset-1 is assumed (without
3483 : * checking) to be a backslash. If the escape sequence is
3484 : * ill-formed, or the offset is out of range, U_SENTINEL=-1 is
3485 : * returned.
3486 : *
3487 : * @param offset an input output parameter. On input, it is the
3488 : * offset into this string where the escape sequence is located,
3489 : * after the initial backslash. On output, it is advanced after the
3490 : * last character parsed. On error, it is not advanced at all.
3491 : * @return the character represented by the escape sequence at
3492 : * offset, or U_SENTINEL=-1 on error.
3493 : * @see UnicodeString#unescape()
3494 : * @see u_unescape()
3495 : * @see u_unescapeAt()
3496 : * @stable ICU 2.0
3497 : */
3498 : UChar32 unescapeAt(int32_t &offset) const;
3499 :
3500 : /**
3501 : * ICU "poor man's RTTI", returns a UClassID for this class.
3502 : *
3503 : * @stable ICU 2.2
3504 : */
3505 : static UClassID U_EXPORT2 getStaticClassID();
3506 :
3507 : /**
3508 : * ICU "poor man's RTTI", returns a UClassID for the actual class.
3509 : *
3510 : * @stable ICU 2.2
3511 : */
3512 : virtual UClassID getDynamicClassID() const;
3513 :
3514 : //========================================
3515 : // Implementation methods
3516 : //========================================
3517 :
3518 : protected:
3519 : /**
3520 : * Implement Replaceable::getLength() (see jitterbug 1027).
3521 : * @stable ICU 2.4
3522 : */
3523 : virtual int32_t getLength() const;
3524 :
3525 : /**
3526 : * The change in Replaceable to use virtual getCharAt() allows
3527 : * UnicodeString::charAt() to be inline again (see jitterbug 709).
3528 : * @stable ICU 2.4
3529 : */
3530 : virtual char16_t getCharAt(int32_t offset) const;
3531 :
3532 : /**
3533 : * The change in Replaceable to use virtual getChar32At() allows
3534 : * UnicodeString::char32At() to be inline again (see jitterbug 709).
3535 : * @stable ICU 2.4
3536 : */
3537 : virtual UChar32 getChar32At(int32_t offset) const;
3538 :
3539 : private:
3540 : // For char* constructors. Could be made public.
3541 : UnicodeString &setToUTF8(StringPiece utf8);
3542 : // For extract(char*).
3543 : // We could make a toUTF8(target, capacity, errorCode) public but not
3544 : // this version: New API will be cleaner if we make callers create substrings
3545 : // rather than having start+length on every method,
3546 : // and it should take a UErrorCode&.
3547 : int32_t
3548 : toUTF8(int32_t start, int32_t len,
3549 : char *target, int32_t capacity) const;
3550 :
3551 : /**
3552 : * Internal string contents comparison, called by operator==.
3553 : * Requires: this & text not bogus and have same lengths.
3554 : */
3555 : UBool doEquals(const UnicodeString &text, int32_t len) const;
3556 :
3557 : inline int8_t
3558 : doCompare(int32_t start,
3559 : int32_t length,
3560 : const UnicodeString& srcText,
3561 : int32_t srcStart,
3562 : int32_t srcLength) const;
3563 :
3564 : int8_t doCompare(int32_t start,
3565 : int32_t length,
3566 : const char16_t *srcChars,
3567 : int32_t srcStart,
3568 : int32_t srcLength) const;
3569 :
3570 : inline int8_t
3571 : doCompareCodePointOrder(int32_t start,
3572 : int32_t length,
3573 : const UnicodeString& srcText,
3574 : int32_t srcStart,
3575 : int32_t srcLength) const;
3576 :
3577 : int8_t doCompareCodePointOrder(int32_t start,
3578 : int32_t length,
3579 : const char16_t *srcChars,
3580 : int32_t srcStart,
3581 : int32_t srcLength) const;
3582 :
3583 : inline int8_t
3584 : doCaseCompare(int32_t start,
3585 : int32_t length,
3586 : const UnicodeString &srcText,
3587 : int32_t srcStart,
3588 : int32_t srcLength,
3589 : uint32_t options) const;
3590 :
3591 : int8_t
3592 : doCaseCompare(int32_t start,
3593 : int32_t length,
3594 : const char16_t *srcChars,
3595 : int32_t srcStart,
3596 : int32_t srcLength,
3597 : uint32_t options) const;
3598 :
3599 : int32_t doIndexOf(char16_t c,
3600 : int32_t start,
3601 : int32_t length) const;
3602 :
3603 : int32_t doIndexOf(UChar32 c,
3604 : int32_t start,
3605 : int32_t length) const;
3606 :
3607 : int32_t doLastIndexOf(char16_t c,
3608 : int32_t start,
3609 : int32_t length) const;
3610 :
3611 : int32_t doLastIndexOf(UChar32 c,
3612 : int32_t start,
3613 : int32_t length) const;
3614 :
3615 : void doExtract(int32_t start,
3616 : int32_t length,
3617 : char16_t *dst,
3618 : int32_t dstStart) const;
3619 :
3620 : inline void doExtract(int32_t start,
3621 : int32_t length,
3622 : UnicodeString& target) const;
3623 :
3624 : inline char16_t doCharAt(int32_t offset) const;
3625 :
3626 : UnicodeString& doReplace(int32_t start,
3627 : int32_t length,
3628 : const UnicodeString& srcText,
3629 : int32_t srcStart,
3630 : int32_t srcLength);
3631 :
3632 : UnicodeString& doReplace(int32_t start,
3633 : int32_t length,
3634 : const char16_t *srcChars,
3635 : int32_t srcStart,
3636 : int32_t srcLength);
3637 :
3638 : UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3639 : UnicodeString& doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength);
3640 :
3641 : UnicodeString& doReverse(int32_t start,
3642 : int32_t length);
3643 :
3644 : // calculate hash code
3645 : int32_t doHashCode(void) const;
3646 :
3647 : // get pointer to start of array
3648 : // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
3649 : inline char16_t* getArrayStart(void);
3650 : inline const char16_t* getArrayStart(void) const;
3651 :
3652 : inline UBool hasShortLength() const;
3653 : inline int32_t getShortLength() const;
3654 :
3655 : // A UnicodeString object (not necessarily its current buffer)
3656 : // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
3657 : inline UBool isWritable() const;
3658 :
3659 : // Is the current buffer writable?
3660 : inline UBool isBufferWritable() const;
3661 :
3662 : // None of the following does releaseArray().
3663 : inline void setZeroLength();
3664 : inline void setShortLength(int32_t len);
3665 : inline void setLength(int32_t len);
3666 : inline void setToEmpty();
3667 : inline void setArray(char16_t *array, int32_t len, int32_t capacity); // sets length but not flags
3668 :
3669 : // allocate the array; result may be the stack buffer
3670 : // sets refCount to 1 if appropriate
3671 : // sets fArray, fCapacity, and flags
3672 : // sets length to 0
3673 : // returns boolean for success or failure
3674 : UBool allocate(int32_t capacity);
3675 :
3676 : // release the array if owned
3677 : void releaseArray(void);
3678 :
3679 : // turn a bogus string into an empty one
3680 : void unBogus();
3681 :
3682 : // implements assignment operator, copy constructor, and fastCopyFrom()
3683 : UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
3684 :
3685 : // Copies just the fields without memory management.
3686 : void copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) U_NOEXCEPT;
3687 :
3688 : // Pin start and limit to acceptable values.
3689 : inline void pinIndex(int32_t& start) const;
3690 : inline void pinIndices(int32_t& start,
3691 : int32_t& length) const;
3692 :
3693 : #if !UCONFIG_NO_CONVERSION
3694 :
3695 : /* Internal extract() using UConverter. */
3696 : int32_t doExtract(int32_t start, int32_t length,
3697 : char *dest, int32_t destCapacity,
3698 : UConverter *cnv,
3699 : UErrorCode &errorCode) const;
3700 :
3701 : /*
3702 : * Real constructor for converting from codepage data.
3703 : * It assumes that it is called with !fRefCounted.
3704 : *
3705 : * If <code>codepage==0</code>, then the default converter
3706 : * is used for the platform encoding.
3707 : * If <code>codepage</code> is an empty string (<code>""</code>),
3708 : * then a simple conversion is performed on the codepage-invariant
3709 : * subset ("invariant characters") of the platform encoding. See utypes.h.
3710 : */
3711 : void doCodepageCreate(const char *codepageData,
3712 : int32_t dataLength,
3713 : const char *codepage);
3714 :
3715 : /*
3716 : * Worker function for creating a UnicodeString from
3717 : * a codepage string using a UConverter.
3718 : */
3719 : void
3720 : doCodepageCreate(const char *codepageData,
3721 : int32_t dataLength,
3722 : UConverter *converter,
3723 : UErrorCode &status);
3724 :
3725 : #endif
3726 :
3727 : /*
3728 : * This function is called when write access to the array
3729 : * is necessary.
3730 : *
3731 : * We need to make a copy of the array if
3732 : * the buffer is read-only, or
3733 : * the buffer is refCounted (shared), and refCount>1, or
3734 : * the buffer is too small.
3735 : *
3736 : * Return FALSE if memory could not be allocated.
3737 : */
3738 : UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
3739 : int32_t growCapacity = -1,
3740 : UBool doCopyArray = TRUE,
3741 : int32_t **pBufferToDelete = 0,
3742 : UBool forceClone = FALSE);
3743 :
3744 : /**
3745 : * Common function for UnicodeString case mappings.
3746 : * The stringCaseMapper has the same type UStringCaseMapper
3747 : * as in ustr_imp.h for ustrcase_map().
3748 : */
3749 : UnicodeString &
3750 : caseMap(int32_t caseLocale, uint32_t options,
3751 : #if !UCONFIG_NO_BREAK_ITERATION
3752 : BreakIterator *iter,
3753 : #endif
3754 : UStringCaseMapper *stringCaseMapper);
3755 :
3756 : // ref counting
3757 : void addRef(void);
3758 : int32_t removeRef(void);
3759 : int32_t refCount(void) const;
3760 :
3761 : // constants
3762 : enum {
3763 : /**
3764 : * Size of stack buffer for short strings.
3765 : * Must be at least U16_MAX_LENGTH for the single-code point constructor to work.
3766 : * @see UNISTR_OBJECT_SIZE
3767 : */
3768 : US_STACKBUF_SIZE=(int32_t)(UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR,
3769 : kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index)
3770 : kInvalidHashCode=0, // invalid hash code
3771 : kEmptyHashCode=1, // hash code for empty string
3772 :
3773 : // bit flag values for fLengthAndFlags
3774 : kIsBogus=1, // this string is bogus, i.e., not valid or NULL
3775 : kUsingStackBuffer=2,// using fUnion.fStackFields instead of fUnion.fFields
3776 : kRefCounted=4, // there is a refCount field before the characters in fArray
3777 : kBufferIsReadonly=8,// do not write to this buffer
3778 : kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"),
3779 : // and releaseBuffer(newLength) must be called
3780 : kAllStorageFlags=0x1f, // mask covering the five flag bits above (1|2|4|8|16)
3781 :
3782 : kLengthShift=5, // remaining 11 bits for non-negative short length, or negative if long
3783 : kLength1=1<<kLengthShift, // one unit of short length once shifted past the flag bits
3784 : kMaxShortLength=0x3ff, // max non-negative short length (leaves top bit 0)
3785 : kLengthIsLarge=0xffe0, // short length < 0, real length is in fUnion.fFields.fLength
3786 :
3787 : // combined values for convenience
3788 : kShortString=kUsingStackBuffer,
3789 : kLongString=kRefCounted,
3790 : kReadonlyAlias=kBufferIsReadonly,
3791 : kWritableAlias=0 // no storage flag set
3792 : };
3793 :
3794 : friend class UnicodeStringAppendable;
3795 :
3796 : union StackBufferOrFields; // forward declaration necessary before friend declaration
3797 : friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
3798 :
3799 : /*
3800 : * The following are all the class fields that are stored
3801 : * in each UnicodeString object.
3802 : * Note that UnicodeString has virtual functions,
3803 : * therefore there is an implicit vtable pointer
3804 : * as the first real field.
3805 : * The fields should be aligned such that no padding is necessary.
3806 : * On 32-bit machines, the size should be 32 bytes,
3807 : * on 64-bit machines (8-byte pointers), it should be 40 bytes.
3808 : *
3809 : * We use a hack to achieve this.
3810 : *
3811 : * With at least some compilers, each of the following is forced to
3812 : * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
3813 : * rounded up with additional padding if the fields do not already fit that requirement:
3814 : * - sizeof(class UnicodeString)
3815 : * - offsetof(UnicodeString, fUnion)
3816 : * - sizeof(fUnion)
3817 : * - sizeof(fStackFields)
3818 : *
3819 : * We optimize for the longest possible internal buffer for short strings.
3820 : * fUnion.fStackFields begins with 2 bytes for storage flags
3821 : * and the length of relatively short strings,
3822 : * followed by the buffer for short string contents.
3823 : * There is no padding inside fStackFields.
3824 : *
3825 : * Heap-allocated and aliased strings use fUnion.fFields.
3826 : * Both fStackFields and fFields must begin with the same fields for flags and short length,
3827 : * that is, those must have the same memory offsets inside the object,
3828 : * because the flags must be inspected in order to decide which half of fUnion is being used.
3829 : * We assume that the compiler does not reorder the fields.
3830 : *
3831 : * (Padding at the end of fFields is ok:
3832 : * As long as it is no larger than fStackFields, it is not wasted space.)
3833 : *
3834 : * For some of the history of the UnicodeString class fields layout, see
3835 : * - ICU ticket #11551 "longer UnicodeString contents in stack buffer"
3836 : * - ICU ticket #11336 "UnicodeString: recombine stack buffer arrays"
3837 : * - ICU ticket #8322 "why is sizeof(UnicodeString)==48?"
3838 : */
3839 : // (implicit) *vtable;
3840 : union StackBufferOrFields {
3841 : // fStackFields is used iff (fLengthAndFlags&kUsingStackBuffer) else fFields is used.
3842 : // Each struct of the union must begin with fLengthAndFlags.
3843 : struct {
3844 : int16_t fLengthAndFlags; // bit fields: see constants above
3845 : char16_t fBuffer[US_STACKBUF_SIZE]; // buffer for short strings
3846 : } fStackFields;
3847 : struct {
3848 : int16_t fLengthAndFlags; // bit fields: see constants above
3849 : int32_t fLength; // number of characters in fArray if >kMaxShortLength (fLengthAndFlags is then negative); else undefined
3850 : int32_t fCapacity; // capacity of fArray (in char16_ts)
3851 : // array pointer last to minimize padding for machines with P128 data model
3852 : // or pointer sizes that are not a power of 2
3853 : char16_t *fArray; // the Unicode data
3854 : } fFields;
3855 : } fUnion;
3856 : };
3857 :
3858 : /**
3859 : * Create a new UnicodeString with the concatenation of two others.
3860 : *
3861 : * @param s1 The first string to be copied to the new one.
3862 : * @param s2 The second string to be copied to the new one, after s1.
3863 : * @return UnicodeString(s1).append(s2)
3864 : * @stable ICU 2.8
3865 : */
3866 : U_COMMON_API UnicodeString U_EXPORT2
3867 : operator+ (const UnicodeString &s1, const UnicodeString &s2);
3868 :
3869 : //========================================
3870 : // Inline members
3871 : //========================================
3872 :
3873 : //========================================
3874 : // Privates
3875 : //========================================
3876 :
3877 : inline void
3878 0 : UnicodeString::pinIndex(int32_t& start) const
3879 : {
3880 : // pin index
3881 0 : if(start < 0) {
3882 0 : start = 0;
3883 0 : } else if(start > length()) {
3884 0 : start = length();
3885 : }
3886 0 : }
3887 :
3888 : inline void
3889 0 : UnicodeString::pinIndices(int32_t& start,
3890 : int32_t& _length) const
3891 : {
3892 : // pin indices
3893 0 : int32_t len = length();
3894 0 : if(start < 0) {
3895 0 : start = 0;
3896 0 : } else if(start > len) {
3897 0 : start = len;
3898 : }
3899 0 : if(_length < 0) {
3900 0 : _length = 0;
3901 0 : } else if(_length > (len - start)) {
3902 0 : _length = (len - start);
3903 : }
3904 0 : }
3905 :
3906 : inline char16_t*
3907 0 : UnicodeString::getArrayStart() {
3908 0 : return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
3909 0 : fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
3910 : }
3911 :
3912 : inline const char16_t*
3913 0 : UnicodeString::getArrayStart() const {
3914 0 : return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
3915 0 : fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
3916 : }
3917 :
3918 : //========================================
3919 : // Default constructor
3920 : //========================================
3921 :
3922 : inline
3923 0 : UnicodeString::UnicodeString() {
3924 0 : fUnion.fStackFields.fLengthAndFlags=kShortString;
3925 0 : }
3926 :
3927 : inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/) {
3928 : fUnion.fStackFields.fLengthAndFlags=kShortString;
3929 : }
3930 :
3931 : inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/, int32_t /*length*/) {
3932 : fUnion.fStackFields.fLengthAndFlags=kShortString;
3933 : }
3934 :
3935 : inline UnicodeString::UnicodeString(std::nullptr_t /*buffer*/, int32_t /*buffLength*/, int32_t /*buffCapacity*/) {
3936 : fUnion.fStackFields.fLengthAndFlags=kShortString;
3937 : }
3938 :
3939 : //========================================
3940 : // Read-only implementation methods
3941 : //========================================
3942 : inline UBool
3943 0 : UnicodeString::hasShortLength() const {
3944 0 : return fUnion.fFields.fLengthAndFlags>=0;
3945 : }
3946 :
3947 : inline int32_t
3948 0 : UnicodeString::getShortLength() const {
3949 : // fLengthAndFlags must be non-negative -> short length >= 0
3950 : // and arithmetic or logical shift does not matter.
3951 0 : return fUnion.fFields.fLengthAndFlags>>kLengthShift;
3952 : }
3953 :
3954 : inline int32_t
3955 0 : UnicodeString::length() const {
3956 0 : return hasShortLength() ? getShortLength() : fUnion.fFields.fLength;
3957 : }
3958 :
3959 : inline int32_t
3960 0 : UnicodeString::getCapacity() const {
3961 0 : return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
3962 0 : US_STACKBUF_SIZE : fUnion.fFields.fCapacity;
3963 : }
3964 :
3965 : inline int32_t
3966 0 : UnicodeString::hashCode() const
3967 0 : { return doHashCode(); }
3968 :
3969 : inline UBool
3970 0 : UnicodeString::isBogus() const
3971 0 : { return (UBool)(fUnion.fFields.fLengthAndFlags & kIsBogus); }
3972 :
3973 : inline UBool
3974 0 : UnicodeString::isWritable() const
3975 0 : { return (UBool)!(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus)); }
3976 :
3977 : inline UBool
3978 0 : UnicodeString::isBufferWritable() const
3979 : {
3980 : return (UBool)(
3981 0 : !(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
3982 0 : (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1));
3983 : }
3984 :
3985 : inline const char16_t *
3986 0 : UnicodeString::getBuffer() const {
3987 0 : if(fUnion.fFields.fLengthAndFlags&(kIsBogus|kOpenGetBuffer)) {
3988 0 : return nullptr;
3989 0 : } else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) {
3990 0 : return fUnion.fStackFields.fBuffer;
3991 : } else {
3992 0 : return fUnion.fFields.fArray;
3993 : }
3994 : }
3995 :
3996 : //========================================
3997 : // Read-only alias methods
3998 : //========================================
3999 : inline int8_t
4000 0 : UnicodeString::doCompare(int32_t start,
4001 : int32_t thisLength,
4002 : const UnicodeString& srcText,
4003 : int32_t srcStart,
4004 : int32_t srcLength) const
4005 : {
4006 0 : if(srcText.isBogus()) {
4007 0 : return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
4008 : } else {
4009 0 : srcText.pinIndices(srcStart, srcLength);
4010 0 : return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
4011 : }
4012 : }
4013 :
4014 : inline UBool
4015 0 : UnicodeString::operator== (const UnicodeString& text) const
4016 : {
4017 0 : if(isBogus()) {
4018 0 : return text.isBogus();
4019 : } else {
4020 0 : int32_t len = length(), textLength = text.length();
4021 0 : return !text.isBogus() && len == textLength && doEquals(text, len);
4022 : }
4023 : }
4024 :
4025 : inline UBool
4026 0 : UnicodeString::operator!= (const UnicodeString& text) const
4027 0 : { return (! operator==(text)); }
4028 :
4029 : inline UBool
4030 : UnicodeString::operator> (const UnicodeString& text) const
4031 : { return doCompare(0, length(), text, 0, text.length()) == 1; }
4032 :
4033 : inline UBool
4034 : UnicodeString::operator< (const UnicodeString& text) const
4035 : { return doCompare(0, length(), text, 0, text.length()) == -1; }
4036 :
4037 : inline UBool
4038 : UnicodeString::operator>= (const UnicodeString& text) const
4039 : { return doCompare(0, length(), text, 0, text.length()) != -1; }
4040 :
4041 : inline UBool
4042 : UnicodeString::operator<= (const UnicodeString& text) const
4043 : { return doCompare(0, length(), text, 0, text.length()) != 1; }
4044 :
4045 : inline int8_t
4046 0 : UnicodeString::compare(const UnicodeString& text) const
4047 0 : { return doCompare(0, length(), text, 0, text.length()); }
4048 :
4049 : inline int8_t
4050 0 : UnicodeString::compare(int32_t start,
4051 : int32_t _length,
4052 : const UnicodeString& srcText) const
4053 0 : { return doCompare(start, _length, srcText, 0, srcText.length()); }
4054 :
4055 : inline int8_t
4056 0 : UnicodeString::compare(ConstChar16Ptr srcChars,
4057 : int32_t srcLength) const
4058 0 : { return doCompare(0, length(), srcChars, 0, srcLength); }
4059 :
4060 : inline int8_t
4061 0 : UnicodeString::compare(int32_t start,
4062 : int32_t _length,
4063 : const UnicodeString& srcText,
4064 : int32_t srcStart,
4065 : int32_t srcLength) const
4066 0 : { return doCompare(start, _length, srcText, srcStart, srcLength); }
4067 :
4068 : inline int8_t
4069 0 : UnicodeString::compare(int32_t start,
4070 : int32_t _length,
4071 : const char16_t *srcChars) const
4072 0 : { return doCompare(start, _length, srcChars, 0, _length); }
4073 :
4074 : inline int8_t
4075 0 : UnicodeString::compare(int32_t start,
4076 : int32_t _length,
4077 : const char16_t *srcChars,
4078 : int32_t srcStart,
4079 : int32_t srcLength) const
4080 0 : { return doCompare(start, _length, srcChars, srcStart, srcLength); }
4081 :
4082 : inline int8_t
4083 0 : UnicodeString::compareBetween(int32_t start,
4084 : int32_t limit,
4085 : const UnicodeString& srcText,
4086 : int32_t srcStart,
4087 : int32_t srcLimit) const
4088 0 : { return doCompare(start, limit - start,
4089 0 : srcText, srcStart, srcLimit - srcStart); }
4090 :
4091 : inline int8_t
4092 0 : UnicodeString::doCompareCodePointOrder(int32_t start,
4093 : int32_t thisLength,
4094 : const UnicodeString& srcText,
4095 : int32_t srcStart,
4096 : int32_t srcLength) const
4097 : {
4098 0 : if(srcText.isBogus()) {
4099 0 : return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
4100 : } else {
4101 0 : srcText.pinIndices(srcStart, srcLength);
4102 0 : return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
4103 : }
4104 : }
4105 :
4106 : inline int8_t
4107 0 : UnicodeString::compareCodePointOrder(const UnicodeString& text) const
4108 0 : { return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
4109 :
4110 : inline int8_t
4111 : UnicodeString::compareCodePointOrder(int32_t start,
4112 : int32_t _length,
4113 : const UnicodeString& srcText) const
4114 : { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
4115 :
4116 : inline int8_t
4117 : UnicodeString::compareCodePointOrder(ConstChar16Ptr srcChars,
4118 : int32_t srcLength) const
4119 : { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
4120 :
4121 : inline int8_t
4122 : UnicodeString::compareCodePointOrder(int32_t start,
4123 : int32_t _length,
4124 : const UnicodeString& srcText,
4125 : int32_t srcStart,
4126 : int32_t srcLength) const
4127 : { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
4128 :
4129 : inline int8_t
4130 : UnicodeString::compareCodePointOrder(int32_t start,
4131 : int32_t _length,
4132 : const char16_t *srcChars) const
4133 : { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
4134 :
4135 : inline int8_t
4136 : UnicodeString::compareCodePointOrder(int32_t start,
4137 : int32_t _length,
4138 : const char16_t *srcChars,
4139 : int32_t srcStart,
4140 : int32_t srcLength) const
4141 : { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
4142 :
4143 : inline int8_t
4144 : UnicodeString::compareCodePointOrderBetween(int32_t start,
4145 : int32_t limit,
4146 : const UnicodeString& srcText,
4147 : int32_t srcStart,
4148 : int32_t srcLimit) const
4149 : { return doCompareCodePointOrder(start, limit - start,
4150 : srcText, srcStart, srcLimit - srcStart); }
4151 :
4152 : inline int8_t
4153 0 : UnicodeString::doCaseCompare(int32_t start,
4154 : int32_t thisLength,
4155 : const UnicodeString &srcText,
4156 : int32_t srcStart,
4157 : int32_t srcLength,
4158 : uint32_t options) const
4159 : {
4160 0 : if(srcText.isBogus()) {
4161 0 : return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
4162 : } else {
4163 0 : srcText.pinIndices(srcStart, srcLength);
4164 0 : return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
4165 : }
4166 : }
4167 :
4168 : inline int8_t
4169 0 : UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
4170 0 : return doCaseCompare(0, length(), text, 0, text.length(), options);
4171 : }
4172 :
4173 : inline int8_t
4174 0 : UnicodeString::caseCompare(int32_t start,
4175 : int32_t _length,
4176 : const UnicodeString &srcText,
4177 : uint32_t options) const {
4178 0 : return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
4179 : }
4180 :
4181 : inline int8_t
4182 : UnicodeString::caseCompare(ConstChar16Ptr srcChars,
4183 : int32_t srcLength,
4184 : uint32_t options) const {
4185 : return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
4186 : }
4187 :
4188 : inline int8_t
4189 : UnicodeString::caseCompare(int32_t start,
4190 : int32_t _length,
4191 : const UnicodeString &srcText,
4192 : int32_t srcStart,
4193 : int32_t srcLength,
4194 : uint32_t options) const {
4195 : return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
4196 : }
4197 :
4198 : inline int8_t
4199 0 : UnicodeString::caseCompare(int32_t start,
4200 : int32_t _length,
4201 : const char16_t *srcChars,
4202 : uint32_t options) const {
4203 0 : return doCaseCompare(start, _length, srcChars, 0, _length, options);
4204 : }
4205 :
4206 : inline int8_t
4207 : UnicodeString::caseCompare(int32_t start,
4208 : int32_t _length,
4209 : const char16_t *srcChars,
4210 : int32_t srcStart,
4211 : int32_t srcLength,
4212 : uint32_t options) const {
4213 : return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
4214 : }
4215 :
4216 : inline int8_t
4217 : UnicodeString::caseCompareBetween(int32_t start,
4218 : int32_t limit,
4219 : const UnicodeString &srcText,
4220 : int32_t srcStart,
4221 : int32_t srcLimit,
4222 : uint32_t options) const {
4223 : return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
4224 : }
4225 :
4226 : inline int32_t
4227 0 : UnicodeString::indexOf(const UnicodeString& srcText,
4228 : int32_t srcStart,
4229 : int32_t srcLength,
4230 : int32_t start,
4231 : int32_t _length) const
4232 : {
4233 0 : if(!srcText.isBogus()) {
4234 0 : srcText.pinIndices(srcStart, srcLength);
4235 0 : if(srcLength > 0) {
4236 0 : return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
4237 : }
4238 : }
4239 0 : return -1;
4240 : }
4241 :
4242 : inline int32_t
4243 0 : UnicodeString::indexOf(const UnicodeString& text) const
4244 0 : { return indexOf(text, 0, text.length(), 0, length()); }
4245 :
4246 : inline int32_t
4247 0 : UnicodeString::indexOf(const UnicodeString& text,
4248 : int32_t start) const {
4249 0 : pinIndex(start);
4250 0 : return indexOf(text, 0, text.length(), start, length() - start);
4251 : }
4252 :
4253 : inline int32_t
4254 : UnicodeString::indexOf(const UnicodeString& text,
4255 : int32_t start,
4256 : int32_t _length) const
4257 : { return indexOf(text, 0, text.length(), start, _length); }
4258 :
4259 : inline int32_t
4260 0 : UnicodeString::indexOf(const char16_t *srcChars,
4261 : int32_t srcLength,
4262 : int32_t start) const {
4263 0 : pinIndex(start);
4264 0 : return indexOf(srcChars, 0, srcLength, start, length() - start);
4265 : }
4266 :
4267 : inline int32_t
4268 : UnicodeString::indexOf(ConstChar16Ptr srcChars,
4269 : int32_t srcLength,
4270 : int32_t start,
4271 : int32_t _length) const
4272 : { return indexOf(srcChars, 0, srcLength, start, _length); }
4273 :
4274 : inline int32_t
4275 : UnicodeString::indexOf(char16_t c,
4276 : int32_t start,
4277 : int32_t _length) const
4278 : { return doIndexOf(c, start, _length); }
4279 :
4280 : inline int32_t
4281 0 : UnicodeString::indexOf(UChar32 c,
4282 : int32_t start,
4283 : int32_t _length) const
4284 0 : { return doIndexOf(c, start, _length); }
4285 :
4286 : inline int32_t
4287 0 : UnicodeString::indexOf(char16_t c) const
4288 0 : { return doIndexOf(c, 0, length()); }
4289 :
4290 : inline int32_t
4291 : UnicodeString::indexOf(UChar32 c) const
4292 : { return indexOf(c, 0, length()); }
4293 :
4294 : inline int32_t
4295 0 : UnicodeString::indexOf(char16_t c,
4296 : int32_t start) const {
4297 0 : pinIndex(start);
4298 0 : return doIndexOf(c, start, length() - start);
4299 : }
4300 :
4301 : inline int32_t
4302 0 : UnicodeString::indexOf(UChar32 c,
4303 : int32_t start) const {
4304 0 : pinIndex(start);
4305 0 : return indexOf(c, start, length() - start);
4306 : }
4307 :
4308 : inline int32_t
4309 : UnicodeString::lastIndexOf(ConstChar16Ptr srcChars,
4310 : int32_t srcLength,
4311 : int32_t start,
4312 : int32_t _length) const
4313 : { return lastIndexOf(srcChars, 0, srcLength, start, _length); }
4314 :
4315 : inline int32_t
4316 0 : UnicodeString::lastIndexOf(const char16_t *srcChars,
4317 : int32_t srcLength,
4318 : int32_t start) const {
4319 0 : pinIndex(start);
4320 0 : return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
4321 : }
4322 :
4323 : inline int32_t
4324 : UnicodeString::lastIndexOf(const UnicodeString& srcText,
4325 : int32_t srcStart,
4326 : int32_t srcLength,
4327 : int32_t start,
4328 : int32_t _length) const
4329 : {
4330 : if(!srcText.isBogus()) {
4331 : srcText.pinIndices(srcStart, srcLength);
4332 : if(srcLength > 0) {
4333 : return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
4334 : }
4335 : }
4336 : return -1;
4337 : }
4338 :
4339 : inline int32_t
4340 : UnicodeString::lastIndexOf(const UnicodeString& text,
4341 : int32_t start,
4342 : int32_t _length) const
4343 : { return lastIndexOf(text, 0, text.length(), start, _length); }
4344 :
4345 : inline int32_t
4346 : UnicodeString::lastIndexOf(const UnicodeString& text,
4347 : int32_t start) const {
4348 : pinIndex(start);
4349 : return lastIndexOf(text, 0, text.length(), start, length() - start);
4350 : }
4351 :
4352 : inline int32_t
4353 : UnicodeString::lastIndexOf(const UnicodeString& text) const
4354 : { return lastIndexOf(text, 0, text.length(), 0, length()); }
4355 :
4356 : inline int32_t
4357 : UnicodeString::lastIndexOf(char16_t c,
4358 : int32_t start,
4359 : int32_t _length) const
4360 : { return doLastIndexOf(c, start, _length); }
4361 :
4362 : inline int32_t
4363 : UnicodeString::lastIndexOf(UChar32 c,
4364 : int32_t start,
4365 : int32_t _length) const {
4366 : return doLastIndexOf(c, start, _length);
4367 : }
4368 :
4369 : inline int32_t
4370 0 : UnicodeString::lastIndexOf(char16_t c) const
4371 0 : { return doLastIndexOf(c, 0, length()); }
4372 :
4373 : inline int32_t
4374 : UnicodeString::lastIndexOf(UChar32 c) const {
4375 : return lastIndexOf(c, 0, length());
4376 : }
4377 :
4378 : inline int32_t
4379 0 : UnicodeString::lastIndexOf(char16_t c,
4380 : int32_t start) const {
4381 0 : pinIndex(start);
4382 0 : return doLastIndexOf(c, start, length() - start);
4383 : }
4384 :
4385 : inline int32_t
4386 : UnicodeString::lastIndexOf(UChar32 c,
4387 : int32_t start) const {
4388 : pinIndex(start);
4389 : return lastIndexOf(c, start, length() - start);
4390 : }
4391 :
4392 : inline UBool
4393 0 : UnicodeString::startsWith(const UnicodeString& text) const
4394 0 : { return compare(0, text.length(), text, 0, text.length()) == 0; }
4395 :
4396 : inline UBool
4397 : UnicodeString::startsWith(const UnicodeString& srcText,
4398 : int32_t srcStart,
4399 : int32_t srcLength) const
4400 : { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
4401 :
4402 : inline UBool
4403 0 : UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const {
4404 0 : if(srcLength < 0) {
4405 0 : srcLength = u_strlen(toUCharPtr(srcChars));
4406 : }
4407 0 : return doCompare(0, srcLength, srcChars, 0, srcLength) == 0;
4408 : }
4409 :
4410 : inline UBool
4411 : UnicodeString::startsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const {
4412 : if(srcLength < 0) {
4413 : srcLength = u_strlen(toUCharPtr(srcChars));
4414 : }
4415 : return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;
4416 : }
4417 :
4418 : inline UBool
4419 : UnicodeString::endsWith(const UnicodeString& text) const
4420 : { return doCompare(length() - text.length(), text.length(),
4421 : text, 0, text.length()) == 0; }
4422 :
4423 : inline UBool
4424 0 : UnicodeString::endsWith(const UnicodeString& srcText,
4425 : int32_t srcStart,
4426 : int32_t srcLength) const {
4427 0 : srcText.pinIndices(srcStart, srcLength);
4428 0 : return doCompare(length() - srcLength, srcLength,
4429 0 : srcText, srcStart, srcLength) == 0;
4430 : }
4431 :
4432 : inline UBool
4433 0 : UnicodeString::endsWith(ConstChar16Ptr srcChars,
4434 : int32_t srcLength) const {
4435 0 : if(srcLength < 0) {
4436 0 : srcLength = u_strlen(toUCharPtr(srcChars));
4437 : }
4438 0 : return doCompare(length() - srcLength, srcLength,
4439 0 : srcChars, 0, srcLength) == 0;
4440 : }
4441 :
4442 : inline UBool
4443 : UnicodeString::endsWith(const char16_t *srcChars,
4444 : int32_t srcStart,
4445 : int32_t srcLength) const {
4446 : if(srcLength < 0) {
4447 : srcLength = u_strlen(toUCharPtr(srcChars + srcStart));
4448 : }
4449 : return doCompare(length() - srcLength, srcLength,
4450 : srcChars, srcStart, srcLength) == 0;
4451 : }
4452 :
4453 : //========================================
4454 : // replace
4455 : //========================================
4456 : inline UnicodeString&
4457 0 : UnicodeString::replace(int32_t start,
4458 : int32_t _length,
4459 : const UnicodeString& srcText)
4460 0 : { return doReplace(start, _length, srcText, 0, srcText.length()); }
4461 :
4462 : inline UnicodeString&
4463 0 : UnicodeString::replace(int32_t start,
4464 : int32_t _length,
4465 : const UnicodeString& srcText,
4466 : int32_t srcStart,
4467 : int32_t srcLength)
4468 0 : { return doReplace(start, _length, srcText, srcStart, srcLength); }
4469 :
4470 : inline UnicodeString&
4471 0 : UnicodeString::replace(int32_t start,
4472 : int32_t _length,
4473 : ConstChar16Ptr srcChars,
4474 : int32_t srcLength)
4475 0 : { return doReplace(start, _length, srcChars, 0, srcLength); }
4476 :
4477 : inline UnicodeString&
4478 : UnicodeString::replace(int32_t start,
4479 : int32_t _length,
4480 : const char16_t *srcChars,
4481 : int32_t srcStart,
4482 : int32_t srcLength)
4483 : { return doReplace(start, _length, srcChars, srcStart, srcLength); }
4484 :
4485 : inline UnicodeString&
4486 0 : UnicodeString::replace(int32_t start,
4487 : int32_t _length,
4488 : char16_t srcChar)
4489 0 : { return doReplace(start, _length, &srcChar, 0, 1); }
4490 :
4491 : inline UnicodeString&
4492 0 : UnicodeString::replaceBetween(int32_t start,
4493 : int32_t limit,
4494 : const UnicodeString& srcText)
4495 0 : { return doReplace(start, limit - start, srcText, 0, srcText.length()); }
4496 :
4497 : inline UnicodeString&
4498 : UnicodeString::replaceBetween(int32_t start,
4499 : int32_t limit,
4500 : const UnicodeString& srcText,
4501 : int32_t srcStart,
4502 : int32_t srcLimit)
4503 : { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
4504 :
4505 : inline UnicodeString&
4506 0 : UnicodeString::findAndReplace(const UnicodeString& oldText,
4507 : const UnicodeString& newText)
4508 : { return findAndReplace(0, length(), oldText, 0, oldText.length(),
4509 0 : newText, 0, newText.length()); }
4510 :
4511 : inline UnicodeString&
4512 : UnicodeString::findAndReplace(int32_t start,
4513 : int32_t _length,
4514 : const UnicodeString& oldText,
4515 : const UnicodeString& newText)
4516 : { return findAndReplace(start, _length, oldText, 0, oldText.length(),
4517 : newText, 0, newText.length()); }
4518 :
4519 : // ============================
4520 : // extract
4521 : // ============================
4522 : inline void
4523 0 : UnicodeString::doExtract(int32_t start,
4524 : int32_t _length,
4525 : UnicodeString& target) const
4526 0 : { target.replace(0, target.length(), *this, start, _length); }
4527 :
4528 : inline void
4529 0 : UnicodeString::extract(int32_t start,
4530 : int32_t _length,
4531 : Char16Ptr target,
4532 : int32_t targetStart) const
4533 0 : { doExtract(start, _length, target, targetStart); }
4534 :
4535 : inline void
4536 0 : UnicodeString::extract(int32_t start,
4537 : int32_t _length,
4538 : UnicodeString& target) const
4539 0 : { doExtract(start, _length, target); }
4540 :
4541 : #if !UCONFIG_NO_CONVERSION
4542 :
4543 : inline int32_t
4544 : UnicodeString::extract(int32_t start,
4545 : int32_t _length,
4546 : char *dst,
4547 : const char *codepage) const
4548 :
4549 : {
4550 : // This dstSize value will be checked explicitly
4551 : return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
4552 : }
4553 :
4554 : #endif
4555 :
4556 : inline void
4557 0 : UnicodeString::extractBetween(int32_t start,
4558 : int32_t limit,
4559 : char16_t *dst,
4560 : int32_t dstStart) const {
4561 0 : pinIndex(start);
4562 0 : pinIndex(limit);
4563 0 : doExtract(start, limit - start, dst, dstStart);
4564 0 : }
4565 :
4566 : inline UnicodeString
4567 0 : UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
4568 0 : return tempSubString(start, limit - start);
4569 : }
4570 :
4571 : inline char16_t
4572 0 : UnicodeString::doCharAt(int32_t offset) const
4573 : {
4574 0 : if((uint32_t)offset < (uint32_t)length()) {
4575 0 : return getArrayStart()[offset];
4576 : } else {
4577 0 : return kInvalidUChar;
4578 : }
4579 : }
4580 :
4581 : inline char16_t
4582 0 : UnicodeString::charAt(int32_t offset) const
4583 0 : { return doCharAt(offset); }
4584 :
4585 : inline char16_t
4586 0 : UnicodeString::operator[] (int32_t offset) const
4587 0 : { return doCharAt(offset); }
4588 :
4589 : inline UBool
4590 0 : UnicodeString::isEmpty() const {
4591 : // Arithmetic or logical right shift does not matter: only testing for 0.
4592 0 : return (fUnion.fFields.fLengthAndFlags>>kLengthShift) == 0;
4593 : }
4594 :
4595 : //========================================
4596 : // Write implementation methods
4597 : //========================================
4598 : inline void
4599 0 : UnicodeString::setZeroLength() {
4600 0 : fUnion.fFields.fLengthAndFlags &= kAllStorageFlags;
4601 0 : }
4602 :
4603 : inline void
4604 0 : UnicodeString::setShortLength(int32_t len) {
4605 : // requires 0 <= len <= kMaxShortLength
4606 0 : fUnion.fFields.fLengthAndFlags =
4607 0 : (int16_t)((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) | (len << kLengthShift));
4608 0 : }
4609 :
4610 : inline void
4611 0 : UnicodeString::setLength(int32_t len) {
4612 0 : if(len <= kMaxShortLength) {
4613 0 : setShortLength(len);
4614 : } else {
4615 0 : fUnion.fFields.fLengthAndFlags |= kLengthIsLarge;
4616 0 : fUnion.fFields.fLength = len;
4617 : }
4618 0 : }
4619 :
4620 : inline void
4621 0 : UnicodeString::setToEmpty() {
4622 0 : fUnion.fFields.fLengthAndFlags = kShortString;
4623 0 : }
4624 :
4625 : inline void
4626 0 : UnicodeString::setArray(char16_t *array, int32_t len, int32_t capacity) {
4627 0 : setLength(len);
4628 0 : fUnion.fFields.fArray = array;
4629 0 : fUnion.fFields.fCapacity = capacity;
4630 0 : }
4631 :
4632 : inline UnicodeString&
4633 0 : UnicodeString::operator= (char16_t ch)
4634 0 : { return doReplace(0, length(), &ch, 0, 1); }
4635 :
4636 : inline UnicodeString&
4637 : UnicodeString::operator= (UChar32 ch)
4638 : { return replace(0, length(), ch); }
4639 :
4640 : inline UnicodeString&
4641 0 : UnicodeString::setTo(const UnicodeString& srcText,
4642 : int32_t srcStart,
4643 : int32_t srcLength)
4644 : {
4645 0 : unBogus();
4646 0 : return doReplace(0, length(), srcText, srcStart, srcLength);
4647 : }
4648 :
4649 : inline UnicodeString&
4650 0 : UnicodeString::setTo(const UnicodeString& srcText,
4651 : int32_t srcStart)
4652 : {
4653 0 : unBogus();
4654 0 : srcText.pinIndex(srcStart);
4655 0 : return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
4656 : }
4657 :
4658 : inline UnicodeString&
4659 0 : UnicodeString::setTo(const UnicodeString& srcText)
4660 : {
4661 0 : return copyFrom(srcText);
4662 : }
4663 :
4664 : inline UnicodeString&
4665 0 : UnicodeString::setTo(const char16_t *srcChars,
4666 : int32_t srcLength)
4667 : {
4668 0 : unBogus();
4669 0 : return doReplace(0, length(), srcChars, 0, srcLength);
4670 : }
4671 :
4672 : inline UnicodeString&
4673 0 : UnicodeString::setTo(char16_t srcChar)
4674 : {
4675 0 : unBogus();
4676 0 : return doReplace(0, length(), &srcChar, 0, 1);
4677 : }
4678 :
4679 : inline UnicodeString&
4680 0 : UnicodeString::setTo(UChar32 srcChar)
4681 : {
4682 0 : unBogus();
4683 0 : return replace(0, length(), srcChar);
4684 : }
4685 :
4686 : inline UnicodeString&
4687 0 : UnicodeString::append(const UnicodeString& srcText,
4688 : int32_t srcStart,
4689 : int32_t srcLength)
4690 0 : { return doAppend(srcText, srcStart, srcLength); }
4691 :
4692 : inline UnicodeString&
4693 0 : UnicodeString::append(const UnicodeString& srcText)
4694 0 : { return doAppend(srcText, 0, srcText.length()); }
4695 :
4696 : inline UnicodeString&
4697 0 : UnicodeString::append(const char16_t *srcChars,
4698 : int32_t srcStart,
4699 : int32_t srcLength)
4700 0 : { return doAppend(srcChars, srcStart, srcLength); }
4701 :
4702 : inline UnicodeString&
4703 0 : UnicodeString::append(ConstChar16Ptr srcChars,
4704 : int32_t srcLength)
4705 0 : { return doAppend(srcChars, 0, srcLength); }
4706 :
4707 : inline UnicodeString&
4708 0 : UnicodeString::append(char16_t srcChar)
4709 0 : { return doAppend(&srcChar, 0, 1); }
4710 :
4711 : inline UnicodeString&
4712 0 : UnicodeString::operator+= (char16_t ch)
4713 0 : { return doAppend(&ch, 0, 1); }
4714 :
4715 : inline UnicodeString&
4716 0 : UnicodeString::operator+= (UChar32 ch) {
4717 0 : return append(ch);
4718 : }
4719 :
4720 : inline UnicodeString&
4721 0 : UnicodeString::operator+= (const UnicodeString& srcText)
4722 0 : { return doAppend(srcText, 0, srcText.length()); }
4723 :
4724 : inline UnicodeString&
4725 : UnicodeString::insert(int32_t start,
4726 : const UnicodeString& srcText,
4727 : int32_t srcStart,
4728 : int32_t srcLength)
4729 : { return doReplace(start, 0, srcText, srcStart, srcLength); }
4730 :
4731 : inline UnicodeString&
4732 0 : UnicodeString::insert(int32_t start,
4733 : const UnicodeString& srcText)
4734 0 : { return doReplace(start, 0, srcText, 0, srcText.length()); }
4735 :
4736 : inline UnicodeString&
4737 0 : UnicodeString::insert(int32_t start,
4738 : const char16_t *srcChars,
4739 : int32_t srcStart,
4740 : int32_t srcLength)
4741 0 : { return doReplace(start, 0, srcChars, srcStart, srcLength); }
4742 :
4743 : inline UnicodeString&
4744 0 : UnicodeString::insert(int32_t start,
4745 : ConstChar16Ptr srcChars,
4746 : int32_t srcLength)
4747 0 : { return doReplace(start, 0, srcChars, 0, srcLength); }
4748 :
4749 : inline UnicodeString&
4750 0 : UnicodeString::insert(int32_t start,
4751 : char16_t srcChar)
4752 0 : { return doReplace(start, 0, &srcChar, 0, 1); }
4753 :
4754 : inline UnicodeString&
4755 0 : UnicodeString::insert(int32_t start,
4756 : UChar32 srcChar)
4757 0 : { return replace(start, 0, srcChar); }
4758 :
4759 :
4760 : inline UnicodeString&
4761 0 : UnicodeString::remove()
4762 : {
4763 : // remove() of a bogus string makes the string empty and non-bogus
4764 0 : if(isBogus()) {
4765 0 : setToEmpty();
4766 : } else {
4767 0 : setZeroLength();
4768 : }
4769 0 : return *this;
4770 : }
4771 :
4772 : inline UnicodeString&
4773 0 : UnicodeString::remove(int32_t start,
4774 : int32_t _length)
4775 : {
4776 0 : if(start <= 0 && _length == INT32_MAX) {
4777 : // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
4778 0 : return remove();
4779 : }
4780 0 : return doReplace(start, _length, NULL, 0, 0);
4781 : }
4782 :
4783 : inline UnicodeString&
4784 0 : UnicodeString::removeBetween(int32_t start,
4785 : int32_t limit)
4786 0 : { return doReplace(start, limit - start, NULL, 0, 0); }
4787 :
4788 : inline UnicodeString &
4789 0 : UnicodeString::retainBetween(int32_t start, int32_t limit) {
4790 0 : truncate(limit);
4791 0 : return doReplace(0, start, NULL, 0, 0);
4792 : }
4793 :
4794 : inline UBool
4795 0 : UnicodeString::truncate(int32_t targetLength)
4796 : {
4797 0 : if(isBogus() && targetLength == 0) {
4798 : // truncate(0) of a bogus string makes the string empty and non-bogus
4799 0 : unBogus();
4800 0 : return FALSE;
4801 0 : } else if((uint32_t)targetLength < (uint32_t)length()) {
4802 0 : setLength(targetLength);
4803 0 : return TRUE;
4804 : } else {
4805 0 : return FALSE;
4806 : }
4807 : }
4808 :
4809 : inline UnicodeString&
4810 0 : UnicodeString::reverse()
4811 0 : { return doReverse(0, length()); }
4812 :
4813 : inline UnicodeString&
4814 : UnicodeString::reverse(int32_t start,
4815 : int32_t _length)
4816 : { return doReverse(start, _length); }
4817 :
4818 : U_NAMESPACE_END
4819 :
4820 : #endif
|