Line data Source code
1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 : /* This Source Code Form is subject to the terms of the Mozilla Public
4 : * License, v. 2.0. If a copy of the MPL was not distributed with this
5 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 : // IWYU pragma: private, include "nsString.h"
7 :
8 : #ifndef nsReadableUtils_h___
9 : #define nsReadableUtils_h___
10 :
11 : /**
12 : * I guess the routines in this file are all mis-named.
13 : * According to our conventions, they should be |NS_xxx|.
14 : */
15 :
16 : #include "mozilla/Assertions.h"
17 : #include "nsAString.h"
18 :
19 : #include "nsTArrayForwardDeclare.h"
20 :
21 : inline size_t
22 2695 : Distance(const nsReadingIterator<char16_t>& aStart,
23 : const nsReadingIterator<char16_t>& aEnd)
24 : {
25 2695 : MOZ_ASSERT(aStart.get() <= aEnd.get());
26 2695 : return static_cast<size_t>(aEnd.get() - aStart.get());
27 : }
28 : inline size_t
29 11 : Distance(const nsReadingIterator<char>& aStart,
30 : const nsReadingIterator<char>& aEnd)
31 : {
32 11 : MOZ_ASSERT(aStart.get() <= aEnd.get());
33 11 : return static_cast<size_t>(aEnd.get() - aStart.get());
34 : }
35 :
/*
 * String encoding-conversion helpers. Only the declarations appear here.
 *
 * Each function takes a read-only |aSource| and a writable |aDest|; the
 * encodings involved are spelled out in the function name (e.g. UTF16toUTF8).
 * NOTE(review): the Copy* forms presumably replace the contents of |aDest|
 * while the Append* forms add to it -- confirm against the definitions.
 *
 * Overloads that take a trailing |const mozilla::fallible_t&| return |bool|
 * and are marked MOZ_MUST_USE; all other overloads return |void|.
 * NOTE(review): the |bool| presumably reports whether the operation
 * succeeded -- confirm against the definitions.
 *
 * The |const char*| / |char16ptr_t| overloads take a raw pointer source
 * instead of a string object. NOTE(review): no length is passed, so the
 * pointer is presumably expected to be zero-terminated -- confirm.
 */
36 : void LossyCopyUTF16toASCII(const nsAString& aSource, nsACString& aDest);
37 : void CopyASCIItoUTF16(const nsACString& aSource, nsAString& aDest);
38 : MOZ_MUST_USE bool CopyASCIItoUTF16(const nsACString& aSource, nsAString& aDest,
39 : const mozilla::fallible_t&);
40 :
41 : void LossyCopyUTF16toASCII(const char16ptr_t aSource, nsACString& aDest);
42 : void CopyASCIItoUTF16(const char* aSource, nsAString& aDest);
43 :
44 : void CopyUTF16toUTF8(const nsAString& aSource, nsACString& aDest);
45 : MOZ_MUST_USE bool CopyUTF16toUTF8(const nsAString& aSource, nsACString& aDest,
46 : const mozilla::fallible_t&);
47 : void CopyUTF8toUTF16(const nsACString& aSource, nsAString& aDest);
48 :
49 : void CopyUTF16toUTF8(const char16ptr_t aSource, nsACString& aDest);
50 : void CopyUTF8toUTF16(const char* aSource, nsAString& aDest);
51 :
52 : void LossyAppendUTF16toASCII(const nsAString& aSource, nsACString& aDest);
53 : void AppendASCIItoUTF16(const nsACString& aSource, nsAString& aDest);
54 : MOZ_MUST_USE bool AppendASCIItoUTF16(const nsACString& aSource,
55 : nsAString& aDest,
56 : const mozilla::fallible_t&);
57 :
58 : void LossyAppendUTF16toASCII(const char16ptr_t aSource, nsACString& aDest);
59 : MOZ_MUST_USE bool AppendASCIItoUTF16(const char* aSource,
60 : nsAString& aDest,
61 : const mozilla::fallible_t&);
62 : void AppendASCIItoUTF16(const char* aSource, nsAString& aDest);
63 :
64 : void AppendUTF16toUTF8(const nsAString& aSource, nsACString& aDest);
65 : MOZ_MUST_USE bool AppendUTF16toUTF8(const nsAString& aSource,
66 : nsACString& aDest,
67 : const mozilla::fallible_t&);
68 : void AppendUTF8toUTF16(const nsACString& aSource, nsAString& aDest);
69 : MOZ_MUST_USE bool AppendUTF8toUTF16(const nsACString& aSource,
70 : nsAString& aDest,
71 : const mozilla::fallible_t&);
72 :
73 : void AppendUTF16toUTF8(const char16ptr_t aSource, nsACString& aDest);
74 : void AppendUTF8toUTF16(const char* aSource, nsAString& aDest);
75 :
76 : /**
77 : * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
78 : *
79 : * Allocates and returns a new |char| buffer which you must free with |free|.
80 : * Performs a lossy encoding conversion by chopping 16-bit wide characters down to 8-bits wide while copying |aSource| to your new buffer.
81 : * This conversion is not well defined; but it reproduces legacy string behavior.
82 : * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
83 : *
84 : * @param aSource a 16-bit wide string
85 : * @return a new |char| buffer you must free with |free|.
86 : */
87 : char* ToNewCString(const nsAString& aSource);
88 :
89 :
90 : /**
91 : * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
92 : *
93 : * Allocates and returns a new |char| buffer which you must free with |free|.
94 : * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
95 : *
96 : * @param aSource an 8-bit wide string
97 : * @return a new |char| buffer you must free with |free|.
98 : */
99 : char* ToNewCString(const nsACString& aSource);
100 :
101 : /**
102 : * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
103 : *
104 : * Allocates and returns a new |char| buffer which you must free with
105 : * |free|.
106 : * Performs an encoding conversion from a UTF-16 string to a UTF-8 string
107 : * copying |aSource| to your new buffer.
108 : * The new buffer is zero-terminated, but that may not help you if |aSource|
109 : * contains embedded nulls.
110 : *
111 : * @param aSource a UTF-16 string (made of char16_t's)
112 : * @param aUTF8Count the number of 8-bit units that was returned
113 : * @return a new |char| buffer you must free with |free|.
114 : */
115 :
116 : char* ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count = nullptr);
117 :
118 :
119 : /**
120 : * Returns a new |char16_t| buffer containing a zero-terminated copy of
121 : * |aSource|.
122 : *
123 : * Allocates and returns a new |char16_t| buffer which you must free with
124 : * |free|.
125 : * The new buffer is zero-terminated, but that may not help you if |aSource|
126 : * contains embedded nulls.
127 : *
128 : * @param aSource a UTF-16 string
129 : * @return a new |char16_t| buffer you must free with |free|.
130 : */
131 : char16_t* ToNewUnicode(const nsAString& aSource);
132 :
133 :
134 : /**
135 : * Returns a new |char16_t| buffer containing a zero-terminated copy of |aSource|.
136 : *
137 : * Allocates and returns a new |char16_t| buffer which you must free with |free|.
138 : * Performs an encoding conversion by 0-padding 8-bit wide characters up to 16-bits wide while copying |aSource| to your new buffer.
139 : * This conversion is not well defined; but it reproduces legacy string behavior.
140 : * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
141 : *
142 : * @param aSource an 8-bit wide string (a C-string, NOT UTF-8)
143 : * @return a new |char16_t| buffer you must free with |free|.
144 : */
145 : char16_t* ToNewUnicode(const nsACString& aSource);
146 :
147 : /**
148 : * Returns the required length for a char16_t buffer holding
149 : * a copy of aSource, using UTF-8 to UTF-16 conversion.
150 : * The length does NOT include any space for zero-termination.
151 : *
152 : * @param aSource an 8-bit wide string, UTF-8 encoded
153 : * @return length of UTF-16 encoded string copy, not zero-terminated
154 : */
155 : uint32_t CalcUTF8ToUnicodeLength(const nsACString& aSource);
156 :
157 : /**
158 : * Copies the source string into the specified buffer, converting UTF-8 to
159 : * UTF-16 in the process. The conversion is well defined for valid UTF-8
160 : * strings.
161 : * The copied string will be zero-terminated! Any embedded nulls will be
162 : * copied nonetheless. It is the caller's responsibility to ensure the buffer
163 : * is large enough to hold the string copy plus one char16_t for
164 : * zero-termination!
165 : *
166 : * @see CalcUTF8ToUnicodeLength( const nsACString& )
167 : * @see UTF8ToNewUnicode( const nsACString&, uint32_t* )
168 : *
169 : * @param aSource an 8-bit wide string, UTF-8 encoded
170 : * @param aBuffer the buffer holding the converted string copy
171 : * @param aUTF16Count receiving optionally the number of 16-bit units that
172 : * were copied
173 : * @return aBuffer pointer, for convenience
174 : */
175 : char16_t* UTF8ToUnicodeBuffer(const nsACString& aSource,
176 : char16_t* aBuffer,
177 : uint32_t* aUTF16Count = nullptr);
178 :
179 : /**
180 : * Returns a new |char16_t| buffer containing a zero-terminated copy
181 : * of |aSource|.
182 : *
183 : * Allocates and returns a new |char16_t| buffer which you must free with
184 : * |free|. Performs an encoding conversion from UTF-8 to UTF-16
185 : * while copying |aSource| to your new buffer. This conversion is well defined
186 : * for a valid UTF-8 string. The new buffer is zero-terminated, but that
187 : * may not help you if |aSource| contains embedded nulls.
188 : *
189 : * @param aSource an 8-bit wide string, UTF-8 encoded
190 : * @param aUTF16Count the number of 16-bit units that was returned
191 : * @return a new |char16_t| buffer you must free with |free|.
192 : * (UTF-16 encoded)
193 : */
194 : char16_t* UTF8ToNewUnicode(const nsACString& aSource,
195 : uint32_t* aUTF16Count = nullptr);
196 :
197 : /**
198 : * Copies |aLength| 16-bit code units from the start of |aSource| to the
199 : * |char16_t| buffer |aDest|.
200 : *
201 : * After this operation |aDest| is not null terminated.
202 : *
203 : * @param aSource a UTF-16 string
204 : * @param aSrcOffset start offset in the source string
205 : * @param aDest a |char16_t| buffer
206 : * @param aLength the number of 16-bit code units to copy
207 : * @return pointer to destination buffer - identical to |aDest|
208 : */
209 : char16_t* CopyUnicodeTo(const nsAString& aSource,
210 : uint32_t aSrcOffset,
211 : char16_t* aDest,
212 : uint32_t aLength);
213 :
214 :
215 : /**
216 : * Copies 16-bit characters between iterators |aSrcStart| and
217 : * |aSrcEnd| to the writable string |aDest|. Similar to the
218 : * |nsString::Mid| method.
219 : *
220 : * After this operation |aDest| is not null terminated.
221 : *
222 : * @param aSrcStart start source iterator
223 : * @param aSrcEnd end source iterator
224 : * @param aDest destination for the copy
225 : */
226 : void CopyUnicodeTo(const nsAString::const_iterator& aSrcStart,
227 : const nsAString::const_iterator& aSrcEnd,
228 : nsAString& aDest);
229 :
230 : /**
231 : * Appends 16-bit characters between iterators |aSrcStart| and
232 : * |aSrcEnd| to the writable string |aDest|.
233 : *
234 : * After this operation |aDest| is not null terminated.
235 : *
236 : * @param aSrcStart start source iterator
237 : * @param aSrcEnd end source iterator
238 : * @param aDest destination for the copy
239 : */
240 : void AppendUnicodeTo(const nsAString::const_iterator& aSrcStart,
241 : const nsAString::const_iterator& aSrcEnd,
242 : nsAString& aDest);
243 :
244 : /**
245 : * Returns |true| if |aString| contains only ASCII characters, that is, characters in the inclusive range [0x00, 0x7F].
246 : *
247 : * @param aString a 16-bit wide string to scan
248 : */
249 : bool IsASCII(const nsAString& aString);
250 :
251 : /**
252 : * Returns |true| if |aString| contains only ASCII characters, that is, characters in the inclusive range [0x00, 0x7F].
253 : *
254 : * @param aString an 8-bit wide string to scan
255 : */
256 : bool IsASCII(const nsACString& aString);
257 :
258 : /**
259 : * Returns |true| if |aString| is a valid UTF-8 string.
260 : * XXX This is neither bullet-proof nor an all-purpose UTF-8 validator.
261 : * It was mainly written to replace, and is roughly equivalent to,
262 : *
263 : * str.Equals(NS_ConvertUTF16toUTF8(NS_ConvertUTF8toUTF16(str)))
264 : *
265 : * (see bug 191541)
266 : * As such, it does not check for non-UTF-8 7bit encodings such as
267 : * ISO-2022-JP and HZ.
268 : *
269 : * It rejects sequences with the following errors:
270 : *
271 : * byte sequences that cannot be decoded into characters according to
272 : * UTF-8's rules (including cases where the input is part of a valid
273 : * UTF-8 sequence but starts or ends mid-character)
274 : * overlong sequences (i.e., cases where a character was encoded
275 : * non-canonically by using more bytes than necessary)
276 : * surrogate codepoints (i.e., the codepoints reserved for
277 : * representing astral characters in UTF-16)
278 : * codepoints above the unicode range (i.e., outside the first 17
279 : * planes; higher than U+10FFFF), in accordance with
280 : * http://tools.ietf.org/html/rfc3629
281 : * when aRejectNonChar is true (the default), any codepoint whose low
282 : * 16 bits are 0xFFFE or 0xFFFF
283 :
284 : *
285 : * @param aString an 8-bit wide string to scan
286 : * @param aRejectNonChar a boolean to control the rejection of utf-8
287 : * non characters
288 : */
289 : bool IsUTF8(const nsACString& aString, bool aRejectNonChar = true);
290 :
291 : bool ParseString(const nsACString& aAstring, char aDelimiter,
292 : nsTArray<nsCString>& aArray);
293 :
294 : /**
295 : * Converts case in place in the argument string.
296 : */
297 : void ToUpperCase(nsACString&);
298 :
299 : void ToLowerCase(nsACString&);
300 :
301 : void ToUpperCase(nsACString&);
302 :
303 : void ToLowerCase(nsACString&);
304 :
305 : /**
306 : * Converts case from string aSource to aDest.
307 : */
308 : void ToUpperCase(const nsACString& aSource, nsACString& aDest);
309 :
310 : void ToLowerCase(const nsACString& aSource, nsACString& aDest);
311 :
312 : /**
313 : * Finds the leftmost occurrence of |aPattern|, if any in the range |aSearchStart|..|aSearchEnd|.
314 : *
315 : * Returns |true| if a match was found, and adjusts |aSearchStart| and |aSearchEnd| to
316 : * point to the match. If no match was found, returns |false| and makes |aSearchStart == aSearchEnd|.
317 : *
318 : * Currently, this is equivalent to the O(m*n) implementation previously on |ns[C]String|.
319 : * If we need something faster, then we can implement that later.
320 : */
321 :
322 : bool FindInReadable(const nsAString& aPattern, nsAString::const_iterator&,
323 : nsAString::const_iterator&,
324 : const nsStringComparator& = nsDefaultStringComparator());
325 : bool FindInReadable(const nsACString& aPattern, nsACString::const_iterator&,
326 : nsACString::const_iterator&,
327 : const nsCStringComparator& = nsDefaultCStringComparator());
328 :
329 : /* sometimes we don't care about where the string was, just that we
330 : * found it or not */
331 : inline bool
332 14 : FindInReadable(const nsAString& aPattern, const nsAString& aSource,
333 : const nsStringComparator& aCompare = nsDefaultStringComparator())
334 : {
335 14 : nsAString::const_iterator start, end;
336 14 : aSource.BeginReading(start);
337 14 : aSource.EndReading(end);
338 14 : return FindInReadable(aPattern, start, end, aCompare);
339 : }
340 :
341 : inline bool
342 2 : FindInReadable(const nsACString& aPattern, const nsACString& aSource,
343 : const nsCStringComparator& aCompare = nsDefaultCStringComparator())
344 : {
345 2 : nsACString::const_iterator start, end;
346 2 : aSource.BeginReading(start);
347 2 : aSource.EndReading(end);
348 2 : return FindInReadable(aPattern, start, end, aCompare);
349 : }
350 :
351 :
352 : bool CaseInsensitiveFindInReadable(const nsACString& aPattern,
353 : nsACString::const_iterator&,
354 : nsACString::const_iterator&);
355 :
356 : /**
357 : * Finds the rightmost occurrence of |aPattern|
358 : * Returns |true| if a match was found, and adjusts |aSearchStart| and |aSearchEnd| to
359 : * point to the match. If no match was found, returns |false| and makes |aSearchStart == aSearchEnd|.
360 : *
361 : */
362 : bool RFindInReadable(const nsAString& aPattern, nsAString::const_iterator&,
363 : nsAString::const_iterator&,
364 : const nsStringComparator& = nsDefaultStringComparator());
365 : bool RFindInReadable(const nsACString& aPattern, nsACString::const_iterator&,
366 : nsACString::const_iterator&,
367 : const nsCStringComparator& = nsDefaultCStringComparator());
368 :
369 : /**
370 : * Finds the leftmost occurrence of |aChar|, if any in the range
371 : * |aSearchStart|..|aSearchEnd|.
372 : *
373 : * Returns |true| if a match was found, and adjusts |aSearchStart| to
374 : * point to the match. If no match was found, returns |false| and
375 : * makes |aSearchStart == aSearchEnd|.
376 : */
377 : bool FindCharInReadable(char16_t aChar, nsAString::const_iterator& aSearchStart,
378 : const nsAString::const_iterator& aSearchEnd);
379 : bool FindCharInReadable(char aChar, nsACString::const_iterator& aSearchStart,
380 : const nsACString::const_iterator& aSearchEnd);
381 :
382 : /**
383 : * Finds the number of occurrences of |aChar| in the string |aStr|.
384 : */
385 : uint32_t CountCharInReadable(const nsAString& aStr,
386 : char16_t aChar);
387 : uint32_t CountCharInReadable(const nsACString& aStr,
388 : char aChar);
389 :
390 : bool StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring);
391 : bool StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring,
392 : const nsStringComparator& aComparator);
393 : bool StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring);
394 : bool StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring,
395 : const nsCStringComparator& aComparator);
396 : bool StringEndsWith(const nsAString& aSource, const nsAString& aSubstring);
397 : bool StringEndsWith(const nsAString& aSource, const nsAString& aSubstring,
398 : const nsStringComparator& aComparator);
399 : bool StringEndsWith(const nsACString& aSource, const nsACString& aSubstring);
400 : bool StringEndsWith(const nsACString& aSource, const nsACString& aSubstring,
401 : const nsCStringComparator& aComparator);
402 :
403 : const nsString& EmptyString();
404 : const nsCString& EmptyCString();
405 :
406 : const nsString& NullString();
407 : const nsCString& NullCString();
408 :
409 : /**
410 : * Compare a UTF-8 string to an UTF-16 string.
411 : *
412 : * Returns 0 if the strings are equal, -1 if aUTF8String is less
413 : * than aUTF16String, and 1 in the reverse case. In case of fatal
414 : * error (eg the strings are not valid UTF8 and UTF16 respectively),
415 : * this method will return INT32_MIN.
416 : */
417 : int32_t CompareUTF8toUTF16(const nsACString& aUTF8String,
418 : const nsAString& aUTF16String);
419 :
420 : void AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest);
421 :
/**
 * Asks |aStr| to resize itself to |aLen| and reports whether it did.
 *
 * @param aStr any object providing |SetLength(uint32_t)| and |Length()|
 * @param aLen the requested length
 * @return |true| if |aStr.Length()| equals |aLen| after the |SetLength| call,
 *         |false| otherwise
 */
template<class T>
inline bool
EnsureStringLength(T& aStr, uint32_t aLen)
{
  aStr.SetLength(aLen);

  // Re-read the length rather than trusting the request.
  const bool lengthMatches = (aStr.Length() == aLen);
  return lengthMatches;
}
429 :
430 : #endif // !defined(nsReadableUtils_h___)
|