Line data Source code
1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 : * vim: set ts=8 sts=4 et sw=4 tw=99:
3 : * This Source Code Form is subject to the terms of the Mozilla Public
4 : * License, v. 2.0. If a copy of the MPL was not distributed with this
5 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 :
7 : #ifndef jsstr_h
8 : #define jsstr_h
9 :
10 : #include "mozilla/HashFunctions.h"
11 : #include "mozilla/PodOperations.h"
12 :
13 : #include <stdio.h>
14 :
15 : #include "jsutil.h"
16 : #include "NamespaceImports.h"
17 :
18 : #include "gc/Rooting.h"
19 : #include "js/RootingAPI.h"
20 : #include "js/UniquePtr.h"
21 : #include "vm/Printer.h"
22 : #include "vm/Unicode.h"
23 :
24 : class JSAutoByteString;
25 : class JSLinearString;
26 :
27 : namespace js {
28 :
29 : class StringBuffer;
30 :
31 : template <AllowGC allowGC>
32 : extern JSString*
33 : ConcatStrings(JSContext* cx,
34 : typename MaybeRooted<JSString*, allowGC>::HandleType left,
35 : typename MaybeRooted<JSString*, allowGC>::HandleType right);
36 :
37 : // Return s advanced past any Unicode white space characters.
38 : template <typename CharT>
39 : static inline const CharT*
40 276 : SkipSpace(const CharT* s, const CharT* end)
41 : {
42 276 : MOZ_ASSERT(s <= end);
43 :
44 276 : while (s < end && unicode::IsSpace(*s))
45 0 : s++;
46 :
47 276 : return s;
48 : }
49 :
// Three-way comparison of two character ranges, code unit by code unit.
//
// Returns a value less than, equal to, or greater than zero depending on
// whether s1 is less than, equal to, or greater than s2. The first differing
// code unit decides; if one range is a prefix of the other, the shorter range
// orders first.
template <typename Char1, typename Char2>
inline int32_t
CompareChars(const Char1* s1, size_t len1, const Char2* s2, size_t len2)
{
    const size_t n = len1 < len2 ? len1 : len2;
    for (size_t i = 0; i < n; i++) {
        if (int32_t cmp = s1[i] - s2[i])
            return cmp;
    }

    // The common prefix matches, so the lengths decide. Do not narrow
    // |len1 - len2| straight to int32_t: a size_t difference that does not fit
    // would wrap and could report the wrong sign (or zero). Clamp instead, so
    // the exact difference is still returned whenever it is representable.
    if (len1 >= len2) {
        const size_t diff = len1 - len2;
        return diff > size_t(INT32_MAX) ? INT32_MAX : int32_t(diff);
    }

    const size_t diff = len2 - len1;
    return diff > size_t(INT32_MAX) ? INT32_MIN : -int32_t(diff);
}
64 :
65 : extern int32_t
66 : CompareChars(const char16_t* s1, size_t len1, JSLinearString* s2);
67 :
68 : } /* namespace js */
69 :
70 : struct JSSubString {
71 : JSLinearString* base;
72 : size_t offset;
73 : size_t length;
74 :
75 0 : JSSubString() { mozilla::PodZero(this); }
76 :
77 0 : void initEmpty(JSLinearString* base) {
78 0 : this->base = base;
79 0 : offset = length = 0;
80 0 : }
81 0 : void init(JSLinearString* base, size_t offset, size_t length) {
82 0 : this->base = base;
83 0 : this->offset = offset;
84 0 : this->length = length;
85 0 : }
86 : };
87 :
/*
 * Shorthands for ASCII (7-bit) decimal and hex conversion.
 * Manually inline isdigit and isxdigit for performance; MSVC doesn't do this for us.
 *
 * The range checks subtract the lower bound in unsigned arithmetic, so any
 * value below the bound wraps to a large number and fails the single <= test.
 *
 * These are macros: several of them expand their argument more than once, so
 * never pass an expression with side effects. Every replacement list is fully
 * parenthesized so that it behaves as a single operand wherever it is used.
 * JS7_UNHEX and JS7_ISLET rely on tolower()/isalpha() from <ctype.h>.
 */
#define JS7_ISDEC(c)    ((((unsigned)(c)) - '0') <= 9)
#define JS7_ISA2F(c)    (((((unsigned)(c)) - 'a') <= 5) || ((((unsigned)(c)) - 'A') <= 5))
#define JS7_UNDEC(c)    ((c) - '0')
#define JS7_ISOCT(c)    ((((unsigned)(c)) - '0') <= 7)
#define JS7_UNOCT(c)    (JS7_UNDEC(c))
#define JS7_ISHEX(c)    ((c) < 128 && (JS7_ISDEC(c) || JS7_ISA2F(c)))
#define JS7_UNHEX(c)    ((unsigned)(JS7_ISDEC(c) ? (c) - '0' : 10 + tolower(c) - 'a'))
#define JS7_ISLET(c)    ((c) < 128 && isalpha(c))
100 :
101 : extern size_t
102 : js_strlen(const char16_t* s);
103 :
104 : extern int32_t
105 : js_strcmp(const char16_t* lhs, const char16_t* rhs);
106 :
107 : template <typename CharT>
108 : extern const CharT*
109 : js_strchr_limit(const CharT* s, char16_t c, const CharT* limit);
110 :
111 : static MOZ_ALWAYS_INLINE void
112 : js_strncpy(char16_t* dst, const char16_t* src, size_t nelem)
113 : {
114 : return mozilla::PodCopy(dst, src, nelem);
115 : }
116 :
117 : extern int32_t
118 : js_fputs(const char16_t* s, FILE* f);
119 :
120 : namespace js {
121 :
122 : /* Initialize the String class, returning its prototype object. */
123 : extern JSObject*
124 : InitStringClass(JSContext* cx, HandleObject obj);
125 :
126 : /*
127 : * Convert a value to a printable C string.
128 : */
129 : extern const char*
130 : ValueToPrintable(JSContext* cx, const Value&, JSAutoByteString* bytes, bool asSource = false);
131 :
132 : extern UniqueChars
133 : DuplicateString(JSContext* cx, const char* s);
134 :
135 : extern UniqueTwoByteChars
136 : DuplicateString(JSContext* cx, const char16_t* s);
137 :
138 : /*
139 : * These variants do not report OOMs, you must arrange for OOMs to be reported
140 : * yourself.
141 : */
142 : extern UniqueChars
143 : DuplicateString(const char* s);
144 :
145 : extern UniqueChars
146 : DuplicateString(const char* s, size_t n);
147 :
148 : extern UniqueTwoByteChars
149 : DuplicateString(const char16_t* s);
150 :
151 : extern UniqueTwoByteChars
152 : DuplicateString(const char16_t* s, size_t n);
153 :
154 : /*
155 : * Convert a non-string value to a string, returning null after reporting an
156 : * error, otherwise returning a new string reference.
157 : */
158 : template <AllowGC allowGC>
159 : extern JSString*
160 : ToStringSlow(JSContext* cx, typename MaybeRooted<Value, allowGC>::HandleType arg);
161 :
162 : /*
163 : * Convert the given value to a string. This method includes an inline
164 : * fast-path for the case where the value is already a string; if the value is
165 : * known not to be a string, use ToStringSlow instead.
166 : */
167 : template <AllowGC allowGC>
168 : static MOZ_ALWAYS_INLINE JSString*
169 2240 : ToString(JSContext* cx, JS::HandleValue v)
170 : {
171 2240 : if (v.isString())
172 2178 : return v.toString();
173 62 : return ToStringSlow<allowGC>(cx, v);
174 : }
175 :
176 : /*
177 : * This function implements E-262-3 section 9.8, toString. Convert the given
178 : * value to a string of characters appended to the given buffer. On error, the
179 : * passed buffer may have partial results appended.
180 : */
181 : inline bool
182 : ValueToStringBuffer(JSContext* cx, const Value& v, StringBuffer& sb);
183 :
184 : /*
185 : * Convert a value to its source expression, returning null after reporting
186 : * an error, otherwise returning a new string reference.
187 : */
188 : extern JSString*
189 : ValueToSource(JSContext* cx, HandleValue v);
190 :
191 : /*
192 : * Convert a JSString to its source expression; returns null after reporting an
193 : * error, otherwise returns a new string reference. No Handle needed since the
194 : * input is dead after the GC.
195 : */
196 : extern JSString*
197 : StringToSource(JSContext* cx, JSString* str);
198 :
199 : /*
200 : * Test if strings are equal. The caller can call the function even if str1
201 : * or str2 are not GC-allocated things.
202 : */
203 : extern bool
204 : EqualStrings(JSContext* cx, JSString* str1, JSString* str2, bool* result);
205 :
206 : /* Use the infallible method instead! */
207 : extern bool
208 : EqualStrings(JSContext* cx, JSLinearString* str1, JSLinearString* str2, bool* result) = delete;
209 :
210 : /* EqualStrings is infallible on linear strings. */
211 : extern bool
212 : EqualStrings(JSLinearString* str1, JSLinearString* str2);
213 :
214 : extern bool
215 : EqualChars(JSLinearString* str1, JSLinearString* str2);
216 :
217 : /*
218 : * Store in |*result| a value less than, equal to, or greater than zero
219 : * depending on whether str1 is less than, equal to, or greater than str2.
220 : */
221 : extern bool
222 : CompareStrings(JSContext* cx, JSString* str1, JSString* str2, int32_t* result);
223 :
224 : /*
225 : * Same as CompareStrings but for atoms. Don't use this to just test
226 : * for equality; use this when you need an ordering on atoms.
227 : */
228 : extern int32_t
229 : CompareAtoms(JSAtom* atom1, JSAtom* atom2);
230 :
231 : /*
232 : * Return true if the string matches the given sequence of ASCII bytes.
233 : */
234 : extern bool
235 : StringEqualsAscii(JSLinearString* str, const char* asciiBytes);
236 :
237 : /* Return true if the string contains a pattern anywhere inside it. */
238 : extern bool
239 : StringHasPattern(JSLinearString* text, const char16_t* pat, uint32_t patlen);
240 :
241 : extern int
242 : StringFindPattern(JSLinearString* text, JSLinearString* pat, size_t start);
243 :
244 : /* Return true if the string contains a pattern at |start|. */
245 : extern bool
246 : HasSubstringAt(JSLinearString* text, JSLinearString* pat, size_t start);
247 :
248 : template <typename Char1, typename Char2>
249 : inline bool
250 : EqualChars(const Char1* s1, const Char2* s2, size_t len);
251 :
252 : template <typename Char1>
253 : inline bool
254 0 : EqualChars(const Char1* s1, const Char1* s2, size_t len)
255 : {
256 0 : return mozilla::PodEqual(s1, s2, len);
257 : }
258 :
// Element-wise equality of the first |len| characters of two ranges whose
// character types may differ. Returns false at the first mismatch and true
// if none is found (including when |len| is 0).
template <typename Char1, typename Char2>
inline bool
EqualChars(const Char1* s1, const Char2* s2, size_t len)
{
    for (size_t i = 0; i < len; i++) {
        if (s1[i] != s2[i])
            return false;
    }

    return true;
}
269 :
270 : /*
271 : * Computes |str|'s substring for the range [beginInt, beginInt + lengthInt).
272 : * Negative, overlarge, swapped, etc. |beginInt| and |lengthInt| are forbidden
273 : * and constitute API misuse.
274 : */
275 : JSString*
276 : SubstringKernel(JSContext* cx, HandleString str, int32_t beginInt, int32_t lengthInt);
277 :
278 : /*
279 : * Inflate bytes in ASCII encoding to char16_t code units. Return null on error,
280 : * otherwise return the char16_t buffer that was malloc'ed. length is updated to
281 : * the length of the new string (in char16_t code units). A null char is
282 : * appended, but it is not included in the length.
283 : */
284 : extern char16_t*
285 : InflateString(JSContext* cx, const char* bytes, size_t* length);
286 :
/*
 * Widen |srclen| bytes from |src| into char16_t code units stored at |dst|.
 * Each byte is read as an unsigned char, so values 0x80-0xFF become
 * 0x0080-0x00FF rather than being sign-extended. 'dst' must be large enough
 * for 'srclen' char16_t code units. The buffer is NOT null-terminated.
 */
inline void
CopyAndInflateChars(char16_t* dst, const char* src, size_t srclen)
{
    const char* const srcEnd = src + srclen;
    while (src != srcEnd)
        *dst++ = static_cast<unsigned char>(*src++);
}
297 :
298 : inline void
299 115 : CopyAndInflateChars(char16_t* dst, const JS::Latin1Char* src, size_t srclen)
300 : {
301 1843 : for (size_t i = 0; i < srclen; i++)
302 1728 : dst[i] = src[i];
303 115 : }
304 :
305 : /*
306 : * Deflate JS chars to bytes into a buffer. 'bytes' must be large enough for
307 : * 'length' chars. The buffer is NOT null-terminated. The destination length
308 : * must be initialized with the buffer size and will contain on return the
309 : * number of copied bytes.
310 : */
311 : template <typename CharT>
312 : extern bool
313 : DeflateStringToBuffer(JSContext* maybecx, const CharT* chars,
314 : size_t charsLength, char* bytes, size_t* length);
315 :
316 : extern bool
317 : str_fromCharCode(JSContext* cx, unsigned argc, Value* vp);
318 :
319 : extern bool
320 : str_fromCharCode_one_arg(JSContext* cx, HandleValue code, MutableHandleValue rval);
321 :
322 : extern bool
323 : str_fromCodePoint(JSContext* cx, unsigned argc, Value* vp);
324 :
325 : extern bool
326 : str_fromCodePoint_one_arg(JSContext* cx, HandleValue code, MutableHandleValue rval);
327 :
328 : /* String methods exposed so they can be installed in the self-hosting global. */
329 :
330 : extern bool
331 : str_includes(JSContext* cx, unsigned argc, Value* vp);
332 :
333 : extern bool
334 : str_indexOf(JSContext* cx, unsigned argc, Value* vp);
335 :
336 : extern bool
337 : str_lastIndexOf(JSContext* cx, unsigned argc, Value* vp);
338 :
339 : extern bool
340 : str_startsWith(JSContext* cx, unsigned argc, Value* vp);
341 :
342 : extern bool
343 : str_toLowerCase(JSContext* cx, unsigned argc, Value* vp);
344 :
345 : extern bool
346 : str_toUpperCase(JSContext* cx, unsigned argc, Value* vp);
347 :
348 : extern bool
349 : str_toString(JSContext* cx, unsigned argc, Value* vp);
350 :
351 : extern bool
352 : str_charAt(JSContext* cx, unsigned argc, Value* vp);
353 :
354 : extern bool
355 : str_charCodeAt_impl(JSContext* cx, HandleString string, HandleValue index, MutableHandleValue res);
356 :
357 : extern bool
358 : str_charCodeAt(JSContext* cx, unsigned argc, Value* vp);
359 :
360 : extern bool
361 : str_contains(JSContext *cx, unsigned argc, Value *vp);
362 :
363 : extern bool
364 : str_endsWith(JSContext* cx, unsigned argc, Value* vp);
365 :
366 : extern bool
367 : str_trim(JSContext* cx, unsigned argc, Value* vp);
368 :
369 : extern bool
370 : str_trimLeft(JSContext* cx, unsigned argc, Value* vp);
371 :
372 : extern bool
373 : str_trimRight(JSContext* cx, unsigned argc, Value* vp);
374 :
375 : #if !EXPOSE_INTL_API
376 : extern bool
377 : str_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp);
378 :
379 : extern bool
380 : str_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp);
381 :
382 : extern bool
383 : str_localeCompare(JSContext* cx, unsigned argc, Value* vp);
384 : #else
385 : extern bool
386 : str_normalize(JSContext* cx, unsigned argc, Value* vp);
387 : #endif
388 :
389 : extern bool
390 : str_concat(JSContext* cx, unsigned argc, Value* vp);
391 :
392 : /*
393 : * Convert one UCS-4 char and write it into a UTF-8 buffer, which must be at
394 : * least 4 bytes long. Return the number of UTF-8 bytes of data written.
395 : */
396 : extern uint32_t
397 : OneUcs4ToUtf8Char(uint8_t* utf8Buffer, uint32_t ucs4Char);
398 :
399 : extern size_t
400 : PutEscapedStringImpl(char* buffer, size_t size, GenericPrinter* out, JSLinearString* str,
401 : uint32_t quote);
402 :
403 : template <typename CharT>
404 : extern size_t
405 : PutEscapedStringImpl(char* buffer, size_t bufferSize, GenericPrinter* out, const CharT* chars,
406 : size_t length, uint32_t quote);
407 :
408 : /*
409 : * Write str into buffer escaping any non-printable or non-ASCII character
410 : * using \escapes for JS string literals.
411 : * Guarantees that a NUL is at the end of the buffer unless size is 0. Returns
412 : * the length of the written output, NOT including the NUL. Thus, a return
413 : * value of size or more means that the output was truncated. If buffer
414 : * is null, just returns the length of the output. If quote is not 0, it must
415 : * be a single or double quote character that will quote the output.
416 : */
417 : inline size_t
418 0 : PutEscapedString(char* buffer, size_t size, JSLinearString* str, uint32_t quote)
419 : {
420 0 : size_t n = PutEscapedStringImpl(buffer, size, nullptr, str, quote);
421 :
422 : /* PutEscapedStringImpl can only fail with a file. */
423 0 : MOZ_ASSERT(n != size_t(-1));
424 0 : return n;
425 : }
426 :
427 : template <typename CharT>
428 : inline size_t
429 0 : PutEscapedString(char* buffer, size_t bufferSize, const CharT* chars, size_t length, uint32_t quote)
430 : {
431 0 : size_t n = PutEscapedStringImpl(buffer, bufferSize, nullptr, chars, length, quote);
432 :
433 : /* PutEscapedStringImpl can only fail with a file. */
434 0 : MOZ_ASSERT(n != size_t(-1));
435 0 : return n;
436 : }
437 :
438 : inline bool
439 0 : EscapedStringPrinter(GenericPrinter& out, JSLinearString* str, uint32_t quote)
440 : {
441 0 : return PutEscapedStringImpl(nullptr, 0, &out, str, quote) != size_t(-1);
442 : }
443 :
444 : inline bool
445 0 : EscapedStringPrinter(GenericPrinter& out, const char* chars, size_t length, uint32_t quote)
446 : {
447 0 : return PutEscapedStringImpl(nullptr, 0, &out, chars, length, quote) != size_t(-1);
448 : }
449 :
450 : /*
451 : * Write str into file escaping any non-printable or non-ASCII character.
452 : * If quote is not 0, it must be a single or double quote character that
453 : * will quote the output.
454 : */
455 : inline bool
456 0 : FileEscapedString(FILE* fp, JSLinearString* str, uint32_t quote)
457 : {
458 0 : Fprinter out(fp);
459 0 : bool res = EscapedStringPrinter(out, str, quote);
460 0 : out.finish();
461 0 : return res;
462 : }
463 :
464 : inline bool
465 0 : FileEscapedString(FILE* fp, const char* chars, size_t length, uint32_t quote)
466 : {
467 0 : Fprinter out(fp);
468 0 : bool res = EscapedStringPrinter(out, chars, length, quote);
469 0 : out.finish();
470 0 : return res;
471 : }
472 :
473 : bool
474 : EncodeURI(JSContext* cx, StringBuffer& sb, const char* chars, size_t length);
475 :
476 : JSObject*
477 : str_split_string(JSContext* cx, HandleObjectGroup group, HandleString str, HandleString sep,
478 : uint32_t limit);
479 :
480 : JSString *
481 : str_flat_replace_string(JSContext *cx, HandleString string, HandleString pattern,
482 : HandleString replacement);
483 :
484 : JSString*
485 : str_replace_string_raw(JSContext* cx, HandleString string, HandleString pattern,
486 : HandleString replacement);
487 :
488 : extern JSString*
489 : StringToLowerCase(JSContext* cx, HandleLinearString string);
490 :
491 : extern JSString*
492 : StringToUpperCase(JSContext* cx, HandleLinearString string);
493 :
494 : extern bool
495 : StringConstructor(JSContext* cx, unsigned argc, Value* vp);
496 :
497 : extern bool
498 : FlatStringMatch(JSContext* cx, unsigned argc, Value* vp);
499 :
500 : extern bool
501 : FlatStringSearch(JSContext* cx, unsigned argc, Value* vp);
502 :
503 : } /* namespace js */
504 :
505 : #endif /* jsstr_h */
|