Line data Source code
1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 : * vim: set ts=8 sts=4 et sw=4 tw=99:
3 : * This Source Code Form is subject to the terms of the Mozilla Public
4 : * License, v. 2.0. If a copy of the MPL was not distributed with this
5 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 :
7 : #ifndef vm_String_h
8 : #define vm_String_h
9 :
10 : #include "mozilla/MemoryReporting.h"
11 : #include "mozilla/PodOperations.h"
12 : #include "mozilla/Range.h"
13 :
14 : #include "jsapi.h"
15 : #include "jsfriendapi.h"
16 : #include "jsstr.h"
17 :
18 : #include "gc/Barrier.h"
19 : #include "gc/Heap.h"
20 : #include "gc/Marking.h"
21 : #include "gc/Rooting.h"
22 : #include "js/CharacterEncoding.h"
23 : #include "js/GCAPI.h"
24 : #include "js/RootingAPI.h"
25 :
26 : class JSDependentString;
27 : class JSExtensibleString;
28 : class JSExternalString;
29 : class JSInlineString;
30 : class JSRope;
31 :
namespace js {

/* Types that are only referred to by name in this part of the header. */
class AutoStableStringChars;
class PropertyName;
class StaticStrings;

/*
 * Number of characters needed to write the largest unsigned 32-bit integer
 * in decimal. The literal's terminating null is subtracted out, so it is not
 * part of the count.
 */
static const size_t UINT32_CHAR_BUFFER_LENGTH = sizeof("4294967295") - 1;

} /* namespace js */
42 :
43 : /*
44 : * JavaScript strings
45 : *
46 : * Conceptually, a JS string is just an array of chars and a length. This array
47 : * of chars may or may not be null-terminated and, if it is, the null character
48 : * is not included in the length.
49 : *
50 : * To improve performance of common operations, the following optimizations are
51 : * made which affect the engine's representation of strings:
52 : *
53 : * - The plain vanilla representation is a "flat" string which consists of a
54 : * string header in the GC heap and a malloc'd null terminated char array.
55 : *
56 : * - To avoid copying a substring of an existing "base" string, a "dependent"
57 : * string (JSDependentString) can be created which points into the base
58 : * string's char array.
59 : *
60 : * - To avoid O(n^2) char buffer copying, a "rope" node (JSRope) can be created
61 : * to represent a delayed string concatenation. Concatenation (called
62 : * flattening) is performed if and when a linear char array is requested. In
63 : * general, ropes form a binary dag whose internal nodes are JSRope string
64 : * headers with no associated char array and whose leaf nodes are either flat
65 : * or dependent strings.
66 : *
67 : * - To avoid copying the leftmost string when flattening, we may produce an
68 : * "extensible" string, which tracks not only its actual length but also its
69 : * buffer's overall size. If such an "extensible" string appears as the
70 : * leftmost string in a subsequent flatten, and its buffer has enough unused
71 : * space, we can simply flatten the rest of the ropes into its buffer,
72 : * leaving its text in place. We then transfer ownership of its buffer to the
73 : * flattened rope, and mutate the donor extensible string into a dependent
74 : * string referencing its original buffer.
75 : *
76 : * (The term "extensible" does not imply that we ever 'realloc' the buffer.
77 : * Extensible strings may have dependent strings pointing into them, and the
78 : * JSAPI hands out pointers to flat strings' buffers, so resizing with
79 : * 'realloc' is generally not possible.)
80 : *
81 : * - To avoid allocating small char arrays, short strings can be stored inline
82 : * in the string header (JSInlineString). These come in two flavours:
83 : * JSThinInlineString, which is the same size as JSString; and
84 : * JSFatInlineString, which has a larger header and so can fit more chars.
85 : *
86 : * - To avoid O(n) string equality comparisons, strings can be
87 : * canonicalized to "atoms" (JSAtom) such that there is a single atom with a
88 : * given (length,chars).
89 : *
90 : * - To avoid copying all strings created through the JSAPI, an "external"
91 : * string (JSExternalString) can be created whose chars are managed by the
92 : * JSAPI client.
93 : *
94 : * - To avoid using two bytes per character for every string, string characters
95 : * are stored as Latin1 instead of TwoByte if all characters are representable
96 : * in Latin1.
97 : *
98 : * - To avoid slow conversions from strings to integer indexes, we cache 16 bit
99 : * unsigned indexes on strings representing such numbers.
100 : *
101 : * Although all strings share the same basic memory layout, we can conceptually
102 : * arrange them into a hierarchy of operations/invariants and represent this
103 : * hierarchy in C++ with classes:
104 : *
105 : * C++ type operations+fields / invariants+properties
106 : * ========================== =========================================
107 : * JSString (abstract) get(Latin1|TwoByte)CharsZ, get(Latin1|TwoByte)Chars, length / -
108 : * | \
109 : * | JSRope leftChild, rightChild / -
110 : * |
111 : * JSLinearString (abstract) latin1Chars, twoByteChars / might be null-terminated
112 : * | |
113 : * | +-- JSDependentString base / -
114 : * | |
115 : * | +-- JSExternalString - / char array memory managed by embedding
116 : * |
117 : * JSFlatString - / null terminated
118 : * | |
119 : * | +-- JSExtensibleString tracks total buffer capacity (including current text)
120 : * | |
121 : * | +-- JSUndependedString original dependent base / -
122 : * | |
123 : * | +-- JSInlineString (abstract) - / chars stored in header
124 : * | |
125 : * | +-- JSThinInlineString - / header is normal
126 : * | |
127 : * | +-- JSFatInlineString - / header is fat
128 : * |
129 : * JSAtom (abstract) - / string equality === pointer equality
130 : * | |
131 : * | +-- js::NormalAtom - JSFlatString + atom hash code
132 : * | |
133 : * | +-- js::FatInlineAtom - JSFatInlineString + atom hash code
134 : * |
135 : * js::PropertyName - / chars don't contain an index (uint32_t)
136 : *
137 : * Classes marked with (abstract) above are not literally C++ Abstract Base
138 : * Classes (since there are no virtual functions, pure or not, in this
139 : * hierarchy), but have the same meaning: there are no strings with this type as
140 : * its most-derived type.
141 : *
142 : * Atoms can additionally be permanent, i.e. unable to be collected, and can
143 : * be combined with other string types to create additional most-derived types
144 : * that satisfy the invariants of more than one of the abovementioned
145 : * most-derived types. Furthermore, each atom stores a hash number (based on its
146 : * chars). This hash number is used as key in the atoms table and when the atom
147 : * is used as key in a JS Map/Set.
148 : *
149 : * Derived string types can be queried from ancestor types via isX() and
150 : * retrieved with asX() debug-only-checked casts.
151 : *
152 : * The ensureX() operations mutate 'this' in place, effectively changing its
153 : * type to be at least X (e.g., ensureLinear will change a JSRope to be a
153 : * JSFlatString).
154 : */
155 :
156 : class JSString : public js::gc::TenuredCell
157 : {
158 : protected:
159 : static const size_t NUM_INLINE_CHARS_LATIN1 = 2 * sizeof(void*) / sizeof(JS::Latin1Char);
160 : static const size_t NUM_INLINE_CHARS_TWO_BYTE = 2 * sizeof(void*) / sizeof(char16_t);
161 :
162 : /* Fields only apply to string types commented on the right. */
163 : struct Data
164 : {
165 : union {
166 : struct {
167 : uint32_t flags; /* JSString */
168 : uint32_t length; /* JSString */
169 : };
170 : uintptr_t flattenData; /* JSRope (temporary while flattening) */
171 : } u1;
172 : union {
173 : union {
174 : /* JS(Fat)InlineString */
175 : JS::Latin1Char inlineStorageLatin1[NUM_INLINE_CHARS_LATIN1];
176 : char16_t inlineStorageTwoByte[NUM_INLINE_CHARS_TWO_BYTE];
177 : };
178 : struct {
179 : union {
180 : const JS::Latin1Char* nonInlineCharsLatin1; /* JSLinearString, except JS(Fat)InlineString */
181 : const char16_t* nonInlineCharsTwoByte;/* JSLinearString, except JS(Fat)InlineString */
182 : JSString* left; /* JSRope */
183 : } u2;
184 : union {
185 : JSLinearString* base; /* JS(Dependent|Undepended)String */
186 : JSString* right; /* JSRope */
187 : size_t capacity; /* JSFlatString (extensible) */
188 : const JSStringFinalizer* externalFinalizer;/* JSExternalString */
189 : } u3;
190 : } s;
191 : };
192 : } d;
193 :
194 : public:
195 : /* Flags exposed only for jits */
196 :
197 : /*
198 : * The Flags Word
199 : *
200 : * The flags word stores both the string's type and its character encoding.
201 : *
202 : * If LATIN1_CHARS_BIT is set, the string's characters are stored as Latin1
203 : * instead of TwoByte. This flag can also be set for ropes, if both the
204 : * left and right nodes are Latin1. Flattening will result in a Latin1
205 : * string in this case.
206 : *
207 : * The other flags store the string's type. Instead of using a dense index
208 : * to represent the most-derived type, string types are encoded to allow
209 : * single-op tests for hot queries (isRope, isDependent, isFlat, isAtom)
210 : * which, in view of subtyping, would require slower
211 : * (isX() || isY() || isZ()).
212 : *
213 : * The string type encoding can be summarized as follows. The "instance
214 : * encoding" entry for a type specifies the flag bits used to create a
215 : * string instance of that type. Abstract types have no instances and thus
216 : * have no such entry. The "subtype predicate" entry for a type specifies
217 : * the predicate used to query whether a JSString instance is subtype
218 : * (reflexively) of that type.
219 : *
220 : * String Instance Subtype
221 : * type encoding predicate
222 : * ------------------------------------
223 : * Rope 000000 000000
224 : * Linear - !000000
225 : * HasBase - xxxx1x
226 : * Dependent 000010 000010
227 : * External 100000 100000
228 : * Flat - xxxxx1
229 : * Undepended 000011 000011
230 : * Extensible 010001 010001
231 : * Inline 000101 xxx1xx
232 : * FatInline 010101 x1x1xx
233 : * Atom 001001 xx1xxx
234 : * PermanentAtom 101001 1x1xxx
235 : * InlineAtom - xx11xx
236 : * FatInlineAtom - x111xx
237 : *
238 : * Note that the first 4 flag bits (from right to left in the previous table)
239 : * have the following meaning and can be used for some hot queries:
240 : *
241 : * Bit 0: IsFlat
242 : * Bit 1: HasBase (Dependent, Undepended)
243 : * Bit 2: IsInline (Inline, FatInline)
244 : * Bit 3: IsAtom (Atom, PermanentAtom)
245 : *
246 : * "HasBase" here refers to the two string types that have a 'base' field:
247 : * JSDependentString and JSUndependedString.
248 : * A JSUndependedString is a JSDependentString which has been 'fixed' (by ensureFixed)
249 : * to be null-terminated. In such cases, the string must keep marking its base since
250 : * there may be any number of *other* JSDependentStrings transitively depending on it.
251 : *
252 : * If the INDEX_VALUE_BIT is set the upper 16 bits of the flag word hold the integer
253 : * index.
254 : */
255 :
256 : static const uint32_t FLAT_BIT = JS_BIT(0);
257 : static const uint32_t HAS_BASE_BIT = JS_BIT(1);
258 : static const uint32_t INLINE_CHARS_BIT = JS_BIT(2);
259 : static const uint32_t ATOM_BIT = JS_BIT(3);
260 :
261 : static const uint32_t ROPE_FLAGS = 0;
262 : static const uint32_t DEPENDENT_FLAGS = HAS_BASE_BIT;
263 : static const uint32_t UNDEPENDED_FLAGS = FLAT_BIT | HAS_BASE_BIT;
264 : static const uint32_t EXTENSIBLE_FLAGS = FLAT_BIT | JS_BIT(4);
265 : static const uint32_t EXTERNAL_FLAGS = JS_BIT(5);
266 :
267 : static const uint32_t FAT_INLINE_MASK = INLINE_CHARS_BIT | JS_BIT(4);
268 : static const uint32_t PERMANENT_ATOM_MASK = ATOM_BIT | JS_BIT(5);
269 :
270 : /* Initial flags for thin inline and fat inline strings. */
271 : static const uint32_t INIT_THIN_INLINE_FLAGS = FLAT_BIT | INLINE_CHARS_BIT;
272 : static const uint32_t INIT_FAT_INLINE_FLAGS = FLAT_BIT | FAT_INLINE_MASK;
273 :
274 : static const uint32_t TYPE_FLAGS_MASK = JS_BIT(6) - 1;
275 :
276 : static const uint32_t LATIN1_CHARS_BIT = JS_BIT(6);
277 :
278 : static const uint32_t INDEX_VALUE_BIT = JS_BIT(7);
279 : static const uint32_t INDEX_VALUE_SHIFT = 16;
280 :
281 : static const uint32_t MAX_LENGTH = js::MaxStringLength;
282 :
283 : static const JS::Latin1Char MAX_LATIN1_CHAR = 0xff;
284 :
285 : /*
286 : * Helper function to validate that a string of a given length is
287 : * representable by a JSString. An allocation overflow is reported if false
288 : * is returned.
289 : */
290 : static inline bool validateLength(JSContext* maybecx, size_t length);
291 :
292 : static void staticAsserts() {
293 : static_assert(JSString::MAX_LENGTH < UINT32_MAX, "Length must fit in 32 bits");
294 : static_assert(sizeof(JSString) ==
295 : (offsetof(JSString, d.inlineStorageLatin1) +
296 : NUM_INLINE_CHARS_LATIN1 * sizeof(char)),
297 : "Inline Latin1 chars must fit in a JSString");
298 : static_assert(sizeof(JSString) ==
299 : (offsetof(JSString, d.inlineStorageTwoByte) +
300 : NUM_INLINE_CHARS_TWO_BYTE * sizeof(char16_t)),
301 : "Inline char16_t chars must fit in a JSString");
302 :
303 : /* Ensure js::shadow::String has the same layout. */
304 : using js::shadow::String;
305 : static_assert(offsetof(JSString, d.u1.length) == offsetof(String, length),
306 : "shadow::String length offset must match JSString");
307 : static_assert(offsetof(JSString, d.u1.flags) == offsetof(String, flags),
308 : "shadow::String flags offset must match JSString");
309 : static_assert(offsetof(JSString, d.s.u2.nonInlineCharsLatin1) == offsetof(String, nonInlineCharsLatin1),
310 : "shadow::String nonInlineChars offset must match JSString");
311 : static_assert(offsetof(JSString, d.s.u2.nonInlineCharsTwoByte) == offsetof(String, nonInlineCharsTwoByte),
312 : "shadow::String nonInlineChars offset must match JSString");
313 : static_assert(offsetof(JSString, d.s.u3.externalFinalizer) == offsetof(String, externalFinalizer),
314 : "shadow::String externalFinalizer offset must match JSString");
315 : static_assert(offsetof(JSString, d.inlineStorageLatin1) == offsetof(String, inlineStorageLatin1),
316 : "shadow::String inlineStorage offset must match JSString");
317 : static_assert(offsetof(JSString, d.inlineStorageTwoByte) == offsetof(String, inlineStorageTwoByte),
318 : "shadow::String inlineStorage offset must match JSString");
319 : static_assert(INLINE_CHARS_BIT == String::INLINE_CHARS_BIT,
320 : "shadow::String::INLINE_CHARS_BIT must match JSString::INLINE_CHARS_BIT");
321 : static_assert(LATIN1_CHARS_BIT == String::LATIN1_CHARS_BIT,
322 : "shadow::String::LATIN1_CHARS_BIT must match JSString::LATIN1_CHARS_BIT");
323 : static_assert(TYPE_FLAGS_MASK == String::TYPE_FLAGS_MASK,
324 : "shadow::String::TYPE_FLAGS_MASK must match JSString::TYPE_FLAGS_MASK");
325 : static_assert(ROPE_FLAGS == String::ROPE_FLAGS,
326 : "shadow::String::ROPE_FLAGS must match JSString::ROPE_FLAGS");
327 : static_assert(EXTERNAL_FLAGS == String::EXTERNAL_FLAGS,
328 : "shadow::String::EXTERNAL_FLAGS must match JSString::EXTERNAL_FLAGS");
329 : }
330 :
331 : /* Avoid lame compile errors in JSRope::flatten */
332 : friend class JSRope;
333 :
334 : friend class js::gc::RelocationOverlay;
335 :
336 : protected:
337 : template <typename CharT>
338 : MOZ_ALWAYS_INLINE
339 : void setNonInlineChars(const CharT* chars);
340 :
341 : public:
342 : /* All strings have length. */
343 :
344 : MOZ_ALWAYS_INLINE
345 1596547 : size_t length() const {
346 1596547 : return d.u1.length;
347 : }
348 :
349 : MOZ_ALWAYS_INLINE
350 870 : bool empty() const {
351 870 : return d.u1.length == 0;
352 : }
353 :
354 : inline bool getChar(JSContext* cx, size_t index, char16_t* code);
355 :
356 : /* Strings have either Latin1 or TwoByte chars. */
357 4987053 : bool hasLatin1Chars() const {
358 4987053 : return d.u1.flags & LATIN1_CHARS_BIT;
359 : }
360 23608 : bool hasTwoByteChars() const {
361 23608 : return !(d.u1.flags & LATIN1_CHARS_BIT);
362 : }
363 :
364 : /* Strings might contain cached indexes. */
365 1220664 : bool hasIndexValue() const {
366 1220664 : return d.u1.flags & INDEX_VALUE_BIT;
367 : }
368 36 : uint32_t getIndexValue() const {
369 36 : MOZ_ASSERT(hasIndexValue());
370 36 : MOZ_ASSERT(isFlat());
371 36 : return d.u1.flags >> INDEX_VALUE_SHIFT;
372 : }
373 :
374 : /* Fallible conversions to more-derived string types. */
375 :
376 : inline JSLinearString* ensureLinear(JSContext* cx);
377 : JSFlatString* ensureFlat(JSContext* cx);
378 :
379 : static bool ensureLinear(JSContext* cx, JSString* str) {
380 : return str->ensureLinear(cx) != nullptr;
381 : }
382 :
383 : /* Type query and debug-checked casts */
384 :
385 : MOZ_ALWAYS_INLINE
386 2828132 : bool isRope() const {
387 2828132 : return (d.u1.flags & TYPE_FLAGS_MASK) == ROPE_FLAGS;
388 : }
389 :
390 : MOZ_ALWAYS_INLINE
391 2990 : JSRope& asRope() const {
392 2990 : MOZ_ASSERT(isRope());
393 2990 : return *(JSRope*)this;
394 : }
395 :
396 : MOZ_ALWAYS_INLINE
397 2795537 : bool isLinear() const {
398 2795537 : return !isRope();
399 : }
400 :
401 : MOZ_ALWAYS_INLINE
402 82321 : JSLinearString& asLinear() const {
403 82321 : MOZ_ASSERT(JSString::isLinear());
404 82321 : return *(JSLinearString*)this;
405 : }
406 :
407 : MOZ_ALWAYS_INLINE
408 514 : bool isDependent() const {
409 514 : return (d.u1.flags & TYPE_FLAGS_MASK) == DEPENDENT_FLAGS;
410 : }
411 :
412 : MOZ_ALWAYS_INLINE
413 33 : JSDependentString& asDependent() const {
414 33 : MOZ_ASSERT(isDependent());
415 33 : return *(JSDependentString*)this;
416 : }
417 :
418 : MOZ_ALWAYS_INLINE
419 2425178 : bool isFlat() const {
420 2425178 : return d.u1.flags & FLAT_BIT;
421 : }
422 :
423 : MOZ_ALWAYS_INLINE
424 2048 : JSFlatString& asFlat() const {
425 2048 : MOZ_ASSERT(isFlat());
426 2048 : return *(JSFlatString*)this;
427 : }
428 :
429 : MOZ_ALWAYS_INLINE
430 997 : bool isExtensible() const {
431 997 : return (d.u1.flags & TYPE_FLAGS_MASK) == EXTENSIBLE_FLAGS;
432 : }
433 :
434 : MOZ_ALWAYS_INLINE
435 40 : JSExtensibleString& asExtensible() const {
436 40 : MOZ_ASSERT(isExtensible());
437 40 : return *(JSExtensibleString*)this;
438 : }
439 :
440 : MOZ_ALWAYS_INLINE
441 2476639 : bool isInline() const {
442 2476639 : return d.u1.flags & INLINE_CHARS_BIT;
443 : }
444 :
445 : MOZ_ALWAYS_INLINE
446 0 : JSInlineString& asInline() const {
447 0 : MOZ_ASSERT(isInline());
448 0 : return *(JSInlineString*)this;
449 : }
450 :
451 : MOZ_ALWAYS_INLINE
452 1354235 : bool isFatInline() const {
453 1354235 : return (d.u1.flags & FAT_INLINE_MASK) == FAT_INLINE_MASK;
454 : }
455 :
456 : /* For hot code, prefer other type queries. */
457 8220 : bool isExternal() const {
458 8220 : return (d.u1.flags & TYPE_FLAGS_MASK) == EXTERNAL_FLAGS;
459 : }
460 :
461 : MOZ_ALWAYS_INLINE
462 89 : JSExternalString& asExternal() const {
463 89 : MOZ_ASSERT(isExternal());
464 89 : return *(JSExternalString*)this;
465 : }
466 :
467 : MOZ_ALWAYS_INLINE
468 13 : bool isUndepended() const {
469 13 : return (d.u1.flags & TYPE_FLAGS_MASK) == UNDEPENDED_FLAGS;
470 : }
471 :
472 : MOZ_ALWAYS_INLINE
473 848022 : bool isAtom() const {
474 848022 : return d.u1.flags & ATOM_BIT;
475 : }
476 :
477 : MOZ_ALWAYS_INLINE
478 1210894 : bool isPermanentAtom() const {
479 1210894 : return (d.u1.flags & PERMANENT_ATOM_MASK) == PERMANENT_ATOM_MASK;
480 : }
481 :
482 : MOZ_ALWAYS_INLINE
483 623627 : JSAtom& asAtom() const {
484 623627 : MOZ_ASSERT(isAtom());
485 623632 : return *(JSAtom*)this;
486 : }
487 :
488 : // Fills |array| with various strings that represent the different string
489 : // kinds and character encodings.
490 : static bool fillWithRepresentatives(JSContext* cx, js::HandleArrayObject array);
491 :
492 : /* Only called by the GC for dependent or undepended strings. */
493 :
494 7844 : inline bool hasBase() const {
495 7844 : return d.u1.flags & HAS_BASE_BIT;
496 : }
497 :
498 : inline JSLinearString* base() const;
499 :
500 : void traceBase(JSTracer* trc);
501 :
502 : /* Only called by the GC for strings with the AllocKind::STRING kind. */
503 :
504 : inline void finalize(js::FreeOp* fop);
505 :
506 : /* Gets the number of bytes that the chars take on the heap. */
507 :
508 : size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf);
509 :
510 : /* Offsets for direct field from jit code. */
511 :
512 170 : static size_t offsetOfLength() {
513 170 : return offsetof(JSString, d.u1.length);
514 : }
515 157 : static size_t offsetOfFlags() {
516 157 : return offsetof(JSString, d.u1.flags);
517 : }
518 :
519 30 : static size_t offsetOfNonInlineChars() {
520 : static_assert(offsetof(JSString, d.s.u2.nonInlineCharsTwoByte) ==
521 : offsetof(JSString, d.s.u2.nonInlineCharsLatin1),
522 : "nonInlineCharsTwoByte and nonInlineCharsLatin1 must have same offset");
523 30 : return offsetof(JSString, d.s.u2.nonInlineCharsTwoByte);
524 : }
525 :
526 : static const JS::TraceKind TraceKind = JS::TraceKind::String;
527 :
528 : #ifdef DEBUG
529 : void dump(FILE* fp);
530 : void dumpCharsNoNewline(FILE* fp);
531 : void dump();
532 : void dumpCharsNoNewline();
533 : void dumpRepresentation(FILE* fp, int indent) const;
534 : void dumpRepresentationHeader(FILE* fp, int indent, const char* subclass) const;
535 :
536 : template <typename CharT>
537 : static void dumpChars(const CharT* s, size_t len, FILE* fp=stderr);
538 :
539 : bool equals(const char* s);
540 : #endif
541 :
542 : void traceChildren(JSTracer* trc);
543 :
544 220647 : static MOZ_ALWAYS_INLINE void readBarrier(JSString* thing) {
545 220647 : if (thing->isPermanentAtom())
546 5275 : return;
547 :
548 215372 : TenuredCell::readBarrier(thing);
549 : }
550 :
551 44090 : static MOZ_ALWAYS_INLINE void writeBarrierPre(JSString* thing) {
552 44090 : if (!thing || thing->isPermanentAtom())
553 29352 : return;
554 :
555 14738 : TenuredCell::writeBarrierPre(thing);
556 : }
557 :
558 : private:
559 : JSString() = delete;
560 : JSString(const JSString& other) = delete;
561 : void operator=(const JSString& other) = delete;
562 : };
563 :
564 : class JSRope : public JSString
565 : {
566 : template <typename CharT>
567 : bool copyCharsInternal(JSContext* cx, js::ScopedJSFreePtr<CharT>& out,
568 : bool nullTerminate) const;
569 :
570 : enum UsingBarrier { WithIncrementalBarrier, NoBarrier };
571 :
572 : template<UsingBarrier b, typename CharT>
573 : JSFlatString* flattenInternal(JSContext* cx);
574 :
575 : template<UsingBarrier b>
576 : JSFlatString* flattenInternal(JSContext* cx);
577 :
578 : friend class JSString;
579 : JSFlatString* flatten(JSContext* cx);
580 :
581 : void init(JSContext* cx, JSString* left, JSString* right, size_t length);
582 :
583 : public:
584 : template <js::AllowGC allowGC>
585 : static inline JSRope* new_(JSContext* cx,
586 : typename js::MaybeRooted<JSString*, allowGC>::HandleType left,
587 : typename js::MaybeRooted<JSString*, allowGC>::HandleType right,
588 : size_t length);
589 :
590 : bool copyLatin1Chars(JSContext* cx,
591 : js::ScopedJSFreePtr<JS::Latin1Char>& out) const;
592 : bool copyTwoByteChars(JSContext* cx, js::ScopedJSFreePtr<char16_t>& out) const;
593 :
594 : bool copyLatin1CharsZ(JSContext* cx,
595 : js::ScopedJSFreePtr<JS::Latin1Char>& out) const;
596 : bool copyTwoByteCharsZ(JSContext* cx, js::ScopedJSFreePtr<char16_t>& out) const;
597 :
598 : template <typename CharT>
599 : bool copyChars(JSContext* cx, js::ScopedJSFreePtr<CharT>& out) const;
600 :
601 6000 : JSString* leftChild() const {
602 6000 : MOZ_ASSERT(isRope());
603 6000 : return d.s.u2.left;
604 : }
605 :
606 4 : JSString* rightChild() const {
607 4 : MOZ_ASSERT(isRope());
608 4 : return d.s.u3.right;
609 : }
610 :
611 : void traceChildren(JSTracer* trc);
612 :
613 7 : static size_t offsetOfLeft() {
614 7 : return offsetof(JSRope, d.s.u2.left);
615 : }
616 4 : static size_t offsetOfRight() {
617 4 : return offsetof(JSRope, d.s.u3.right);
618 : }
619 :
620 : #ifdef DEBUG
621 : void dumpRepresentation(FILE* fp, int indent) const;
622 : #endif
623 : };
624 :
625 : static_assert(sizeof(JSRope) == sizeof(JSString),
626 : "string subclasses must be binary-compatible with JSString");
627 :
628 : class JSLinearString : public JSString
629 : {
630 : friend class JSString;
631 : friend class js::AutoStableStringChars;
632 :
633 : /* Vacuous and therefore unimplemented. */
634 : JSLinearString* ensureLinear(JSContext* cx) = delete;
635 : bool isLinear() const = delete;
636 : JSLinearString& asLinear() const = delete;
637 :
638 : protected:
639 : /* Returns void pointer to latin1/twoByte chars, for finalizers. */
640 : MOZ_ALWAYS_INLINE
641 0 : void* nonInlineCharsRaw() const {
642 0 : MOZ_ASSERT(!isInline());
643 : static_assert(offsetof(JSLinearString, d.s.u2.nonInlineCharsTwoByte) ==
644 : offsetof(JSLinearString, d.s.u2.nonInlineCharsLatin1),
645 : "nonInlineCharsTwoByte and nonInlineCharsLatin1 must have same offset");
646 0 : return (void*)d.s.u2.nonInlineCharsTwoByte;
647 : }
648 :
649 : MOZ_ALWAYS_INLINE const JS::Latin1Char* rawLatin1Chars() const;
650 : MOZ_ALWAYS_INLINE const char16_t* rawTwoByteChars() const;
651 :
652 : public:
653 : template<typename CharT>
654 : MOZ_ALWAYS_INLINE
655 : const CharT* nonInlineChars(const JS::AutoCheckCannotGC& nogc) const;
656 :
657 : MOZ_ALWAYS_INLINE
658 2130 : const JS::Latin1Char* nonInlineLatin1Chars(const JS::AutoCheckCannotGC& nogc) const {
659 2130 : MOZ_ASSERT(!isInline());
660 2130 : MOZ_ASSERT(hasLatin1Chars());
661 2130 : return d.s.u2.nonInlineCharsLatin1;
662 : }
663 :
664 : MOZ_ALWAYS_INLINE
665 248 : const char16_t* nonInlineTwoByteChars(const JS::AutoCheckCannotGC& nogc) const {
666 248 : MOZ_ASSERT(!isInline());
667 248 : MOZ_ASSERT(hasTwoByteChars());
668 248 : return d.s.u2.nonInlineCharsTwoByte;
669 : }
670 :
671 : template<typename CharT>
672 : MOZ_ALWAYS_INLINE
673 : const CharT* chars(const JS::AutoCheckCannotGC& nogc) const;
674 :
675 : MOZ_ALWAYS_INLINE
676 2458197 : const JS::Latin1Char* latin1Chars(const JS::AutoCheckCannotGC& nogc) const {
677 2458197 : return rawLatin1Chars();
678 : }
679 :
680 : MOZ_ALWAYS_INLINE
681 15034 : const char16_t* twoByteChars(const JS::AutoCheckCannotGC& nogc) const {
682 15034 : return rawTwoByteChars();
683 : }
684 :
685 654 : mozilla::Range<const JS::Latin1Char> latin1Range(const JS::AutoCheckCannotGC& nogc) const {
686 654 : MOZ_ASSERT(JSString::isLinear());
687 654 : return mozilla::Range<const JS::Latin1Char>(latin1Chars(nogc), length());
688 : }
689 :
690 2 : mozilla::Range<const char16_t> twoByteRange(const JS::AutoCheckCannotGC& nogc) const {
691 2 : MOZ_ASSERT(JSString::isLinear());
692 2 : return mozilla::Range<const char16_t>(twoByteChars(nogc), length());
693 : }
694 :
695 : MOZ_ALWAYS_INLINE
696 168553 : char16_t latin1OrTwoByteChar(size_t index) const {
697 168553 : MOZ_ASSERT(JSString::isLinear());
698 168552 : MOZ_ASSERT(index < length());
699 337106 : JS::AutoCheckCannotGC nogc;
700 337108 : return hasLatin1Chars() ? latin1Chars(nogc)[index] : twoByteChars(nogc)[index];
701 : }
702 :
703 : #ifdef DEBUG
704 : void dumpRepresentationChars(FILE* fp, int indent) const;
705 : #endif
706 : };
707 :
708 : static_assert(sizeof(JSLinearString) == sizeof(JSString),
709 : "string subclasses must be binary-compatible with JSString");
710 :
711 : class JSDependentString : public JSLinearString
712 : {
713 : friend class JSString;
714 : JSFlatString* undepend(JSContext* cx);
715 :
716 : template <typename CharT>
717 : JSFlatString* undependInternal(JSContext* cx);
718 :
719 : void init(JSContext* cx, JSLinearString* base, size_t start,
720 : size_t length);
721 :
722 : /* Vacuous and therefore unimplemented. */
723 : bool isDependent() const = delete;
724 : JSDependentString& asDependent() const = delete;
725 :
726 : /* The offset of this string's chars in base->chars(). */
727 13 : MOZ_ALWAYS_INLINE mozilla::Maybe<size_t> baseOffset() const {
728 13 : MOZ_ASSERT(JSString::isDependent());
729 26 : JS::AutoCheckCannotGC nogc;
730 13 : if (MOZ_UNLIKELY(base()->isUndepended()))
731 0 : return mozilla::Nothing();
732 : size_t offset;
733 13 : if (hasTwoByteChars())
734 0 : offset = twoByteChars(nogc) - base()->twoByteChars(nogc);
735 : else
736 13 : offset = latin1Chars(nogc) - base()->latin1Chars(nogc);
737 13 : MOZ_ASSERT(offset < base()->length());
738 13 : return mozilla::Some(offset);
739 : }
740 :
741 : public:
742 : static inline JSLinearString* new_(JSContext* cx, JSLinearString* base,
743 : size_t start, size_t length);
744 :
745 0 : inline static size_t offsetOfBase() {
746 0 : return offsetof(JSDependentString, d.s.u3.base);
747 : }
748 :
749 : #ifdef DEBUG
750 : void dumpRepresentation(FILE* fp, int indent) const;
751 : #endif
752 : };
753 :
754 : static_assert(sizeof(JSDependentString) == sizeof(JSString),
755 : "string subclasses must be binary-compatible with JSString");
756 :
757 : class JSFlatString : public JSLinearString
758 : {
759 : /* Vacuous and therefore unimplemented. */
760 : JSFlatString* ensureFlat(JSContext* cx) = delete;
761 : bool isFlat() const = delete;
762 : JSFlatString& asFlat() const = delete;
763 :
764 : template <typename CharT>
765 : static bool isIndexSlow(const CharT* s, size_t length, uint32_t* indexp);
766 :
767 : void init(const char16_t* chars, size_t length);
768 : void init(const JS::Latin1Char* chars, size_t length);
769 :
770 : public:
771 : template <js::AllowGC allowGC, typename CharT>
772 : static inline JSFlatString* new_(JSContext* cx,
773 : const CharT* chars, size_t length);
774 :
775 1210812 : inline bool isIndexSlow(uint32_t* indexp) const {
776 1210812 : MOZ_ASSERT(JSString::isFlat());
777 2421628 : JS::AutoCheckCannotGC nogc;
778 1210813 : if (hasLatin1Chars()) {
779 1210813 : const JS::Latin1Char* s = latin1Chars(nogc);
780 1210816 : return JS7_ISDEC(*s) && isIndexSlow(s, length(), indexp);
781 : }
782 0 : const char16_t* s = twoByteChars(nogc);
783 0 : return JS7_ISDEC(*s) && isIndexSlow(s, length(), indexp);
784 : }
785 :
786 : /*
787 : * Returns true if this string's characters store an unsigned 32-bit
788 : * integer value, initializing *indexp to that value if so. (Thus if
789 : * calling isIndex returns true, js::IndexToString(cx, *indexp) will be a
790 : * string equal to this string.)
791 : */
792 1209630 : inline bool isIndex(uint32_t* indexp) const {
793 1209630 : MOZ_ASSERT(JSString::isFlat());
794 :
795 1209632 : if (JSString::hasIndexValue()) {
796 19 : *indexp = getIndexValue();
797 19 : return true;
798 : }
799 :
800 1209613 : return isIndexSlow(indexp);
801 : }
802 :
803 1199 : inline void maybeInitializeIndex(uint32_t index, bool allowAtom = false) {
804 1199 : MOZ_ASSERT(JSString::isFlat());
805 1199 : MOZ_ASSERT_IF(hasIndexValue(), getIndexValue() == index);
806 1199 : MOZ_ASSERT_IF(!allowAtom, !isAtom());
807 :
808 1199 : if (hasIndexValue() || index > UINT16_MAX)
809 0 : return;
810 :
811 2398 : mozilla::DebugOnly<uint32_t> containedIndex;
812 1199 : MOZ_ASSERT(isIndexSlow(&containedIndex));
813 1199 : MOZ_ASSERT(index == containedIndex);
814 :
815 1199 : d.u1.flags |= (index << INDEX_VALUE_SHIFT) | INDEX_VALUE_BIT;
816 : }
817 :
818 : /*
819 : * Returns a property name represented by this string, or null on failure.
820 : * You must verify that this is not an index per isIndex before calling
821 : * this method.
822 : */
823 : inline js::PropertyName* toPropertyName(JSContext* cx);
824 :
825 : /*
826 : * Once a JSFlatString sub-class has been added to the atom state, this
827 : * operation changes the string to the JSAtom type, in place.
828 : */
829 : MOZ_ALWAYS_INLINE JSAtom* morphAtomizedStringIntoAtom(js::HashNumber hash);
830 : MOZ_ALWAYS_INLINE JSAtom* morphAtomizedStringIntoPermanentAtom(js::HashNumber hash);
831 :
832 : inline void finalize(js::FreeOp* fop);
833 :
834 : #ifdef DEBUG
835 : void dumpRepresentation(FILE* fp, int indent) const;
836 : #endif
837 : };
838 :
839 : static_assert(sizeof(JSFlatString) == sizeof(JSString),
840 : "string subclasses must be binary-compatible with JSString");
841 :
// A flat string that additionally exposes a capacity value read from the
// string header (d.s.u3.capacity).
842 : class JSExtensibleString : public JSFlatString
843 : {
844 : /* Vacuous and therefore unimplemented. */
845 : bool isExtensible() const = delete;
846 : JSExtensibleString& asExtensible() const = delete;
847 :
848 : public:
// Returns the capacity field; asserts that the string is extensible.
// NOTE(review): the unit of the capacity (chars vs. bytes) is not visible
// here -- confirm against the code that writes d.s.u3.capacity.
849 : MOZ_ALWAYS_INLINE
850 40 : size_t capacity() const {
851 40 : MOZ_ASSERT(JSString::isExtensible());
852 40 : return d.s.u3.capacity;
853 : }
854 :
855 : #ifdef DEBUG
856 : void dumpRepresentation(FILE* fp, int indent) const;
857 : #endif
858 : };
859 :
860 : static_assert(sizeof(JSExtensibleString) == sizeof(JSString),
861 : "string subclasses must be binary-compatible with JSString");
862 :
// A flat string whose characters are read from the inline storage arrays of
// the string header (d.inlineStorageLatin1 / d.inlineStorageTwoByte).
863 : class JSInlineString : public JSFlatString
864 : {
865 : public:
// Returns the inline Latin1 buffer. The nogc argument is not used by the
// body; presumably it makes callers prove no GC can happen while the pointer
// is held -- confirm.
866 : MOZ_ALWAYS_INLINE
867 : const JS::Latin1Char* latin1Chars(const JS::AutoCheckCannotGC& nogc) const {
868 : MOZ_ASSERT(JSString::isInline());
869 : MOZ_ASSERT(hasLatin1Chars());
870 : return d.inlineStorageLatin1;
871 : }
872 :
// Two-byte counterpart of latin1Chars(); nogc is likewise unused in the body.
873 : MOZ_ALWAYS_INLINE
874 : const char16_t* twoByteChars(const JS::AutoCheckCannotGC& nogc) const {
875 : MOZ_ASSERT(JSString::isInline());
876 : MOZ_ASSERT(hasTwoByteChars());
877 : return d.inlineStorageTwoByte;
878 : }
879 :
// True if a string of |length| chars of type CharT fits in some inline
// string; the specializations forward to JSFatInlineString::lengthFits.
880 : template<typename CharT>
881 : static bool lengthFits(size_t length);
882 :
// Byte offset of the inline storage, computed from the two-byte member.
// NOTE(review): assumes the Latin1 and two-byte inline arrays start at the
// same offset -- confirm against the JSString layout.
883 38 : static size_t offsetOfInlineStorage() {
884 38 : return offsetof(JSInlineString, d.inlineStorageTwoByte);
885 : }
886 :
887 : #ifdef DEBUG
888 : void dumpRepresentation(FILE* fp, int indent) const;
889 : #endif
890 : };
891 :
892 : static_assert(sizeof(JSInlineString) == sizeof(JSString),
893 : "string subclasses must be binary-compatible with JSString");
894 :
895 : /*
896 : * On 32-bit platforms, JSThinInlineString can store 7 Latin1 characters or 3
897 : * TwoByte characters (excluding null terminator) inline. On 64-bit platforms,
898 : * these numbers are 15 and 7, respectively.
899 : */
// Inline string that uses only the storage available in the base string
// header (NUM_INLINE_CHARS_* characters).
900 : class JSThinInlineString : public JSInlineString
901 : {
902 : public:
// Maximum lengths are one less than the inline character counts; the
// subtracted slot is the null terminator that the length excludes.
903 : static const size_t MAX_LENGTH_LATIN1 = NUM_INLINE_CHARS_LATIN1 - 1;
904 : static const size_t MAX_LENGTH_TWO_BYTE = NUM_INLINE_CHARS_TWO_BYTE - 1;
905 :
906 : template <js::AllowGC allowGC>
907 : static inline JSThinInlineString* new_(JSContext* cx);
908 :
909 : template <typename CharT>
910 : inline CharT* init(size_t length);
911 :
// True if |length| chars of type CharT do not exceed the matching
// MAX_LENGTH_* constant above.
912 : template<typename CharT>
913 : static bool lengthFits(size_t length);
914 : };
915 :
916 : static_assert(sizeof(JSThinInlineString) == sizeof(JSString),
917 : "string subclasses must be binary-compatible with JSString");
918 :
919 : /*
920 : * On both 32-bit and 64-bit platforms, MAX_LENGTH_TWO_BYTE is 11 and
921 : * MAX_LENGTH_LATIN1 is 23 (excluding null terminator). This is deliberate,
922 : * in order to minimize potential performance differences between 32-bit and
923 : * 64-bit platforms.
924 : *
925 : * There are still some differences due to NUM_INLINE_CHARS_* being different.
926 : * E.g. TwoByte strings of length 4--7 will be JSFatInlineStrings on 32-bit
927 : * platforms and JSThinInlineStrings on 64-bit platforms. But the more
928 : * significant transition from inline strings to non-inline strings occurs at
929 : * length 11 (for TwoByte strings) and 23 (Latin1 strings) on both 32-bit and
930 : * 64-bit platforms.
931 : */
// Inline string that appends extra character storage after the base string
// header, for a total inline capacity of 24 Latin1 or 12 two-byte chars
// (null terminator included).
932 : class JSFatInlineString : public JSInlineString
933 : {
// Number of characters added beyond what the base header already holds.
934 : static const size_t INLINE_EXTENSION_CHARS_LATIN1 = 24 - NUM_INLINE_CHARS_LATIN1;
935 : static const size_t INLINE_EXTENSION_CHARS_TWO_BYTE = 12 - NUM_INLINE_CHARS_TWO_BYTE;
936 :
937 : protected: /* to fool clang into not warning this is unused */
// The extension bytes themselves; the two views share the same storage.
938 : union {
939 : char inlineStorageExtensionLatin1[INLINE_EXTENSION_CHARS_LATIN1];
940 : char16_t inlineStorageExtensionTwoByte[INLINE_EXTENSION_CHARS_TWO_BYTE];
941 : };
942 :
943 : public:
944 : template <js::AllowGC allowGC>
945 : static inline JSFatInlineString* new_(JSContext* cx);
946 :
// Base inline chars plus extension chars, minus the null terminator:
// 24 - 1 = 23 for Latin1 and 12 - 1 = 11 for two-byte strings.
947 : static const size_t MAX_LENGTH_LATIN1 = JSString::NUM_INLINE_CHARS_LATIN1 +
948 : INLINE_EXTENSION_CHARS_LATIN1
949 : -1 /* null terminator */;
950 :
951 : static const size_t MAX_LENGTH_TWO_BYTE = JSString::NUM_INLINE_CHARS_TWO_BYTE +
952 : INLINE_EXTENSION_CHARS_TWO_BYTE
953 : -1 /* null terminator */;
954 :
955 : template <typename CharT>
956 : inline CharT* init(size_t length);
957 :
// True if |length| chars of type CharT do not exceed the matching
// MAX_LENGTH_* constant above.
958 : template<typename CharT>
959 : static bool lengthFits(size_t length);
960 :
961 : /* Only called by the GC for strings with the AllocKind::FAT_INLINE_STRING kind. */
962 :
963 : MOZ_ALWAYS_INLINE void finalize(js::FreeOp* fop);
964 : };
965 :
966 : static_assert(sizeof(JSFatInlineString) % js::gc::CellAlignBytes == 0,
967 : "fat inline strings shouldn't waste space up to the next cell "
968 : "boundary");
969 :
// A linear string created from caller-supplied char16_t characters together
// with a JSStringFinalizer (see new_ and init below).
970 : class JSExternalString : public JSLinearString
971 : {
972 : void init(const char16_t* chars, size_t length, const JSStringFinalizer* fin);
973 :
974 : /* Vacuous and therefore unimplemented. */
975 : bool isExternal() const = delete;
976 : JSExternalString& asExternal() const = delete;
977 :
978 : public:
979 : static inline JSExternalString* new_(JSContext* cx, const char16_t* chars, size_t length,
980 : const JSStringFinalizer* fin);
981 :
// Returns the finalizer stored in the header (d.s.u3.externalFinalizer);
// asserts that the string is external.
982 52 : const JSStringFinalizer* externalFinalizer() const {
983 52 : MOZ_ASSERT(JSString::isExternal());
984 52 : return d.s.u3.externalFinalizer;
985 : }
986 :
987 : /*
988 : * External chars are never allocated inline or in the nursery, so we can
989 : * safely expose this without requiring an AutoCheckCannotGC argument.
990 : */
991 37 : const char16_t* twoByteChars() const {
992 37 : return rawTwoByteChars();
993 : }
994 :
995 : /* Only called by the GC for strings with the AllocKind::EXTERNAL_STRING kind. */
996 :
997 : inline void finalize(js::FreeOp* fop);
998 :
// NOTE(review): defined elsewhere; presumably converts this string to a flat
// representation and returns null on failure -- confirm at the definition.
999 : JSFlatString* ensureFlat(JSContext* cx);
1000 :
1001 : #ifdef DEBUG
1002 : void dumpRepresentation(FILE* fp, int indent) const;
1003 : #endif
1004 : };
1005 :
1006 : static_assert(sizeof(JSExternalString) == sizeof(JSString),
1007 : "string subclasses must be binary-compatible with JSString");
1008 :
// Marker type only: declares no members or methods of its own.
1009 : class JSUndependedString : public JSFlatString
1010 : {
1011 : /*
1012 : * JSUndependedString is not explicitly used and is only present for
1013 : * consistency. See JSDependentString::undepend for how a JSDependentString
1014 : * gets morphed into a JSUndependedString.
1015 : */
1016 : };
1017 :
1018 : static_assert(sizeof(JSUndependedString) == sizeof(JSString),
1019 : "string subclasses must be binary-compatible with JSString");
1020 :
// A flat string in its atom form. Adds access to the atom's hash (stored by
// js::NormalAtom or js::FatInlineAtom, see hash()/initHash()) and to the
// permanent-atom flag.
1021 : class JSAtom : public JSFlatString
1022 : {
1023 : /* Vacuous and therefore unimplemented. */
1024 : bool isAtom() const = delete;
1025 : JSAtom& asAtom() const = delete;
1026 :
1027 : public:
1028 : /* Returns the PropertyName for this. isIndex() must be false. */
1029 : inline js::PropertyName* asPropertyName();
1030 :
1031 : inline void finalize(js::FreeOp* fop);
1032 :
// Forwards to JSString::isPermanentAtom().
1033 : MOZ_ALWAYS_INLINE
1034 7498 : bool isPermanent() const {
1035 7498 : return JSString::isPermanentAtom();
1036 : }
1037 :
1038 : // Transform this atom into a permanent atom. This is only done during
1039 : // initialization of the runtime.
// Implemented by OR-ing PERMANENT_ATOM_MASK into the flags word.
1040 8670 : MOZ_ALWAYS_INLINE void morphIntoPermanentAtom() {
1041 8670 : d.u1.flags |= PERMANENT_ATOM_MASK;
1042 8670 : }
1043 :
// Read / write the hash; both dispatch on isFatInline() to the subclass
// that holds the hash_ field.
1044 : inline js::HashNumber hash() const;
1045 : inline void initHash(js::HashNumber hash);
1046 :
1047 : #ifdef DEBUG
1048 : void dump(FILE* fp);
1049 : void dump();
1050 : #endif
1051 : };
1052 :
1053 : static_assert(sizeof(JSAtom) == sizeof(JSString),
1054 : "string subclasses must be binary-compatible with JSString");
1055 :
1056 : namespace js {
1057 :
// Atom layout used when the atom is not fat-inline (see JSAtom::hash): the
// JSAtom header followed by the hash and 32 bits of padding.
1058 : class NormalAtom : public JSAtom
1059 : {
1060 : protected: // Silence Clang unused-field warning.
1061 : HashNumber hash_;
1062 : uint32_t padding_; // Ensure the size is a multiple of gc::CellAlignBytes.
1063 :
1064 : public:
// Returns the stored hash.
1065 1031300 : HashNumber hash() const {
1066 1031300 : return hash_;
1067 : }
// Stores |hash|; no check is made for a previously stored value.
1068 59929 : void initHash(HashNumber hash) {
1069 59929 : hash_ = hash;
1070 59929 : }
1071 : };
1072 :
1073 : static_assert(sizeof(NormalAtom) == sizeof(JSString) + sizeof(uint64_t),
1074 : "NormalAtom must have size of a string + HashNumber, "
1075 : "aligned to gc::CellAlignBytes");
1076 :
// Atom layout used when the atom is fat-inline (see JSAtom::hash): the JSAtom
// header, then as many bytes as JSFatInlineString adds on top of JSString,
// then the hash and 32 bits of padding.
1077 : class FatInlineAtom : public JSAtom
1078 : {
1079 : protected: // Silence Clang unused-field warning.
// Sized to match the extra storage of JSFatInlineString so that hash_ lands
// after the inline characters.
1080 : char inlineStorage_[sizeof(JSFatInlineString) - sizeof(JSString)];
1081 : HashNumber hash_;
1082 : uint32_t padding_; // Ensure the size is a multiple of gc::CellAlignBytes.
1083 :
1084 : public:
// Returns the stored hash.
1085 240941 : HashNumber hash() const {
1086 240941 : return hash_;
1087 : }
// Stores |hash|; no check is made for a previously stored value.
1088 16157 : void initHash(HashNumber hash) {
1089 16157 : hash_ = hash;
1090 16157 : }
1091 : };
1092 :
1093 : static_assert(sizeof(FatInlineAtom) == sizeof(JSFatInlineString) + sizeof(uint64_t),
1094 : "FatInlineAtom must have size of a fat inline string + HashNumber, "
1095 : "aligned to gc::CellAlignBytes");
1096 :
1097 : } // namespace js
1098 :
// Returns this atom's hash. The hash_ field lives in a different subclass
// depending on whether the atom is fat-inline, so pick the matching layout
// before reading it.
1099 : inline js::HashNumber
1100 1272175 : JSAtom::hash() const
1101 : {
1102 1272175 : if (isFatInline())
1103 240941 : return static_cast<const js::FatInlineAtom*>(this)->hash();
1104 1031297 : return static_cast<const js::NormalAtom*>(this)->hash();
1105 : }
1106 :
// Stores |hash| in this atom, using the FatInlineAtom or NormalAtom layout
// according to isFatInline() (mirrors JSAtom::hash).
1107 : inline void
1108 76086 : JSAtom::initHash(js::HashNumber hash)
1109 : {
1110 76086 : if (isFatInline())
1111 16157 : return static_cast<js::FatInlineAtom*>(this)->initHash(hash);
1112 59929 : return static_cast<js::NormalAtom*>(this)->initHash(hash);
1113 : }
1114 :
// Turns this flat string into an atom in place: sets ATOM_BIT in the flags,
// records |hash| via JSAtom::initHash, and returns the atom pointer.
1115 : MOZ_ALWAYS_INLINE JSAtom*
1116 62562 : JSFlatString::morphAtomizedStringIntoAtom(js::HashNumber hash)
1117 : {
// The flag is set before asAtom() is called.
// NOTE(review): presumably asAtom() asserts the atom flag -- confirm.
1118 62562 : d.u1.flags |= ATOM_BIT;
1119 62562 : JSAtom* atom = &asAtom();
1120 62562 : atom->initHash(hash);
1121 62562 : return atom;
1122 : }
1123 :
// Same as morphAtomizedStringIntoAtom, except that PERMANENT_ATOM_MASK is
// OR-ed into the flags instead of ATOM_BIT.
1124 : MOZ_ALWAYS_INLINE JSAtom*
1125 13524 : JSFlatString::morphAtomizedStringIntoPermanentAtom(js::HashNumber hash)
1126 : {
// NOTE(review): presumably PERMANENT_ATOM_MASK includes the atom bit, since
// asAtom() is used right after -- confirm at the mask's definition.
1127 13524 : d.u1.flags |= PERMANENT_ATOM_MASK;
1128 13524 : JSAtom* atom = &asAtom();
1129 13524 : atom->initHash(hash);
1130 13524 : return atom;
1131 : }
1132 :
1133 : namespace js {
1134 :
// Lookup tables of JSAtom pointers for very short strings: single chars with
// code below UNIT_STATIC_LIMIT, decimal integers below INT_STATIC_LIMIT, and
// two-char strings whose chars both map into a 64-entry "small char" set.
1135 : class StaticStrings
1136 : {
1137 : private:
1138 : /* Bigger chars cannot be in a length-2 string. */
1139 : static const size_t SMALL_CHAR_LIMIT = 128U;
// Number of distinct small chars; getLength2 relies on this being 1 << 6.
1140 : static const size_t NUM_SMALL_CHARS = 64U;
1141 :
// Indexed by (smallChar(c1) << 6) + smallChar(c2), see getLength2.
1142 : JSAtom* length2StaticTable[NUM_SMALL_CHARS * NUM_SMALL_CHARS];
1143 :
1144 : public:
1145 : /* We keep these public for the JITs. */
1146 : static const size_t UNIT_STATIC_LIMIT = 256U;
1147 : JSAtom* unitStaticTable[UNIT_STATIC_LIMIT];
1148 :
1149 : static const size_t INT_STATIC_LIMIT = 256U;
1150 : JSAtom* intStaticTable[INT_STATIC_LIMIT];
1151 :
// Zero-fills the whole object, so every table entry starts out null.
1152 3 : StaticStrings() {
1153 3 : mozilla::PodZero(this);
1154 3 : }
1155 :
1156 : bool init(JSContext* cx);
1157 : void trace(JSTracer* trc);
1158 :
// True if |u| has an entry in intStaticTable.
1159 361 : static bool hasUint(uint32_t u) { return u < INT_STATIC_LIMIT; }
1160 :
// Returns the table entry for |u|; the bound is only asserted.
1161 287 : JSAtom* getUint(uint32_t u) {
1162 287 : MOZ_ASSERT(hasUint(u));
1163 287 : return intStaticTable[u];
1164 : }
1165 :
// The cast to uint32_t makes negative values compare as large numbers, so a
// single comparison rejects both negatives and values >= INT_STATIC_LIMIT.
1166 864 : static bool hasInt(int32_t i) {
1167 864 : return uint32_t(i) < INT_STATIC_LIMIT;
1168 : }
1169 :
1170 213 : JSAtom* getInt(int32_t i) {
1171 213 : MOZ_ASSERT(hasInt(i));
1172 213 : return getUint(uint32_t(i));
1173 : }
1174 :
// True if |c| has an entry in unitStaticTable.
1175 11602 : static bool hasUnit(char16_t c) { return c < UNIT_STATIC_LIMIT; }
1176 :
// Returns the table entry for |c|; the bound is only asserted.
1177 11601 : JSAtom* getUnit(char16_t c) {
1178 11601 : MOZ_ASSERT(hasUnit(c));
1179 11601 : return unitStaticTable[c];
1180 : }
1181 :
1182 : /* May not return atom, returns null on (reported) failure. */
1183 : inline JSLinearString* getUnitStringForElement(JSContext* cx, JSString* str, size_t index);
1184 :
1185 : template <typename CharT>
1186 : static bool isStatic(const CharT* chars, size_t len);
1187 : static bool isStatic(JSAtom* atom);
1188 :
1189 : /* Return null if no static atom exists for the given (chars, length). */
// Only lengths 1, 2 and 3 can hit a table; every other length returns null.
1190 : template <typename CharT>
1191 449175 : MOZ_ALWAYS_INLINE JSAtom* lookup(const CharT* chars, size_t length) {
1192 449175 : switch (length) {
1193 : case 1: {
1194 11581 : char16_t c = chars[0];
1195 11581 : if (c < UNIT_STATIC_LIMIT)
1196 11566 : return getUnit(c);
1197 15 : return nullptr;
1198 : }
1199 : case 2:
1200 14908 : if (fitsInSmallChar(chars[0]) && fitsInSmallChar(chars[1]))
1201 12354 : return getLength2(chars[0], chars[1]);
1202 2554 : return nullptr;
1203 : case 3:
1204 : /*
1205 : * Here we know that intStaticTable covers only 256 (or at least
1206 : * not 1000 or more) chars. We rely on order here to resolve the unit vs.
1207 : * int string/length-2 string atom identity issue by giving priority to unit
1208 : * strings for "0" through "9" and length-2 strings for "10" through "99".
1209 : */
1210 : static_assert(INT_STATIC_LIMIT <= 999,
1211 : "static int strings assumed below to be at most "
1212 : "three digits");
// Accept only three decimal digits with a non-zero leading digit, i.e. the
// canonical spelling of 100..999, then check the value against the table size.
1213 20996 : if ('1' <= chars[0] && chars[0] <= '9' &&
1214 560 : '0' <= chars[1] && chars[1] <= '9' &&
1215 372 : '0' <= chars[2] && chars[2] <= '9') {
1216 372 : int i = (chars[0] - '0') * 100 +
1217 186 : (chars[1] - '0') * 10 +
1218 186 : (chars[2] - '0');
1219 :
1220 186 : if (unsigned(i) < INT_STATIC_LIMIT)
1221 6 : return getInt(i);
1222 : }
1223 20802 : return nullptr;
1224 : }
1225 :
1226 401878 : return nullptr;
1227 : }
1228 :
1229 : private:
1230 : typedef uint8_t SmallChar;
// -1 converted to uint8_t, i.e. 255; marks chars with no small-char code.
1231 : static const SmallChar INVALID_SMALL_CHAR = -1;
1232 :
// The range check comes first so toSmallChar is only indexed by values
// below SMALL_CHAR_LIMIT.
1233 52035 : static bool fitsInSmallChar(char16_t c) {
1234 52035 : return c < SMALL_CHAR_LIMIT && toSmallChar[c] != INVALID_SMALL_CHAR;
1235 : }
1236 :
// Maps a char to its small-char code or INVALID_SMALL_CHAR; defined elsewhere.
1237 : static const SmallChar toSmallChar[];
1238 :
// Returns the length-2 table entry for the pair (c1, c2); both must satisfy
// fitsInSmallChar (asserted only).
1239 12354 : MOZ_ALWAYS_INLINE JSAtom* getLength2(char16_t c1, char16_t c2) {
1240 12354 : MOZ_ASSERT(fitsInSmallChar(c1));
1241 12354 : MOZ_ASSERT(fitsInSmallChar(c2));
1242 12354 : size_t index = (size_t(toSmallChar[c1]) << 6) + toSmallChar[c2];
1243 12354 : return length2StaticTable[index];
1244 : }
// Convenience overload for a number below 100: looks up the pair made of its
// tens digit and its ones digit (so values below 10 use a leading '0').
1245 : JSAtom* getLength2(uint32_t u) {
1246 : MOZ_ASSERT(u < 100);
1247 : return getLength2('0' + u / 10, '0' + u % 10);
1248 : }
1249 : };
1250 :
1251 : /*
1252 : * Represents an atomized string which does not contain an index (that is, an
1253 : * unsigned 32-bit value). Thus for any PropertyName propname,
1254 : * ToString(ToUint32(propname)) never equals propname.
1255 : *
1256 : * To more concretely illustrate the utility of PropertyName, consider that it
1257 : * is used to partition, in a type-safe manner, the ways to refer to a
1258 : * property, as follows:
1259 : *
1260 : * - uint32_t indexes,
1261 : * - PropertyName strings which don't encode uint32_t indexes, and
1262 : * - jsspecial special properties (non-ES5 properties like object-valued
1263 : * jsids, JSID_EMPTY, JSID_VOID, and maybe in the future Harmony-proposed
1264 : * private names).
1265 : */
// Type-level marker for a JSAtom that is not an index. Declares no data
// members; values are obtained by casting (see JSAtom::asPropertyName).
1266 : class PropertyName : public JSAtom
1267 : {
1268 : private:
1269 : /* Vacuous and therefore unimplemented. */
1270 : PropertyName* asPropertyName() = delete;
1271 : };
1272 :
1273 : static_assert(sizeof(PropertyName) == sizeof(JSString),
1274 : "string subclasses must be binary-compatible with JSString");
1275 :
// Converts a PropertyName to a jsid by applying NON_INTEGER_ATOM_TO_JSID.
1276 : static MOZ_ALWAYS_INLINE jsid
1277 329191 : NameToId(PropertyName* name)
1278 : {
1279 329191 : return NON_INTEGER_ATOM_TO_JSID(name);
1280 : }
1281 :
1282 : using PropertyNameVector = JS::GCVector<PropertyName*>;
1283 :
1284 : template <typename CharT>
1285 : void
1286 : CopyChars(CharT* dest, const JSLinearString& str);
1287 :
// Linearizes |str| and returns, wrapped in a UniqueChars, the c_str() of the
// JS::CharsToNewUTF8CharsZ result for its Latin1 or two-byte character range.
// Returns nullptr if linearization fails.
1288 : static inline UniqueChars
1289 650 : StringToNewUTF8CharsZ(JSContext* maybecx, JSString& str)
1290 : {
// NOTE(review): nogc is already live when ensureLinear() runs below, and
// ensureLinear() calls JSRope::flatten(cx) for ropes -- confirm that this
// cannot GC, or that the placement is intentional.
1291 1300 : JS::AutoCheckCannotGC nogc;
1292 :
1293 650 : JSLinearString* linear = str.ensureLinear(maybecx);
1294 650 : if (!linear)
1295 0 : return nullptr;
1296 :
// Pick the conversion overload that matches the string's character width.
1297 650 : return UniqueChars(linear->hasLatin1Chars()
1298 1950 : ? JS::CharsToNewUTF8CharsZ(maybecx, linear->latin1Range(nogc)).c_str()
1299 1300 : : JS::CharsToNewUTF8CharsZ(maybecx, linear->twoByteRange(nogc)).c_str());
1300 : }
1301 :
1302 : /* GC-allocate a string descriptor for the given malloc-allocated chars. */
1303 : template <js::AllowGC allowGC, typename CharT>
1304 : extern JSFlatString*
1305 : NewString(JSContext* cx, CharT* chars, size_t length);
1306 :
1307 : /* Like NewString, but doesn't try to deflate to Latin1. */
1308 : template <js::AllowGC allowGC, typename CharT>
1309 : extern JSFlatString*
1310 : NewStringDontDeflate(JSContext* cx, CharT* chars, size_t length);
1311 :
1312 : extern JSLinearString*
1313 : NewDependentString(JSContext* cx, JSString* base, size_t start, size_t length);
1314 :
1315 : /* Take ownership of an array of Latin1Chars. */
1316 : extern JSFlatString*
1317 : NewLatin1StringZ(JSContext* cx, UniqueChars chars);
1318 :
1319 : /* Copy a counted string and GC-allocate a descriptor for it. */
1320 : template <js::AllowGC allowGC, typename CharT>
1321 : extern JSFlatString*
1322 : NewStringCopyN(JSContext* cx, const CharT* s, size_t n);
1323 :
// Overload for plain char input: reinterprets the bytes as Latin1Char and
// forwards to the CharT-templated NewStringCopyN.
1324 : template <js::AllowGC allowGC>
1325 : inline JSFlatString*
1326 4822 : NewStringCopyN(JSContext* cx, const char* s, size_t n)
1327 : {
1328 4822 : return NewStringCopyN<allowGC>(cx, reinterpret_cast<const Latin1Char*>(s), n);
1329 : }
1330 :
1331 : /* Like NewStringCopyN, but doesn't try to deflate to Latin1. */
1332 : template <js::AllowGC allowGC, typename CharT>
1333 : extern JSFlatString*
1334 : NewStringCopyNDontDeflate(JSContext* cx, const CharT* s, size_t n);
1335 :
1336 : /* Copy a C string and GC-allocate a descriptor for it. */
// Null-terminated char16_t input: measures the length with js_strlen and
// forwards to NewStringCopyN.
1337 : template <js::AllowGC allowGC>
1338 : inline JSFlatString*
1339 250 : NewStringCopyZ(JSContext* cx, const char16_t* s)
1340 : {
1341 250 : return NewStringCopyN<allowGC>(cx, s, js_strlen(s));
1342 : }
1343 :
// Null-terminated char input: measures the length with strlen and forwards
// to the const char* overload of NewStringCopyN.
1344 : template <js::AllowGC allowGC>
1345 : inline JSFlatString*
1346 2109 : NewStringCopyZ(JSContext* cx, const char* s)
1347 : {
1348 2109 : return NewStringCopyN<allowGC>(cx, s, strlen(s));
1349 : }
1350 :
1351 : template <js::AllowGC allowGC>
1352 : extern JSFlatString*
1353 : NewStringCopyUTF8N(JSContext* cx, const JS::UTF8Chars utf8);
1354 :
// Null-terminated UTF-8 input: builds a JS::UTF8Chars range whose length is
// strlen of the C string (a byte count) and forwards to NewStringCopyUTF8N.
1355 : template <js::AllowGC allowGC>
1356 : inline JSFlatString*
1357 2 : NewStringCopyUTF8Z(JSContext* cx, const JS::ConstUTF8CharsZ utf8)
1358 : {
1359 2 : return NewStringCopyUTF8N<allowGC>(cx, JS::UTF8Chars(utf8.c_str(), strlen(utf8.c_str())));
1360 : }
1361 :
1362 : JSString*
1363 : NewMaybeExternalString(JSContext* cx, const char16_t* s, size_t n, const JSStringFinalizer* fin,
1364 : bool* allocatedExternal);
1365 :
1366 : JS_STATIC_ASSERT(sizeof(HashNumber) == 4);
1367 :
1368 : } /* namespace js */
1369 :
1370 : // Addon IDs are interned atoms which are never destroyed. This detail is
1371 : // not exposed outside the API.
// Declares no members of its own; it is a distinct static type over JSAtom.
1372 : class JSAddonId : public JSAtom
1373 : {};
1374 :
// Stores the character at |index| in *code and returns true. Returns false
// if the string holding that character could not be linearized.
// |index| must be less than length() (asserted only).
1375 : MOZ_ALWAYS_INLINE bool
1376 14360 : JSString::getChar(JSContext* cx, size_t index, char16_t* code)
1377 : {
1378 14360 : MOZ_ASSERT(index < length());
1379 :
1380 : /*
1381 : * Optimization for one level deep ropes.
1382 : * This is common for the following pattern:
1383 : *
1384 : * while() {
1385 : * text = text.substr(0, x) + "bla" + text.substr(x)
1386 : * text.charCodeAt(x + 1)
1387 : * }
1388 : */
1389 : JSString* str;
1390 14360 : if (isRope()) {
// Select the child that contains |index|, so only that child (not the whole
// rope) has to be linearized below.
1391 4 : JSRope* rope = &asRope();
1392 4 : if (uint32_t(index) < rope->leftChild()->length()) {
1393 4 : str = rope->leftChild();
1394 : } else {
// Rebase the index so it is relative to the start of the right child.
1395 0 : str = rope->rightChild();
1396 0 : index -= rope->leftChild()->length();
1397 : }
1398 : } else {
1399 14356 : str = this;
1400 : }
1401 :
// The chosen string may itself still be a rope; linearize it before reading.
1402 14360 : if (!str->ensureLinear(cx))
1403 0 : return false;
1404 :
1405 14360 : *code = str->asLinear().latin1OrTwoByteChar(index);
1406 14360 : return true;
1407 : }
1408 :
// Returns this string as a JSLinearString: directly when it is already
// linear, otherwise the result of flattening it as a rope (which the callers
// in this file treat as null on failure).
1409 : MOZ_ALWAYS_INLINE JSLinearString*
1410 54126 : JSString::ensureLinear(JSContext* cx)
1411 : {
1412 54126 : return isLinear()
1413 55042 : ? &asLinear()
1414 55042 : : asRope().flatten(cx);
1415 : }
1416 :
// Returns the base string stored in the header (d.s.u3.base). Requires
// hasBase(), and the base must not be an inline string (both asserted only).
1417 : inline JSLinearString*
1418 54 : JSString::base() const
1419 : {
1420 54 : MOZ_ASSERT(hasBase());
1421 54 : MOZ_ASSERT(!d.s.u3.base->isInline());
1422 54 : return d.s.u3.base;
1423 : }
1424 :
// char16_t specialization: forwards to nonInlineTwoByteChars.
1425 : template<>
1426 : MOZ_ALWAYS_INLINE const char16_t*
1427 51 : JSLinearString::nonInlineChars(const JS::AutoCheckCannotGC& nogc) const
1428 : {
1429 51 : return nonInlineTwoByteChars(nogc);
1430 : }
1431 :
// Latin1Char specialization: forwards to nonInlineLatin1Chars.
1432 : template<>
1433 : MOZ_ALWAYS_INLINE const JS::Latin1Char*
1434 2130 : JSLinearString::nonInlineChars(const JS::AutoCheckCannotGC& nogc) const
1435 : {
1436 2130 : return nonInlineLatin1Chars(nogc);
1437 : }
1438 :
// char16_t specialization: returns rawTwoByteChars(). The nogc argument is
// part of the signature only; the body does not use it.
1439 : template<>
1440 : MOZ_ALWAYS_INLINE const char16_t*
1441 11 : JSLinearString::chars(const JS::AutoCheckCannotGC& nogc) const
1442 : {
1443 11 : return rawTwoByteChars();
1444 : }
1445 :
// Latin1Char specialization: returns rawLatin1Chars(). The nogc argument is
// part of the signature only; the body does not use it.
1446 : template<>
1447 : MOZ_ALWAYS_INLINE const JS::Latin1Char*
1448 680 : JSLinearString::chars(const JS::AutoCheckCannotGC& nogc) const
1449 : {
1450 680 : return rawLatin1Chars();
1451 : }
1452 :
// Latin1Char specialization: forwards to copyLatin1Chars and returns its result.
1453 : template <>
1454 : MOZ_ALWAYS_INLINE bool
1455 0 : JSRope::copyChars<JS::Latin1Char>(JSContext* cx,
1456 : js::ScopedJSFreePtr<JS::Latin1Char>& out) const
1457 : {
1458 0 : return copyLatin1Chars(cx, out);
1459 : }
1460 :
// char16_t specialization: forwards to copyTwoByteChars and returns its result.
1461 : template <>
1462 : MOZ_ALWAYS_INLINE bool
1463 0 : JSRope::copyChars<char16_t>(JSContext* cx, js::ScopedJSFreePtr<char16_t>& out) const
1464 : {
1465 0 : return copyTwoByteChars(cx, out);
1466 : }
1467 :
// True if a Latin1 string of |length| chars fits in a thin inline string.
1468 : template<>
1469 : MOZ_ALWAYS_INLINE bool
1470 120724 : JSThinInlineString::lengthFits<JS::Latin1Char>(size_t length)
1471 : {
1472 120724 : return length <= MAX_LENGTH_LATIN1;
1473 : }
1474 :
// True if a two-byte string of |length| chars fits in a thin inline string.
1475 : template<>
1476 : MOZ_ALWAYS_INLINE bool
1477 79 : JSThinInlineString::lengthFits<char16_t>(size_t length)
1478 : {
1479 79 : return length <= MAX_LENGTH_TWO_BYTE;
1480 : }
1481 :
// True if a Latin1 string of |length| chars fits in a fat inline string.
// The static_asserts pin the layout this bound depends on.
1482 : template<>
1483 : MOZ_ALWAYS_INLINE bool
1484 164908 : JSFatInlineString::lengthFits<JS::Latin1Char>(size_t length)
1485 : {
// The extension storage must be a whole number of GC cell alignment units.
1486 : static_assert((INLINE_EXTENSION_CHARS_LATIN1 * sizeof(char)) % js::gc::CellAlignBytes == 0,
1487 : "fat inline strings' Latin1 characters don't exactly "
1488 : "fill subsequent cells and thus are wasteful");
// MAX_LENGTH_LATIN1 + 1 must equal the number of chars from the start of the
// inline storage to the end of the object.
1489 : static_assert(MAX_LENGTH_LATIN1 + 1 ==
1490 : (sizeof(JSFatInlineString) -
1491 : offsetof(JSFatInlineString, d.inlineStorageLatin1)) / sizeof(char),
1492 : "MAX_LENGTH_LATIN1 must be one less than inline Latin1 "
1493 : "storage count");
1494 :
1495 164908 : return length <= MAX_LENGTH_LATIN1;
1496 : }
1497 :
// True if a two-byte string of |length| chars fits in a fat inline string.
// The static_asserts pin the layout this bound depends on.
1498 : template<>
1499 : MOZ_ALWAYS_INLINE bool
1500 374 : JSFatInlineString::lengthFits<char16_t>(size_t length)
1501 : {
// The extension storage must be a whole number of GC cell alignment units.
1502 : static_assert((INLINE_EXTENSION_CHARS_TWO_BYTE * sizeof(char16_t)) % js::gc::CellAlignBytes == 0,
1503 : "fat inline strings' char16_t characters don't exactly "
1504 : "fill subsequent cells and thus are wasteful");
// MAX_LENGTH_TWO_BYTE + 1 must equal the number of char16_t slots from the
// start of the inline storage to the end of the object.
1505 : static_assert(MAX_LENGTH_TWO_BYTE + 1 ==
1506 : (sizeof(JSFatInlineString) -
1507 : offsetof(JSFatInlineString, d.inlineStorageTwoByte)) / sizeof(char16_t),
1508 : "MAX_LENGTH_TWO_BYTE must be one less than inline "
1509 : "char16_t storage count");
1510 :
1511 374 : return length <= MAX_LENGTH_TWO_BYTE;
1512 : }
1513 :
// Latin1 check for "fits in some inline string": uses the fat inline limit.
1514 : template<>
1515 : MOZ_ALWAYS_INLINE bool
1516 147006 : JSInlineString::lengthFits<JS::Latin1Char>(size_t length)
1517 : {
1518 : // If it fits in a fat inline string, it fits in any inline string.
1519 147006 : return JSFatInlineString::lengthFits<JS::Latin1Char>(length);
1520 : }
1521 :
// Two-byte check for "fits in some inline string": uses the fat inline limit.
1522 : template<>
1523 : MOZ_ALWAYS_INLINE bool
1524 357 : JSInlineString::lengthFits<char16_t>(size_t length)
1525 : {
1526 : // If it fits in a fat inline string, it fits in any inline string.
1527 357 : return JSFatInlineString::lengthFits<char16_t>(length);
1528 : }
1529 :
// char16_t specialization: stores |chars| in d.s.u2.nonInlineCharsTwoByte.
// Only the pointer is written; no other header field is touched here.
1530 : template<>
1531 : MOZ_ALWAYS_INLINE void
1532 75 : JSString::setNonInlineChars(const char16_t* chars)
1533 : {
1534 75 : d.s.u2.nonInlineCharsTwoByte = chars;
1535 75 : }
1536 :
// Latin1Char specialization: stores |chars| in d.s.u2.nonInlineCharsLatin1.
// Only the pointer is written; no other header field is touched here.
1537 : template<>
1538 : MOZ_ALWAYS_INLINE void
1539 3870 : JSString::setNonInlineChars(const JS::Latin1Char* chars)
1540 : {
1541 3870 : d.s.u2.nonInlineCharsLatin1 = chars;
1542 3870 : }
1543 :
// Returns the Latin1 character pointer without taking an AutoCheckCannotGC
// argument. Requires a linear string with Latin1 chars (asserted only).
1544 : MOZ_ALWAYS_INLINE const JS::Latin1Char*
1545 2458976 : JSLinearString::rawLatin1Chars() const
1546 : {
1547 2458976 : MOZ_ASSERT(JSString::isLinear());
1548 2458980 : MOZ_ASSERT(hasLatin1Chars());
// Inline strings keep their chars in the header; all others store a pointer.
1549 2458994 : return isInline() ? d.inlineStorageLatin1 : d.s.u2.nonInlineCharsLatin1;
1550 : }
1551 :
// Returns the two-byte character pointer without taking an AutoCheckCannotGC
// argument. Requires a linear string with two-byte chars (asserted only).
1552 : MOZ_ALWAYS_INLINE const char16_t*
1553 15107 : JSLinearString::rawTwoByteChars() const
1554 : {
1555 15107 : MOZ_ASSERT(JSString::isLinear());
1556 15107 : MOZ_ASSERT(hasTwoByteChars());
// Inline strings keep their chars in the header; all others store a pointer.
1557 15107 : return isInline() ? d.inlineStorageTwoByte : d.s.u2.nonInlineCharsTwoByte;
1558 : }
1559 :
// Reinterprets this atom as a js::PropertyName. The caller must already know
// the atom is not an index; that is only asserted, under DEBUG.
1560 : inline js::PropertyName*
1561 751662 : JSAtom::asPropertyName()
1562 : {
1563 : #ifdef DEBUG
// |dummy| only receives the out-value of isIndex(); it is never read.
1564 : uint32_t dummy;
1565 751662 : MOZ_ASSERT(!isIndex(&dummy));
1566 : #endif
1567 751668 : return static_cast<js::PropertyName*>(this);
1568 : }
1569 :
1570 : #endif /* vm_String_h */
|