Line data Source code
1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 : * vim: set ts=8 sts=4 et sw=4 tw=99:
3 : * This Source Code Form is subject to the terms of the Mozilla Public
4 : * License, v. 2.0. If a copy of the MPL was not distributed with this
5 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 :
7 : #ifndef vm_SharedImmutableStringsCache_h
8 : #define vm_SharedImmutableStringsCache_h
9 :
10 : #include "mozilla/Maybe.h"
11 : #include "mozilla/UniquePtr.h"
12 :
13 : #include <cstring>
14 : #include <new> // for placement new
15 :
16 : #include "jsstr.h"
17 :
18 : #include "js/HashTable.h"
19 : #include "js/Utility.h"
20 :
21 : #include "threading/ExclusiveData.h"
22 :
23 : #include "vm/MutexIDs.h"
24 :
25 : namespace js {
26 :
27 : class SharedImmutableString;
28 : class SharedImmutableTwoByteString;
29 :
30 : /**
31 : * The `SharedImmutableStringsCache` allows for safely sharing and deduplicating
32 : * immutable strings (either `const char*` or `const char16_t*`) between
33 : * threads.
34 : *
35 : * The locking mechanism is dead-simple and coarse grained: a single lock guards
36 : * all of the internal table itself, the table's entries, and the entries'
37 : * reference counts. It is only safe to perform any mutation on the cache or any
38 : * data stored within the cache when this lock is acquired.
39 : */
40 : class SharedImmutableStringsCache
41 : {
42 : friend class SharedImmutableString;
43 : friend class SharedImmutableTwoByteString;
44 : struct Hasher;
45 :
46 : public:
47 : using OwnedChars = mozilla::UniquePtr<char[], JS::FreePolicy>;
48 : using OwnedTwoByteChars = mozilla::UniquePtr<char16_t[], JS::FreePolicy>;
49 :
50 : /**
51 : * Get the canonical, shared, and de-duplicated version of the given `const
52 : * char*` string. If such a string does not exist, call `intoOwnedChars` and
53 : * add the string it returns to the cache.
54 : *
55 : * `intoOwnedChars` must create an owned version of the given string, and
56 : * must have one of the following types:
57 : *
58 : * mozilla::UniquePtr<char[], JS::FreePolicy> intoOwnedChars();
59 : * mozilla::UniquePtr<char[], JS::FreePolicy>&& intoOwnedChars();
60 : *
61 : * It can be used by callers to elide a copy of the string when it is safe
62 : * to give up ownership of the lookup string to the cache. It must return a
63 : * `nullptr` on failure.
64 : *
65 : * On success, `Some` is returned. In the case of OOM failure, `Nothing` is
66 : * returned.
67 : */
68 : template <typename IntoOwnedChars>
69 : MOZ_MUST_USE mozilla::Maybe<SharedImmutableString>
70 : getOrCreate(const char* chars, size_t length, IntoOwnedChars intoOwnedChars);
71 :
72 : /**
73 : * Take ownership of the given `chars` and return the canonical, shared and
74 : * de-duplicated version.
75 : *
76 : * On success, `Some` is returned. In the case of OOM failure, `Nothing` is
77 : * returned.
78 : */
79 : MOZ_MUST_USE mozilla::Maybe<SharedImmutableString>
80 : getOrCreate(OwnedChars&& chars, size_t length);
81 :
82 : /**
83 : * Do not take ownership of the given `chars`. Return the canonical, shared
84 : * and de-duplicated version. If there is no extant shared version of
85 : * `chars`, make a copy and insert it into the cache.
86 : *
87 : * On success, `Some` is returned. In the case of OOM failure, `Nothing` is
88 : * returned.
89 : */
90 : MOZ_MUST_USE mozilla::Maybe<SharedImmutableString>
91 : getOrCreate(const char* chars, size_t length);
92 :
93 : /**
94 : * Get the canonical, shared, and de-duplicated version of the given `const
95 : * char16_t*` string. If such a string does not exist, call `intoOwnedChars`
96 : * and add the string it returns to the cache.
97 : *
98 : * `intoOwnedTwoByteChars` must create an owned version of the given string,
99 : * and must have one of the following types:
100 : *
101 : * mozilla::UniquePtr<char16_t[], JS::FreePolicy> intoOwnedTwoByteChars();
102 : * mozilla::UniquePtr<char16_t[], JS::FreePolicy>&& intoOwnedTwoByteChars();
103 : *
104 : * It can be used by callers to elide a copy of the string when it is safe
105 : * to give up ownership of the lookup string to the cache. It must return a
106 : * `nullptr` on failure.
107 : *
108 : * On success, `Some` is returned. In the case of OOM failure, `Nothing` is
109 : * returned.
110 : */
111 : template <typename IntoOwnedTwoByteChars>
112 : MOZ_MUST_USE mozilla::Maybe<SharedImmutableTwoByteString>
113 : getOrCreate(const char16_t* chars, size_t length, IntoOwnedTwoByteChars intoOwnedTwoByteChars);
114 :
115 : /**
116 : * Take ownership of the given `chars` and return the canonical, shared and
117 : * de-duplicated version.
118 : *
119 : * On success, `Some` is returned. In the case of OOM failure, `Nothing` is
120 : * returned.
121 : */
122 : MOZ_MUST_USE mozilla::Maybe<SharedImmutableTwoByteString>
123 : getOrCreate(OwnedTwoByteChars&& chars, size_t length);
124 :
125 : /**
126 : * Do not take ownership of the given `chars`. Return the canonical, shared
127 : * and de-duplicated version. If there is no extant shared version of
128 : * `chars`, then make a copy and insert it into the cache.
129 : *
130 : * On success, `Some` is returned. In the case of OOM failure, `Nothing` is
131 : * returned.
132 : */
133 : MOZ_MUST_USE mozilla::Maybe<SharedImmutableTwoByteString>
134 : getOrCreate(const char16_t* chars, size_t length);
135 :
136 0 : size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) const {
137 0 : MOZ_ASSERT(inner_);
138 0 : size_t n = mallocSizeOf(inner_);
139 :
140 0 : auto locked = inner_->lock();
141 0 : if (!locked->set.initialized())
142 0 : return n;
143 :
144 : // Size of the table.
145 0 : n += locked->set.sizeOfExcludingThis(mallocSizeOf);
146 :
147 : // Sizes of the strings and their boxes.
148 0 : for (auto r = locked->set.all(); !r.empty(); r.popFront()) {
149 0 : n += mallocSizeOf(r.front().get());
150 0 : if (const char* chars = r.front()->chars())
151 0 : n += mallocSizeOf(chars);
152 : }
153 :
154 0 : return n;
155 : }
156 :
157 : /**
158 : * Construct a new cache of shared, immutable strings. Returns
159 : * `mozilla::Nothing` on out of memory failure.
160 : */
161 3 : static mozilla::Maybe<SharedImmutableStringsCache> Create() {
162 3 : auto inner = js_new<ExclusiveData<Inner>>(mutexid::SharedImmutableStringsCache);
163 3 : if (!inner)
164 0 : return mozilla::Nothing();
165 :
166 6 : auto locked = inner->lock();
167 3 : return mozilla::Some(SharedImmutableStringsCache(locked));
168 : }
169 :
170 6810 : SharedImmutableStringsCache(SharedImmutableStringsCache&& rhs)
171 6810 : : inner_(rhs.inner_)
172 : {
173 6810 : MOZ_ASSERT(inner_);
174 6810 : rhs.inner_ = nullptr;
175 6810 : }
176 :
177 0 : SharedImmutableStringsCache& operator=(SharedImmutableStringsCache&& rhs) {
178 0 : MOZ_ASSERT(this != &rhs, "self move not allowed");
179 0 : new (this) SharedImmutableStringsCache(mozilla::Move(rhs));
180 0 : return *this;
181 : }
182 :
183 : SharedImmutableStringsCache& operator=(const SharedImmutableStringsCache&) = delete;
184 :
185 : SharedImmutableStringsCache clone() {
186 : MOZ_ASSERT(inner_);
187 : auto locked = inner_->lock();
188 : return SharedImmutableStringsCache(locked);
189 : }
190 :
191 6810 : ~SharedImmutableStringsCache() {
192 6810 : if (!inner_)
193 13620 : return;
194 :
195 0 : bool shouldDestroy = false;
196 : {
197 : // ~ExclusiveData takes the lock, so be sure to drop the lock before
198 : // attempting to destroy the inner.
199 0 : auto locked = inner_->lock();
200 0 : MOZ_ASSERT(locked->refcount > 0);
201 0 : locked->refcount--;
202 0 : if (locked->refcount == 0)
203 0 : shouldDestroy = true;
204 : }
205 0 : if (shouldDestroy)
206 0 : js_delete(inner_);
207 6810 : }
208 :
209 : /**
210 : * Purge the cache of all refcount == 0 entries.
211 : */
212 1 : void purge() {
213 2 : auto locked = inner_->lock();
214 1 : MOZ_ASSERT(locked->refcount > 0);
215 :
216 1 : if (!locked->set.initialized())
217 0 : return;
218 :
219 1132 : for (Inner::Set::Enum e(locked->set); !e.empty(); e.popFront()) {
220 1131 : if (e.front()->refcount == 0) {
221 : // The chars should be eagerly freed when refcount reaches zero.
222 0 : MOZ_ASSERT(!e.front()->chars());
223 0 : e.removeFront();
224 : } else {
225 : // The chars should exist as long as the refcount is non-zero.
226 1131 : MOZ_ASSERT(e.front()->chars());
227 : }
228 : }
229 : }
230 :
231 : private:
232 : class StringBox
233 : {
234 : friend class SharedImmutableString;
235 :
236 : OwnedChars chars_;
237 : size_t length_;
238 :
239 : public:
240 : mutable size_t refcount;
241 :
242 : using Ptr = mozilla::UniquePtr<StringBox, JS::DeletePolicy<StringBox>>;
243 :
244 1243 : StringBox(OwnedChars&& chars, size_t length)
245 1243 : : chars_(mozilla::Move(chars))
246 : , length_(length)
247 1243 : , refcount(0)
248 : {
249 1243 : MOZ_ASSERT(chars_);
250 1243 : }
251 :
252 1243 : static Ptr Create(OwnedChars&& chars, size_t length) {
253 1243 : return Ptr(js_new<StringBox>(mozilla::Move(chars), length));
254 : }
255 :
256 : StringBox(const StringBox&) = delete;
257 : StringBox& operator=(const StringBox&) = delete;
258 :
259 0 : ~StringBox() {
260 0 : MOZ_RELEASE_ASSERT(refcount == 0,
261 : "There are `SharedImmutable[TwoByte]String`s outliving their "
262 : "associated cache! This always leads to use-after-free in the "
263 : "`~SharedImmutableString` destructor!");
264 0 : }
265 :
266 7695 : const char* chars() const { return chars_.get(); }
267 4951 : size_t length() const { return length_; }
268 : };
269 :
270 : struct Hasher
271 : {
272 : /**
273 : * A structure used when querying for a `const char*` string in the cache.
274 : */
275 : class Lookup
276 : {
277 : friend struct Hasher;
278 :
279 : HashNumber hash_;
280 : const char* chars_;
281 : size_t length_;
282 :
283 : public:
284 1703 : Lookup(HashNumber hash, const char* chars, size_t length)
285 1703 : : hash_(hash)
286 : , chars_(chars)
287 1703 : , length_(length)
288 : {
289 1703 : MOZ_ASSERT(chars_);
290 1703 : MOZ_ASSERT(hash == Hasher::hashLongString(chars, length));
291 1703 : }
292 :
293 1703 : Lookup(HashNumber hash, const char16_t* chars, size_t length)
294 1703 : : Lookup(hash, reinterpret_cast<const char*>(chars), length * sizeof(char16_t))
295 1703 : { }
296 : };
297 :
298 : static const size_t SHORT_STRING_MAX_LENGTH = 8192;
299 : static const size_t HASH_CHUNK_LENGTH = SHORT_STRING_MAX_LENGTH / 2;
300 :
301 : // For strings longer than SHORT_STRING_MAX_LENGTH, we only hash the
302 : // first HASH_CHUNK_LENGTH and last HASH_CHUNK_LENGTH characters in the
303 : // string. This increases the risk of collisions, but in practice it
304 : // should be rare, and it yields a large speedup for hashing long
305 : // strings.
306 3406 : static HashNumber hashLongString(const char* chars, size_t length) {
307 3406 : MOZ_ASSERT(chars);
308 : return length <= SHORT_STRING_MAX_LENGTH
309 3684 : ? mozilla::HashString(chars, length)
310 278 : : mozilla::AddToHash(mozilla::HashString(chars, HASH_CHUNK_LENGTH),
311 278 : mozilla::HashString(chars + length - HASH_CHUNK_LENGTH,
312 3406 : HASH_CHUNK_LENGTH));
313 : }
314 :
315 1703 : static HashNumber hash(const Lookup& lookup) {
316 1703 : return lookup.hash_;
317 : }
318 :
319 460 : static bool match(const StringBox::Ptr& key, const Lookup& lookup) {
320 460 : MOZ_ASSERT(lookup.chars_);
321 :
322 460 : if (!key->chars() || key->length() != lookup.length_)
323 0 : return false;
324 :
325 460 : if (key->chars() == lookup.chars_)
326 0 : return true;
327 :
328 460 : return memcmp(key->chars(), lookup.chars_, key->length()) == 0;
329 : }
330 : };
331 :
332 : // The `Inner` struct contains the actual cached contents, and is reference
333 : // counted and shared between all `SharedImmutableStringsCache` and
334 : // `SharedImmutable[TwoByte]String` holders.
335 : struct Inner
336 : {
337 : using Set = HashSet<StringBox::Ptr, Hasher, SystemAllocPolicy>;
338 :
339 : size_t refcount;
340 : Set set;
341 :
342 3 : Inner()
343 3 : : refcount(0)
344 3 : , set()
345 3 : { }
346 :
347 : Inner(const Inner&) = delete;
348 : Inner& operator=(const Inner&) = delete;
349 :
350 0 : ~Inner()
351 0 : {
352 0 : MOZ_ASSERT(refcount == 0);
353 0 : }
354 : };
355 :
356 : const ExclusiveData<Inner>* inner_;
357 :
358 1704 : explicit SharedImmutableStringsCache(ExclusiveData<Inner>::Guard& locked)
359 1704 : : inner_(locked.parent())
360 : {
361 1704 : locked->refcount++;
362 1704 : }
363 : };
364 :
365 : /**
366 : * The `SharedImmutableString` class holds a reference to a `const char*` string
367 : * from the `SharedImmutableStringsCache` and releases the reference upon
368 : * destruction.
369 : */
370 : class SharedImmutableString
371 : {
372 : friend class SharedImmutableStringsCache;
373 : friend class SharedImmutableTwoByteString;
374 :
375 : mutable SharedImmutableStringsCache cache_;
376 : mutable SharedImmutableStringsCache::StringBox* box_;
377 :
378 : SharedImmutableString(ExclusiveData<SharedImmutableStringsCache::Inner>::Guard& locked,
379 : SharedImmutableStringsCache::StringBox* box);
380 :
381 : public:
382 : /**
383 : * `SharedImmutableString`s are move-able. It is an error to use a
384 : * `SharedImmutableString` after it has been moved.
385 : */
386 : SharedImmutableString(SharedImmutableString&& rhs);
387 : SharedImmutableString& operator=(SharedImmutableString&& rhs);
388 :
389 : /**
390 : * Create another shared reference to the underlying string.
391 : */
392 : SharedImmutableString clone() const;
393 :
394 : // If you want a copy, take one explicitly with `clone`!
395 : SharedImmutableString& operator=(const SharedImmutableString&) = delete;
396 :
397 : ~SharedImmutableString();
398 :
399 : /**
400 : * Get a raw pointer to the underlying string. It is only safe to use the
401 : * resulting pointer while this `SharedImmutableString` exists.
402 : */
403 1426 : const char* chars() const {
404 1426 : MOZ_ASSERT(box_);
405 1426 : MOZ_ASSERT(box_->refcount > 0);
406 1426 : MOZ_ASSERT(box_->chars());
407 1426 : return box_->chars();
408 : }
409 :
410 : /**
411 : * Get the length of the underlying string.
412 : */
413 2328 : size_t length() const {
414 2328 : MOZ_ASSERT(box_);
415 2328 : MOZ_ASSERT(box_->refcount > 0);
416 2328 : MOZ_ASSERT(box_->chars());
417 2328 : return box_->length();
418 : }
419 : };
420 :
421 : /**
422 : * The `SharedImmutableTwoByteString` class holds a reference to a `const
423 : * char16_t*` string from the `SharedImmutableStringsCache` and releases the
424 : * reference upon destruction.
425 : */
426 6816 : class SharedImmutableTwoByteString
427 : {
428 : friend class SharedImmutableStringsCache;
429 :
430 : // If a `char*` string and `char16_t*` string happen to have the same bytes,
431 : // the bytes will be shared but handed out as different types.
432 : SharedImmutableString string_;
433 :
434 : explicit SharedImmutableTwoByteString(SharedImmutableString&& string);
435 : SharedImmutableTwoByteString(ExclusiveData<SharedImmutableStringsCache::Inner>::Guard& locked,
436 : SharedImmutableStringsCache::StringBox* box);
437 :
438 : public:
439 : /**
440 : * `SharedImmutableTwoByteString`s are move-able. It is an error to use a
441 : * `SharedImmutableTwoByteString` after it has been moved.
442 : */
443 : SharedImmutableTwoByteString(SharedImmutableTwoByteString&& rhs);
444 : SharedImmutableTwoByteString& operator=(SharedImmutableTwoByteString&& rhs);
445 :
446 : /**
447 : * Create another shared reference to the underlying string.
448 : */
449 : SharedImmutableTwoByteString clone() const;
450 :
451 : // If you want a copy, take one explicitly with `clone`!
452 : SharedImmutableTwoByteString& operator=(const SharedImmutableTwoByteString&) = delete;
453 :
454 : /**
455 : * Get a raw pointer to the underlying string. It is only safe to use the
456 : * resulting pointer while this `SharedImmutableTwoByteString` exists.
457 : */
458 1426 : const char16_t* chars() const { return reinterpret_cast<const char16_t*>(string_.chars()); }
459 :
460 : /**
461 : * Get the length of the underlying string.
462 : */
463 2328 : size_t length() const { return string_.length() / sizeof(char16_t); }
464 : };
465 :
466 : } // namespace js
467 :
468 : #endif // vm_SharedImmutableStringsCache_h
|