Line data Source code
1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 : * vim: set ts=8 sts=4 et sw=4 tw=99:
3 : * This Source Code Form is subject to the terms of the Mozilla Public
4 : * License, v. 2.0. If a copy of the MPL was not distributed with this
5 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 :
7 : /**
8 : * The compiled representation of a RegExp, potentially shared among RegExp instances created
9 : * during separate evaluations of a single RegExp literal in source code.
10 : */
11 :
12 : #ifndef vm_RegExpShared_h
13 : #define vm_RegExpShared_h
14 :
15 : #include "mozilla/Assertions.h"
16 : #include "mozilla/MemoryReporting.h"
17 :
18 : #include "jsalloc.h"
19 : #include "jsatom.h"
20 :
21 : #include "builtin/SelfHostingDefines.h"
22 : #include "gc/Barrier.h"
23 : #include "gc/Heap.h"
24 : #include "gc/Marking.h"
25 : #include "js/UbiNode.h"
26 : #include "js/Vector.h"
27 : #include "vm/ArrayObject.h"
28 :
29 : struct JSContext;
30 :
31 : namespace js {
32 :
33 : class ArrayObject;
34 : class MatchPairs;
35 : class RegExpCompartment;
36 : class RegExpShared;
37 : class RegExpStatics;
38 : // Short aliases for the JS::Rooted / JS::Handle / JS::MutableHandle wrappers around RegExpShared*.
39 : using RootedRegExpShared = JS::Rooted<RegExpShared*>;
40 : using HandleRegExpShared = JS::Handle<RegExpShared*>;
41 : using MutableHandleRegExpShared = JS::MutableHandle<RegExpShared*>;
42 : // Bit flags describing a RegExp's options. Each flag occupies one bit of a uint8_t.
43 : enum RegExpFlag : uint8_t
44 : {
45 : IgnoreCaseFlag = 0x01,
46 : GlobalFlag = 0x02,
47 : MultilineFlag = 0x04,
48 : StickyFlag = 0x08,
49 : UnicodeFlag = 0x10,
50 : // Convenience values: no bits set, and the union of the five flag bits above.
51 : NoFlags = 0x00,
52 : AllFlags = 0x1f
53 : };
54 : // Compile-time check that each C++ flag bit equals the matching REGEXP_*_FLAG macro (defined outside this file).
55 : static_assert(IgnoreCaseFlag == REGEXP_IGNORECASE_FLAG &&
56 : GlobalFlag == REGEXP_GLOBAL_FLAG &&
57 : MultilineFlag == REGEXP_MULTILINE_FLAG &&
58 : StickyFlag == REGEXP_STICKY_FLAG &&
59 : UnicodeFlag == REGEXP_UNICODE_FLAG,
60 : "Flag values should be in sync with self-hosted JS");
61 : // Return type of RegExpShared::execute(). NOTE(review): meanings below are inferred from the names only — confirm.
62 : enum RegExpRunStatus
63 : {
64 : RegExpRunStatus_Error, // presumably: execution failed
65 : RegExpRunStatus_Success, // presumably: execution ran and found a match
66 : RegExpRunStatus_Success_NotFound // presumably: execution ran without error but found no match
67 : };
68 :
69 : /*
70 : * A RegExpShared is the compiled representation of a regexp. A RegExpShared is
71 : * potentially pointed to by multiple RegExpObjects. Additionally, C++ code may
72 : * have pointers to RegExpShareds on the stack. The RegExpShareds are kept in a
73 : * table so that they can be reused when compiling the same regex string.
74 : *
75 : * To save memory, a RegExpShared is not created for a RegExpObject until it is
76 : * needed for execution. When a RegExpShared needs to be created, it is looked
77 : * up in a per-compartment table to allow reuse between objects.
78 : *
79 : * During a GC, RegExpShared instances are marked and swept like GC things.
80 : * Usually, RegExpObjects clear their pointers to their RegExpShareds rather
81 : * than explicitly tracing them, so that the RegExpShared and any jitcode can
82 : * be reclaimed quicker. However, the RegExpShareds are traced through by
83 : * objects when we are preserving jitcode in their zone, to avoid the same
84 : * recompilation inefficiencies as normal Ion and baseline compilation.
85 : */
86 : class RegExpShared : public gc::TenuredCell
87 : {
88 : public:
89 : enum CompilationMode { // Selects which pair of compilationArray slots is used (see CompilationIndex).
90 : Normal,
91 : MatchOnly
92 : };
93 : // Passing ForceByteCode makes only bytecode count as "compiled" (see RegExpCompilation::compiled).
94 : enum ForceByteCodeEnum {
95 : DontForceByteCode,
96 : ForceByteCode
97 : };
98 : // Owned byte buffers ("tables") registered through addTable(), and the vector that holds them.
99 : using JitCodeTable = UniquePtr<uint8_t[], JS::FreePolicy>;
100 : using JitCodeTables = Vector<JitCodeTable, 0, SystemAllocPolicy>;
101 :
102 : private:
103 : friend class RegExpStatics;
104 : friend class RegExpZone;
105 : // One compiled form of the regexp: a JIT code pointer and/or a bytecode buffer.
106 : struct RegExpCompilation
107 : {
108 : ReadBarriered<jit::JitCode*> jitCode;
109 : uint8_t* byteCode;
110 : // Starts with no bytecode; jitCode is left to its own default construction.
111 276 : RegExpCompilation() : byteCode(nullptr) {}
112 : // True if bytecode exists, or (unless bytecode is being forced) if JIT code exists.
113 449 : bool compiled(ForceByteCodeEnum force = DontForceByteCode) const {
114 449 : return byteCode || (force == DontForceByteCode && jitCode);
115 : }
116 : };
117 :
118 : /* Source to the RegExp, for lazy compilation. */
119 : HeapPtr<JSAtom*> source;
120 : // flags holds the RegExpFlag bits. parenCount is presumably the capture-group count (pairCount() adds one) — confirm.
121 : RegExpFlag flags;
122 : bool canStringMatch; // NOTE(review): meaning is not established in this header; set elsewhere.
123 : size_t parenCount;
124 : // One slot per (CompilationMode, latin1) combination; indexed via CompilationIndex().
125 : RegExpCompilation compilationArray[4];
126 : // Maps (mode, latin1) to a compilationArray index: Normal -> 0 (latin1) / 1, MatchOnly -> 2 (latin1) / 3.
127 734 : static int CompilationIndex(CompilationMode mode, bool latin1) {
128 734 : switch (mode) {
129 448 : case Normal: return latin1 ? 0 : 1;
130 286 : case MatchOnly: return latin1 ? 2 : 3;
131 : }
132 0 : MOZ_CRASH(); // Reached only if mode holds a value that is not a CompilationMode enumerator.
133 : }
134 :
135 : // Tables referenced by JIT code.
136 : JitCodeTables tables;
137 :
138 : /* Internal functions. */
139 : RegExpShared(JSAtom* source, RegExpFlag flags);
140 : // The compile helpers below are declared here and defined elsewhere.
141 : static bool compile(JSContext* cx, MutableHandleRegExpShared res, HandleLinearString input,
142 : CompilationMode mode, ForceByteCodeEnum force);
143 : static bool compile(JSContext* cx, MutableHandleRegExpShared res, HandleAtom pattern,
144 : HandleLinearString input, CompilationMode mode, ForceByteCodeEnum force);
145 :
146 : static bool compileIfNecessary(JSContext* cx, MutableHandleRegExpShared res,
147 : HandleLinearString input, CompilationMode mode,
148 : ForceByteCodeEnum force);
149 : // Const and non-const access to the compilation slot for (mode, latin1).
150 449 : const RegExpCompilation& compilation(CompilationMode mode, bool latin1) const {
151 449 : return compilationArray[CompilationIndex(mode, latin1)];
152 : }
153 :
154 285 : RegExpCompilation& compilation(CompilationMode mode, bool latin1) {
155 285 : return compilationArray[CompilationIndex(mode, latin1)];
156 : }
157 : // NOTE(review): the destructor is deleted; teardown presumably goes through finalize() — confirm.
158 : public:
159 : ~RegExpShared() = delete;
160 :
161 : // Execute this RegExp on input starting from searchIndex, filling in
162 : // matches if specified and otherwise only determining if there is a match.
163 : static RegExpRunStatus execute(JSContext* cx, MutableHandleRegExpShared res,
164 : HandleLinearString input, size_t searchIndex,
165 : MatchPairs* matches, size_t* endIndex);
166 :
167 : // Register a table with this RegExpShared, and take ownership.
168 16 : bool addTable(JitCodeTable table) {
169 16 : return tables.append(Move(table)); // Returns the result of the vector append.
170 : }
171 :
172 : /* Accessors */
173 : // getParenCount() asserts (MOZ_ASSERT) that at least one compilation exists before reading parenCount.
174 149 : size_t getParenCount() const {
175 149 : MOZ_ASSERT(isCompiled());
176 149 : return parenCount;
177 : }
178 :
179 : /* Accounts for the "0" (whole match) pair. */
180 149 : size_t pairCount() const { return getParenCount() + 1; }
181 : // Plain getters, followed by one boolean test per flag bit.
182 273 : JSAtom* getSource() const { return source; }
183 233 : RegExpFlag getFlags() const { return flags; }
184 120 : bool ignoreCase() const { return flags & IgnoreCaseFlag; }
185 40 : bool global() const { return flags & GlobalFlag; }
186 40 : bool multiline() const { return flags & MultilineFlag; }
187 99 : bool sticky() const { return flags & StickyFlag; }
188 80 : bool unicode() const { return flags & UnicodeFlag; }
189 : // True if the slot for (mode, latin1) is compiled, honoring 'force' as in RegExpCompilation::compiled().
190 449 : bool isCompiled(CompilationMode mode, bool latin1,
191 : ForceByteCodeEnum force = DontForceByteCode) const {
192 449 : return compilation(mode, latin1).compiled(force);
193 : }
194 149 : bool isCompiled() const { // True if any of the four slots is compiled.
195 183 : return isCompiled(Normal, true) || isCompiled(Normal, false)
196 151 : || isCompiled(MatchOnly, true) || isCompiled(MatchOnly, false);
197 : }
198 : // Declared here, defined elsewhere.
199 : void traceChildren(JSTracer* trc);
200 : void discardJitCode();
201 : void finalize(FreeOp* fop);
202 : // Byte offsets of private members within RegExpShared (presumably consumed by JIT-generated code — confirm).
203 0 : static size_t offsetOfSource() {
204 0 : return offsetof(RegExpShared, source);
205 : }
206 :
207 0 : static size_t offsetOfFlags() {
208 0 : return offsetof(RegExpShared, flags);
209 : }
210 :
211 0 : static size_t offsetOfParenCount() {
212 0 : return offsetof(RegExpShared, parenCount);
213 : }
214 : // Byte offset of the jitCode field inside the latin1 / two-byte compilation slot for 'mode'.
215 0 : static size_t offsetOfLatin1JitCode(CompilationMode mode) {
216 : return offsetof(RegExpShared, compilationArray)
217 0 : + (CompilationIndex(mode, true) * sizeof(RegExpCompilation))
218 0 : + offsetof(RegExpCompilation, jitCode);
219 : }
220 0 : static size_t offsetOfTwoByteJitCode(CompilationMode mode) {
221 : return offsetof(RegExpShared, compilationArray)
222 0 : + (CompilationIndex(mode, false) * sizeof(RegExpCompilation))
223 0 : + offsetof(RegExpCompilation, jitCode);
224 : }
225 :
226 : size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf);
227 :
228 : #ifdef DEBUG
229 : static bool dumpBytecode(JSContext* cx, MutableHandleRegExpShared res, bool match_only,
230 : HandleLinearString input);
231 : #endif
232 : };
233 : // Holds the zone's set of RegExpShareds, keyed by (source atom, flags).
234 : class RegExpZone
235 : {
236 : struct Key { // Lookup key and hash policy for the set: a (source atom, flags) pair.
237 : JSAtom* atom;
238 : uint16_t flag;
239 : // NOTE(review): the default constructor leaves atom and flag uninitialized.
240 : Key() {}
241 148 : Key(JSAtom* atom, RegExpFlag flag)
242 148 : : atom(atom), flag(flag)
243 148 : { }
244 10 : MOZ_IMPLICIT Key(const ReadBarriered<RegExpShared*>& shared) // Derives the key from a stored entry.
245 10 : : atom(shared.unbarrieredGet()->getSource()),
246 10 : flag(shared.unbarrieredGet()->getFlags())
247 10 : { }
248 : // hash() XORs the atom pointer's hash with the flags shifted left by one; match() compares both fields.
249 : typedef Key Lookup;
250 148 : static HashNumber hash(const Lookup& l) {
251 148 : return DefaultHasher<JSAtom*>::hash(l.atom) ^ (l.flag << 1);
252 : }
253 10 : static bool match(Key l, Key r) {
254 10 : return l.atom == r.atom && l.flag == r.flag;
255 : }
256 : };
257 :
258 : /*
259 : * The set of all RegExpShareds in the zone. On every GC, every RegExpShared
260 : * that was not marked is deleted and removed from the set.
261 : */
262 : using Set = JS::WeakCache<JS::GCHashSet<ReadBarriered<RegExpShared*>, Key, RuntimeAllocPolicy>>;
263 : Set set_;
264 :
265 : public:
266 : explicit RegExpZone(Zone* zone);
267 : // Asserts that the set, if it was ever initialized, is empty at destruction.
268 0 : ~RegExpZone() {
269 0 : MOZ_ASSERT_IF(set_.initialized(), set_.empty());
270 0 : }
271 :
272 : bool init();
273 :
274 15 : bool empty() const { return set_.empty(); }
275 : // Declared here, defined elsewhere. Presumably returns the RegExpShared for (source, flags), creating it if absent — confirm.
276 : RegExpShared* get(JSContext* cx, HandleAtom source, RegExpFlag flags);
277 :
278 : /* Like 'get', but compile 'maybeOpt' (if non-null). */
279 : RegExpShared* get(JSContext* cx, HandleAtom source, JSString* maybeOpt);
280 :
281 : size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf);
282 : };
283 : // Caches a match-result template object and two "optimizable" shapes used by RegExp code.
284 0 : class RegExpCompartment
285 : {
286 : /*
287 : * This is the template object on which the result of re.exec() is based,
288 : * if there is a result. This is used in CreateRegExpMatchResult to set
289 : * the input/index properties faster.
290 : */
291 : ReadBarriered<ArrayObject*> matchResultTemplateObject_;
292 :
293 : /*
294 : * The shape of the RegExp.prototype object that satisfies the following:
295 : * * RegExp.prototype.flags getter is not modified
296 : * * RegExp.prototype.global getter is not modified
297 : * * RegExp.prototype.ignoreCase getter is not modified
298 : * * RegExp.prototype.multiline getter is not modified
299 : * * RegExp.prototype.sticky getter is not modified
300 : * * RegExp.prototype.unicode getter is not modified
301 : * * RegExp.prototype.exec is an own data property
302 : * * RegExp.prototype[@@match] is an own data property
303 : * * RegExp.prototype[@@search] is an own data property
304 : */
305 : ReadBarriered<Shape*> optimizableRegExpPrototypeShape_;
306 :
307 : /*
308 : * The shape of a RegExp instance that satisfies the following:
309 : * * lastProperty is lastIndex
310 : * * prototype is RegExp.prototype
311 : */
312 : ReadBarriered<Shape*> optimizableRegExpInstanceShape_;
313 : // Slow path of getOrCreateMatchResultTemplateObject(); defined elsewhere.
314 : ArrayObject* createMatchResultTemplateObject(JSContext* cx);
315 :
316 : public:
317 : explicit RegExpCompartment(Zone* zone);
318 :
319 : void sweep(JSRuntime* rt);
320 :
321 : /* Get or create template object used to base the result of .exec() on. */
322 58 : ArrayObject* getOrCreateMatchResultTemplateObject(JSContext* cx) {
323 58 : if (matchResultTemplateObject_)
324 52 : return matchResultTemplateObject_; // Reuse the cached template when one is present.
325 6 : return createMatchResultTemplateObject(cx);
326 : }
327 : // Plain getters and setters for the two cached shapes.
328 109 : Shape* getOptimizableRegExpPrototypeShape() {
329 109 : return optimizableRegExpPrototypeShape_;
330 : }
331 8 : void setOptimizableRegExpPrototypeShape(Shape* shape) {
332 8 : optimizableRegExpPrototypeShape_ = shape;
333 8 : }
334 93 : Shape* getOptimizableRegExpInstanceShape() {
335 93 : return optimizableRegExpInstanceShape_;
336 : }
337 7 : void setOptimizableRegExpInstanceShape(Shape* shape) {
338 7 : optimizableRegExpInstanceShape_ = shape;
339 7 : }
340 : // Byte offsets of the cached-shape members within RegExpCompartment.
341 0 : static size_t offsetOfOptimizableRegExpPrototypeShape() {
342 0 : return offsetof(RegExpCompartment, optimizableRegExpPrototypeShape_);
343 : }
344 0 : static size_t offsetOfOptimizableRegExpInstanceShape() {
345 0 : return offsetof(RegExpCompartment, optimizableRegExpInstanceShape_);
346 : }
347 : };
348 :
349 : } /* namespace js */
350 :
351 : namespace JS {
352 : namespace ubi {
353 : // JS::ubi::Concrete specialization for js::RegExpShared, layered on TracerConcrete.
354 : template <>
355 : class Concrete<js::RegExpShared> : TracerConcrete<js::RegExpShared>
356 : {
357 : protected:
358 0 : explicit Concrete(js::RegExpShared* ptr) : TracerConcrete<js::RegExpShared>(ptr) { }
359 :
360 : public:
361 0 : static void construct(void* storage, js::RegExpShared* ptr) { // Placement-constructs into caller-provided storage.
362 0 : new (storage) Concrete(ptr);
363 0 : }
364 : // Always reports CoarseType::Other.
365 0 : CoarseType coarseType() const final { return CoarseType::Other; }
366 : // Declared here, defined elsewhere.
367 : Size size(mozilla::MallocSizeOf mallocSizeOf) const override;
368 : // Returns the static concreteTypeName array, whose definition is not in this header.
369 0 : const char16_t* typeName() const override { return concreteTypeName; }
370 : static const char16_t concreteTypeName[];
371 : };
372 :
373 : } // namespace ubi
374 : } // namespace JS
375 :
376 : #endif /* vm_RegExpShared_h */
|