Line data Source code
1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 : * vim: set ts=8 sts=4 et sw=4 tw=99: */
3 :
4 : // Copyright 2011 the V8 project authors. All rights reserved.
5 : // Redistribution and use in source and binary forms, with or without
6 : // modification, are permitted provided that the following conditions are
7 : // met:
8 : //
9 : // * Redistributions of source code must retain the above copyright
10 : // notice, this list of conditions and the following disclaimer.
11 : // * Redistributions in binary form must reproduce the above
12 : // copyright notice, this list of conditions and the following
13 : // disclaimer in the documentation and/or other materials provided
14 : // with the distribution.
15 : // * Neither the name of Google Inc. nor the names of its
16 : // contributors may be used to endorse or promote products derived
17 : // from this software without specific prior written permission.
18 : //
19 : // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 : // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 : // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 : // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 : // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 : // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 : // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 : // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 : // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 : // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 : // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 :
31 : // A simple interpreter for the Irregexp byte code.
32 :
33 : #include "irregexp/RegExpBytecode.h"
34 : #include "irregexp/RegExpMacroAssembler.h"
35 : #include "vm/MatchPairs.h"
36 :
37 : #include "jscntxtinlines.h"
38 :
39 : using namespace js;
40 : using namespace js::irregexp;
41 :
42 : static const size_t kBitsPerByte = 8;
43 : static const size_t kBitsPerByteLog2 = 3;
44 :
45 0 : class MOZ_STACK_CLASS RegExpStackCursor
46 : {
47 : public:
48 0 : explicit RegExpStackCursor(JSContext* cx)
49 0 : : cx(cx), cursor(nullptr)
50 0 : {}
51 :
52 0 : bool init() {
53 0 : if (!stack.init()) {
54 0 : ReportOutOfMemory(cx);
55 0 : return false;
56 : }
57 0 : cursor = base();
58 0 : return true;
59 : }
60 :
61 0 : bool push(int32_t value) {
62 0 : *cursor++ = value;
63 0 : if (cursor >= stack.limit()) {
64 0 : int32_t pos = position();
65 0 : if (!stack.grow()) {
66 0 : ReportOverRecursed(cx);
67 0 : return false;
68 : }
69 0 : setPosition(pos);
70 : }
71 0 : return true;
72 : }
73 :
74 0 : int32_t pop() {
75 0 : MOZ_ASSERT(cursor > base());
76 0 : return *--cursor;
77 : }
78 :
79 0 : int32_t peek() {
80 0 : MOZ_ASSERT(cursor > base());
81 0 : return *(cursor - 1);
82 : }
83 :
84 0 : int32_t position() {
85 0 : MOZ_ASSERT(cursor >= base());
86 0 : return cursor - base();
87 : }
88 :
89 0 : void setPosition(int32_t position) {
90 0 : cursor = base() + position;
91 0 : MOZ_ASSERT(cursor < stack.limit());
92 0 : }
93 :
94 : private:
95 : JSContext* cx;
96 : RegExpStack stack;
97 :
98 : int32_t* cursor;
99 :
100 0 : int32_t* base() { return (int32_t*) stack.base(); }
101 : };
102 :
103 : static int32_t
104 0 : Load32Aligned(const uint8_t* pc)
105 : {
106 0 : MOZ_ASSERT((reinterpret_cast<uintptr_t>(pc) & 3) == 0);
107 0 : return *reinterpret_cast<const int32_t*>(pc);
108 : }
109 :
110 : static int32_t
111 0 : Load16Aligned(const uint8_t* pc)
112 : {
113 0 : MOZ_ASSERT((reinterpret_cast<uintptr_t>(pc) & 1) == 0);
114 0 : return *reinterpret_cast<const uint16_t*>(pc);
115 : }
116 :
117 : #define BYTECODE(name) case BC_##name:
118 :
119 : template <typename CharT>
120 : RegExpRunStatus
121 0 : irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* chars, size_t current,
122 : size_t length, MatchPairs* matches, size_t* endIndex)
123 : {
124 0 : const uint8_t* pc = byteCode;
125 :
126 0 : uint32_t current_char = current ? chars[current - 1] : '\n';
127 :
128 0 : RegExpStackCursor stack(cx);
129 :
130 0 : if (!stack.init())
131 0 : return RegExpRunStatus_Error;
132 :
133 0 : int32_t numRegisters = Load32Aligned(pc);
134 0 : pc += 4;
135 :
136 0 : Vector<int32_t, 0, SystemAllocPolicy> registers;
137 0 : if (!registers.growByUninitialized(numRegisters)) {
138 0 : ReportOutOfMemory(cx);
139 0 : return RegExpRunStatus_Error;
140 : }
141 0 : for (size_t i = 0; i < (size_t) numRegisters; i++)
142 0 : registers[i] = -1;
143 :
144 0 : while (true) {
145 0 : int32_t insn = Load32Aligned(pc);
146 0 : switch (insn & BYTECODE_MASK) {
147 : BYTECODE(BREAK)
148 0 : MOZ_CRASH("Bad bytecode: BREAK");
149 : BYTECODE(PUSH_CP)
150 0 : if (!stack.push(current))
151 0 : return RegExpRunStatus_Error;
152 0 : pc += BC_PUSH_CP_LENGTH;
153 0 : break;
154 : BYTECODE(PUSH_BT)
155 0 : if (!stack.push(Load32Aligned(pc + 4)))
156 0 : return RegExpRunStatus_Error;
157 0 : pc += BC_PUSH_BT_LENGTH;
158 0 : break;
159 : BYTECODE(PUSH_REGISTER)
160 0 : if (!stack.push(registers[insn >> BYTECODE_SHIFT]))
161 0 : return RegExpRunStatus_Error;
162 0 : pc += BC_PUSH_REGISTER_LENGTH;
163 0 : break;
164 : BYTECODE(SET_REGISTER)
165 0 : registers[insn >> BYTECODE_SHIFT] = Load32Aligned(pc + 4);
166 0 : pc += BC_SET_REGISTER_LENGTH;
167 0 : break;
168 : BYTECODE(ADVANCE_REGISTER)
169 0 : registers[insn >> BYTECODE_SHIFT] += Load32Aligned(pc + 4);
170 0 : pc += BC_ADVANCE_REGISTER_LENGTH;
171 0 : break;
172 : BYTECODE(SET_REGISTER_TO_CP)
173 0 : registers[insn >> BYTECODE_SHIFT] = current + Load32Aligned(pc + 4);
174 0 : pc += BC_SET_REGISTER_TO_CP_LENGTH;
175 0 : break;
176 : BYTECODE(SET_CP_TO_REGISTER)
177 0 : current = registers[insn >> BYTECODE_SHIFT];
178 0 : pc += BC_SET_CP_TO_REGISTER_LENGTH;
179 0 : break;
180 : BYTECODE(SET_REGISTER_TO_SP)
181 0 : registers[insn >> BYTECODE_SHIFT] = stack.position();
182 0 : pc += BC_SET_REGISTER_TO_SP_LENGTH;
183 0 : break;
184 : BYTECODE(SET_SP_TO_REGISTER)
185 0 : stack.setPosition(registers[insn >> BYTECODE_SHIFT]);
186 0 : pc += BC_SET_SP_TO_REGISTER_LENGTH;
187 0 : break;
188 : BYTECODE(POP_CP)
189 0 : current = stack.pop();
190 0 : pc += BC_POP_CP_LENGTH;
191 0 : break;
192 : BYTECODE(POP_BT)
193 0 : if (!CheckForInterrupt(cx))
194 0 : return RegExpRunStatus_Error;
195 0 : pc = byteCode + stack.pop();
196 0 : break;
197 : BYTECODE(POP_REGISTER)
198 0 : registers[insn >> BYTECODE_SHIFT] = stack.pop();
199 0 : pc += BC_POP_REGISTER_LENGTH;
200 0 : break;
201 : BYTECODE(FAIL)
202 0 : return RegExpRunStatus_Success_NotFound;
203 : BYTECODE(SUCCEED)
204 0 : if (matches)
205 0 : memcpy(matches->pairsRaw(), registers.begin(), matches->length() * 2 * sizeof(int32_t));
206 0 : else if (endIndex)
207 0 : *endIndex = registers[1];
208 0 : return RegExpRunStatus_Success;
209 : BYTECODE(ADVANCE_CP)
210 0 : current += insn >> BYTECODE_SHIFT;
211 0 : pc += BC_ADVANCE_CP_LENGTH;
212 0 : break;
213 : BYTECODE(GOTO)
214 0 : pc = byteCode + Load32Aligned(pc + 4);
215 0 : break;
216 : BYTECODE(ADVANCE_CP_AND_GOTO)
217 0 : current += insn >> BYTECODE_SHIFT;
218 0 : pc = byteCode + Load32Aligned(pc + 4);
219 0 : break;
220 : BYTECODE(CHECK_GREEDY)
221 0 : if ((int32_t)current == stack.peek()) {
222 0 : stack.pop();
223 0 : pc = byteCode + Load32Aligned(pc + 4);
224 : } else {
225 0 : pc += BC_CHECK_GREEDY_LENGTH;
226 : }
227 0 : break;
228 : BYTECODE(LOAD_CURRENT_CHAR) {
229 0 : size_t pos = current + (insn >> BYTECODE_SHIFT);
230 0 : if (pos >= length) {
231 0 : pc = byteCode + Load32Aligned(pc + 4);
232 : } else {
233 0 : current_char = chars[pos];
234 0 : pc += BC_LOAD_CURRENT_CHAR_LENGTH;
235 : }
236 0 : break;
237 : }
238 : BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) {
239 0 : int pos = current + (insn >> BYTECODE_SHIFT);
240 0 : current_char = chars[pos];
241 0 : pc += BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH;
242 0 : break;
243 : }
244 : BYTECODE(LOAD_2_CURRENT_CHARS) {
245 0 : size_t pos = current + (insn >> BYTECODE_SHIFT);
246 0 : if (pos + 2 > length) {
247 0 : pc = byteCode + Load32Aligned(pc + 4);
248 : } else {
249 0 : CharT next = chars[pos + 1];
250 0 : current_char = (chars[pos] | (next << (kBitsPerByte * sizeof(CharT))));
251 0 : pc += BC_LOAD_2_CURRENT_CHARS_LENGTH;
252 : }
253 0 : break;
254 : }
255 : BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) {
256 0 : int pos = current + (insn >> BYTECODE_SHIFT);
257 0 : char16_t next = chars[pos + 1];
258 0 : current_char = (chars[pos] | (next << (kBitsPerByte * sizeof(char16_t))));
259 0 : pc += BC_LOAD_2_CURRENT_CHARS_UNCHECKED_LENGTH;
260 0 : break;
261 : }
262 : BYTECODE(LOAD_4_CURRENT_CHARS)
263 0 : MOZ_CRASH("ASCII handling implemented");
264 : BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED)
265 0 : MOZ_CRASH("ASCII handling implemented");
266 : BYTECODE(CHECK_4_CHARS) {
267 0 : uint32_t c = Load32Aligned(pc + 4);
268 0 : if (c == current_char)
269 0 : pc = byteCode + Load32Aligned(pc + 8);
270 : else
271 0 : pc += BC_CHECK_4_CHARS_LENGTH;
272 0 : break;
273 : }
274 : BYTECODE(CHECK_CHAR) {
275 0 : uint32_t c = (insn >> BYTECODE_SHIFT);
276 0 : if (c == current_char)
277 0 : pc = byteCode + Load32Aligned(pc + 4);
278 : else
279 0 : pc += BC_CHECK_CHAR_LENGTH;
280 0 : break;
281 : }
282 : BYTECODE(CHECK_NOT_4_CHARS) {
283 0 : uint32_t c = Load32Aligned(pc + 4);
284 0 : if (c != current_char)
285 0 : pc = byteCode + Load32Aligned(pc + 8);
286 : else
287 0 : pc += BC_CHECK_NOT_4_CHARS_LENGTH;
288 0 : break;
289 : }
290 : BYTECODE(CHECK_NOT_CHAR) {
291 0 : uint32_t c = (insn >> BYTECODE_SHIFT);
292 0 : if (c != current_char)
293 0 : pc = byteCode + Load32Aligned(pc + 4);
294 : else
295 0 : pc += BC_CHECK_NOT_CHAR_LENGTH;
296 0 : break;
297 : }
298 : BYTECODE(AND_CHECK_4_CHARS) {
299 0 : uint32_t c = Load32Aligned(pc + 4);
300 0 : if (c == (current_char & Load32Aligned(pc + 8)))
301 0 : pc = byteCode + Load32Aligned(pc + 12);
302 : else
303 0 : pc += BC_AND_CHECK_4_CHARS_LENGTH;
304 0 : break;
305 : }
306 : BYTECODE(AND_CHECK_CHAR) {
307 0 : uint32_t c = (insn >> BYTECODE_SHIFT);
308 0 : if (c == (current_char & Load32Aligned(pc + 4)))
309 0 : pc = byteCode + Load32Aligned(pc + 8);
310 : else
311 0 : pc += BC_AND_CHECK_CHAR_LENGTH;
312 0 : break;
313 : }
314 : BYTECODE(AND_CHECK_NOT_4_CHARS) {
315 0 : uint32_t c = Load32Aligned(pc + 4);
316 0 : if (c != (current_char & Load32Aligned(pc + 8)))
317 0 : pc = byteCode + Load32Aligned(pc + 12);
318 : else
319 0 : pc += BC_AND_CHECK_NOT_4_CHARS_LENGTH;
320 0 : break;
321 : }
322 : BYTECODE(AND_CHECK_NOT_CHAR) {
323 0 : uint32_t c = (insn >> BYTECODE_SHIFT);
324 0 : if (c != (current_char & Load32Aligned(pc + 4)))
325 0 : pc = byteCode + Load32Aligned(pc + 8);
326 : else
327 0 : pc += BC_AND_CHECK_NOT_CHAR_LENGTH;
328 0 : break;
329 : }
330 : BYTECODE(MINUS_AND_CHECK_NOT_CHAR) {
331 0 : uint32_t c = (insn >> BYTECODE_SHIFT);
332 0 : uint32_t minus = Load16Aligned(pc + 4);
333 0 : uint32_t mask = Load16Aligned(pc + 6);
334 0 : if (c != ((current_char - minus) & mask))
335 0 : pc = byteCode + Load32Aligned(pc + 8);
336 : else
337 0 : pc += BC_MINUS_AND_CHECK_NOT_CHAR_LENGTH;
338 0 : break;
339 : }
340 : BYTECODE(CHECK_CHAR_IN_RANGE) {
341 0 : uint32_t from = Load16Aligned(pc + 4);
342 0 : uint32_t to = Load16Aligned(pc + 6);
343 0 : if (from <= current_char && current_char <= to)
344 0 : pc = byteCode + Load32Aligned(pc + 8);
345 : else
346 0 : pc += BC_CHECK_CHAR_IN_RANGE_LENGTH;
347 0 : break;
348 : }
349 : BYTECODE(CHECK_CHAR_NOT_IN_RANGE) {
350 0 : uint32_t from = Load16Aligned(pc + 4);
351 0 : uint32_t to = Load16Aligned(pc + 6);
352 0 : if (from > current_char || current_char > to)
353 0 : pc = byteCode + Load32Aligned(pc + 8);
354 : else
355 0 : pc += BC_CHECK_CHAR_NOT_IN_RANGE_LENGTH;
356 0 : break;
357 : }
358 : BYTECODE(CHECK_BIT_IN_TABLE) {
359 0 : int mask = RegExpMacroAssembler::kTableMask;
360 0 : uint8_t b = pc[8 + ((current_char & mask) >> kBitsPerByteLog2)];
361 0 : int bit = (current_char & (kBitsPerByte - 1));
362 0 : if ((b & (1 << bit)) != 0)
363 0 : pc = byteCode + Load32Aligned(pc + 4);
364 : else
365 0 : pc += BC_CHECK_BIT_IN_TABLE_LENGTH;
366 0 : break;
367 : }
368 : BYTECODE(CHECK_LT) {
369 0 : uint32_t limit = (insn >> BYTECODE_SHIFT);
370 0 : if (current_char < limit)
371 0 : pc = byteCode + Load32Aligned(pc + 4);
372 : else
373 0 : pc += BC_CHECK_LT_LENGTH;
374 0 : break;
375 : }
376 : BYTECODE(CHECK_GT) {
377 0 : uint32_t limit = (insn >> BYTECODE_SHIFT);
378 0 : if (current_char > limit)
379 0 : pc = byteCode + Load32Aligned(pc + 4);
380 : else
381 0 : pc += BC_CHECK_GT_LENGTH;
382 0 : break;
383 : }
384 : BYTECODE(CHECK_REGISTER_LT)
385 0 : if (registers[insn >> BYTECODE_SHIFT] < Load32Aligned(pc + 4))
386 0 : pc = byteCode + Load32Aligned(pc + 8);
387 : else
388 0 : pc += BC_CHECK_REGISTER_LT_LENGTH;
389 0 : break;
390 : BYTECODE(CHECK_REGISTER_GE)
391 0 : if (registers[insn >> BYTECODE_SHIFT] >= Load32Aligned(pc + 4))
392 0 : pc = byteCode + Load32Aligned(pc + 8);
393 : else
394 0 : pc += BC_CHECK_REGISTER_GE_LENGTH;
395 0 : break;
396 : BYTECODE(CHECK_REGISTER_EQ_POS)
397 0 : if (registers[insn >> BYTECODE_SHIFT] == (int32_t) current)
398 0 : pc = byteCode + Load32Aligned(pc + 4);
399 : else
400 0 : pc += BC_CHECK_REGISTER_EQ_POS_LENGTH;
401 0 : break;
402 : BYTECODE(CHECK_NOT_REGS_EQUAL)
403 0 : if (registers[insn >> BYTECODE_SHIFT] == registers[Load32Aligned(pc + 4)])
404 0 : pc += BC_CHECK_NOT_REGS_EQUAL_LENGTH;
405 : else
406 0 : pc = byteCode + Load32Aligned(pc + 8);
407 0 : break;
408 : BYTECODE(CHECK_NOT_BACK_REF) {
409 0 : int from = registers[insn >> BYTECODE_SHIFT];
410 0 : int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
411 0 : if (from < 0 || len <= 0) {
412 0 : pc += BC_CHECK_NOT_BACK_REF_LENGTH;
413 0 : break;
414 : }
415 0 : if (current + len > length) {
416 0 : pc = byteCode + Load32Aligned(pc + 4);
417 0 : break;
418 : } else {
419 : int i;
420 0 : for (i = 0; i < len; i++) {
421 0 : if (chars[from + i] != chars[current + i]) {
422 0 : pc = byteCode + Load32Aligned(pc + 4);
423 0 : break;
424 : }
425 : }
426 0 : if (i < len) break;
427 0 : current += len;
428 : }
429 0 : pc += BC_CHECK_NOT_BACK_REF_LENGTH;
430 0 : break;
431 : }
432 : BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
433 0 : int from = registers[insn >> BYTECODE_SHIFT];
434 0 : int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
435 0 : if (from < 0 || len <= 0) {
436 0 : pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
437 0 : break;
438 : }
439 0 : if (current + len > length) {
440 0 : pc = byteCode + Load32Aligned(pc + 4);
441 0 : break;
442 : }
443 0 : if (CaseInsensitiveCompareStrings(chars + from, chars + current, len * sizeof(CharT))) {
444 0 : current += len;
445 0 : pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
446 : } else {
447 0 : pc = byteCode + Load32Aligned(pc + 4);
448 : }
449 0 : break;
450 : }
451 : BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE) {
452 0 : int from = registers[insn >> BYTECODE_SHIFT];
453 0 : int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
454 0 : if (from < 0 || len <= 0) {
455 0 : pc += BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_LENGTH;
456 0 : break;
457 : }
458 0 : if (current + len > length) {
459 0 : pc = byteCode + Load32Aligned(pc + 4);
460 0 : break;
461 : }
462 0 : if (CaseInsensitiveCompareUCStrings(chars + from, chars + current,
463 : len * sizeof(CharT)))
464 : {
465 0 : current += len;
466 0 : pc += BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_LENGTH;
467 : } else {
468 0 : pc = byteCode + Load32Aligned(pc + 4);
469 : }
470 0 : break;
471 : }
472 : BYTECODE(CHECK_AT_START)
473 0 : if (current == 0)
474 0 : pc = byteCode + Load32Aligned(pc + 4);
475 : else
476 0 : pc += BC_CHECK_AT_START_LENGTH;
477 0 : break;
478 : BYTECODE(CHECK_NOT_AT_START)
479 0 : if (current == 0)
480 0 : pc += BC_CHECK_NOT_AT_START_LENGTH;
481 : else
482 0 : pc = byteCode + Load32Aligned(pc + 4);
483 0 : break;
484 : BYTECODE(SET_CURRENT_POSITION_FROM_END) {
485 0 : size_t by = static_cast<uint32_t>(insn) >> BYTECODE_SHIFT;
486 0 : if (length - current > by) {
487 0 : current = length - by;
488 0 : current_char = chars[current - 1];
489 : }
490 0 : pc += BC_SET_CURRENT_POSITION_FROM_END_LENGTH;
491 0 : break;
492 : }
493 : default:
494 0 : MOZ_CRASH("Bad bytecode");
495 : }
496 : }
497 : }
498 :
499 : template RegExpRunStatus
500 : irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const Latin1Char* chars, size_t current,
501 : size_t length, MatchPairs* matches, size_t* endIndex);
502 :
503 : template RegExpRunStatus
504 : irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const char16_t* chars, size_t current,
505 : size_t length, MatchPairs* matches, size_t* endIndex);
|