Line data Source code
1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 : * vim: set ts=8 sts=4 et sw=4 tw=99: */
3 :
4 : // Copyright 2012 the V8 project authors. All rights reserved.
5 : // Redistribution and use in source and binary forms, with or without
6 : // modification, are permitted provided that the following conditions are
7 : // met:
8 : //
9 : // * Redistributions of source code must retain the above copyright
10 : // notice, this list of conditions and the following disclaimer.
11 : // * Redistributions in binary form must reproduce the above
12 : // copyright notice, this list of conditions and the following
13 : // disclaimer in the documentation and/or other materials provided
14 : // with the distribution.
15 : // * Neither the name of Google Inc. nor the names of its
16 : // contributors may be used to endorse or promote products derived
17 : // from this software without specific prior written permission.
18 : //
19 : // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 : // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 : // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 : // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 : // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 : // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 : // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 : // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 : // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 : // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 : // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 :
31 : #ifndef V8_REGEXP_MACRO_ASSEMBLER_H_
32 : #define V8_REGEXP_MACRO_ASSEMBLER_H_
33 :
34 : #include "irregexp/RegExpAST.h"
35 : #include "irregexp/RegExpEngine.h"
36 : #include "jit/MacroAssembler.h"
37 :
38 : namespace js {
39 : namespace irregexp {
40 :
41 : class MOZ_STACK_CLASS RegExpMacroAssembler
42 : {
43 : public:
44 40 : RegExpMacroAssembler(JSContext* cx, LifoAlloc& alloc, size_t numSavedRegisters)
45 40 : : slow_safe_compiler_(false),
46 : global_mode_(NOT_GLOBAL),
47 : alloc_(alloc),
48 : num_registers_(numSavedRegisters),
49 40 : num_saved_registers_(numSavedRegisters)
50 40 : {}
51 :
52 : enum StackCheckFlag {
53 : kNoStackLimitCheck = false,
54 : kCheckStackLimit = true
55 : };
56 :
57 : // The implementation must be able to handle at least:
58 : static const int kMaxRegister = (1 << 16) - 1;
59 : static const int kMaxCPOffset = (1 << 15) - 1;
60 : static const int kMinCPOffset = -(1 << 15);
61 :
62 : static const int kTableSizeBits = 7;
63 : static const int kTableSize = 1 << kTableSizeBits;
64 : static const int kTableMask = kTableSize - 1;
65 :
66 : // Controls the generation of large inlined constants in the code.
67 40 : void set_slow_safe(bool ssc) { slow_safe_compiler_ = ssc; }
68 : bool slow_safe() { return slow_safe_compiler_; }
69 :
70 : enum GlobalMode { NOT_GLOBAL, GLOBAL, GLOBAL_NO_ZERO_LENGTH_CHECK };
71 :
72 : // Set whether the regular expression has the global flag. Exiting due to
73 : // a failure in a global regexp may still mean success overall.
74 0 : inline void set_global_mode(GlobalMode mode) { global_mode_ = mode; }
75 185 : inline bool global() { return global_mode_ != NOT_GLOBAL; }
76 21 : inline bool global_with_zero_length_check() {
77 21 : return global_mode_ == GLOBAL;
78 : }
79 :
80 : LifoAlloc& alloc() { return alloc_; }
81 :
82 : virtual RegExpCode GenerateCode(JSContext* cx, bool match_only) = 0;
83 :
84 : // The maximal number of pushes between stack checks. Users must supply
85 : // kCheckStackLimit flag to push operations (instead of kNoStackLimitCheck)
86 : // at least once for every stack_limit() pushes that are executed.
87 : virtual int stack_limit_slack() = 0;
88 :
89 0 : virtual bool CanReadUnaligned() { return false; }
90 :
91 : virtual void AdvanceCurrentPosition(int by) = 0; // Signed cp change.
92 : virtual void AdvanceRegister(int reg, int by) = 0; // r[reg] += by.
93 :
94 : // Continues execution from the position pushed on the top of the backtrack
95 : // stack by an earlier PushBacktrack.
96 : virtual void Backtrack() = 0;
97 :
98 : virtual void Bind(jit::Label* label) = 0;
99 : virtual void CheckAtStart(jit::Label* on_at_start) = 0;
100 :
101 : // Dispatch after looking the current character up in a 2-bits-per-entry
102 : // map. The destinations vector has up to 4 labels.
103 : virtual void CheckCharacter(unsigned c, jit::Label* on_equal) = 0;
104 :
105 : // Bitwise and the current character with the given constant and then
106 : // check for a match with c.
107 : virtual void CheckCharacterAfterAnd(unsigned c, unsigned and_with, jit::Label* on_equal) = 0;
108 :
109 : virtual void CheckCharacterGT(char16_t limit, jit::Label* on_greater) = 0;
110 : virtual void CheckCharacterLT(char16_t limit, jit::Label* on_less) = 0;
111 : virtual void CheckGreedyLoop(jit::Label* on_tos_equals_current_position) = 0;
112 : virtual void CheckNotAtStart(jit::Label* on_not_at_start) = 0;
113 : virtual void CheckNotBackReference(int start_reg, jit::Label* on_no_match) = 0;
114 : virtual void CheckNotBackReferenceIgnoreCase(int start_reg, jit::Label* on_no_match,
115 : bool unicode) = 0;
116 :
117 : // Check the current character for a match with a literal character. If we
118 : // fail to match then goto the on_failure label. End of input always
119 : // matches. If the label is nullptr then we should pop a backtrack address off
120 : // the stack and go to that.
121 : virtual void CheckNotCharacter(unsigned c, jit::Label* on_not_equal) = 0;
122 : virtual void CheckNotCharacterAfterAnd(unsigned c, unsigned and_with, jit::Label* on_not_equal) = 0;
123 :
124 : // Subtract a constant from the current character, then and with the given
125 : // constant and then check for a match with c.
126 : virtual void CheckNotCharacterAfterMinusAnd(char16_t c,
127 : char16_t minus,
128 : char16_t and_with,
129 : jit::Label* on_not_equal) = 0;
130 :
131 : virtual void CheckCharacterInRange(char16_t from, char16_t to, // Both inclusive.
132 : jit::Label* on_in_range) = 0;
133 :
134 : virtual void CheckCharacterNotInRange(char16_t from, char16_t to, // Both inclusive.
135 : jit::Label* on_not_in_range) = 0;
136 :
137 : // The current character (modulus the kTableSize) is looked up in the byte
138 : // array, and if the found byte is non-zero, we jump to the on_bit_set label.
139 : virtual void CheckBitInTable(RegExpShared::JitCodeTable table, jit::Label* on_bit_set) = 0;
140 :
141 : // Checks whether the given offset from the current position is before
142 : // the end of the string. May overwrite the current character.
143 0 : virtual void CheckPosition(int cp_offset, jit::Label* on_outside_input) {
144 0 : LoadCurrentCharacter(cp_offset, on_outside_input, true);
145 0 : }
146 :
147 : // Jump to either the target label or the top of the backtrack stack.
148 : virtual void JumpOrBacktrack(jit::Label* to) = 0;
149 :
150 : // Check whether a standard/default character class matches the current
151 : // character. Returns false if the type of special character class does
152 : // not have custom support.
153 : // May clobber the current loaded character.
154 0 : virtual bool CheckSpecialCharacterClass(char16_t type, jit::Label* on_no_match) {
155 0 : return false;
156 : }
157 :
158 : virtual void Fail() = 0;
159 :
160 : // Check whether a register is >= a given constant and go to a label if it
161 : // is. Backtracks instead if the label is nullptr.
162 : virtual void IfRegisterGE(int reg, int comparand, jit::Label* if_ge) = 0;
163 :
164 : // Check whether a register is < a given constant and go to a label if it is.
165 : // Backtracks instead if the label is nullptr.
166 : virtual void IfRegisterLT(int reg, int comparand, jit::Label* if_lt) = 0;
167 :
168 : // Check whether a register is == to the current position and go to a
169 : // label if it is.
170 : virtual void IfRegisterEqPos(int reg, jit::Label* if_eq) = 0;
171 :
172 : virtual void LoadCurrentCharacter(int cp_offset,
173 : jit::Label* on_end_of_input,
174 : bool check_bounds = true,
175 : int characters = 1) = 0;
176 : virtual void PopCurrentPosition() = 0;
177 : virtual void PopRegister(int register_index) = 0;
178 :
179 : virtual void PushCurrentPosition() = 0;
180 : virtual void PushRegister(int register_index, StackCheckFlag check_stack_limit) = 0;
181 : virtual void ReadCurrentPositionFromRegister(int reg) = 0;
182 : virtual void ReadBacktrackStackPointerFromRegister(int reg) = 0;
183 : virtual void SetCurrentPositionFromEnd(int by) = 0;
184 : virtual void SetRegister(int register_index, int to) = 0;
185 :
186 : // Return whether the matching (with a global regexp) will be restarted.
187 : virtual bool Succeed() = 0;
188 :
189 : virtual void WriteCurrentPositionToRegister(int reg, int cp_offset) = 0;
190 : virtual void ClearRegisters(int reg_from, int reg_to) = 0;
191 : virtual void WriteBacktrackStackPointerToRegister(int reg) = 0;
192 :
193 : // Pushes the label on the backtrack stack, so that a following Backtrack
194 : // will go to this label. Always checks the backtrack stack limit.
195 : virtual void PushBacktrack(jit::Label* label) = 0;
196 :
197 : // Bind a label that was previously used by PushBacktrack.
198 : virtual void BindBacktrack(jit::Label* label) = 0;
199 :
200 : private:
201 : bool slow_safe_compiler_;
202 : GlobalMode global_mode_;
203 : LifoAlloc& alloc_;
204 :
205 : protected:
206 : int num_registers_;
207 : int num_saved_registers_;
208 :
209 720 : void checkRegister(int reg) {
210 720 : MOZ_ASSERT(reg >= 0);
211 720 : MOZ_ASSERT(reg <= kMaxRegister);
212 720 : if (num_registers_ <= reg)
213 14 : num_registers_ = reg + 1;
214 720 : }
215 : };
216 :
// Declaration only; the definition is not in this header. Judging by the
// name, this presumably compares |substring1| and |substring2| ignoring
// case. NOTE(review): the length parameter is named |byteLength|, which
// suggests a size in bytes rather than in CharT units -- confirm against the
// definition, together with the meaning of the int result.
template <typename CharT>
int CaseInsensitiveCompareStrings(const CharT* substring1,
                                  const CharT* substring2,
                                  size_t byteLength);

// Declaration only; same parameter list as CaseInsensitiveCompareStrings.
// NOTE(review): "UC" presumably selects Unicode-aware case handling --
// confirm against the definition.
template <typename CharT>
int CaseInsensitiveCompareUCStrings(const CharT* substring1,
                                    const CharT* substring2,
                                    size_t byteLength);
225 :
226 : class MOZ_STACK_CLASS InterpretedRegExpMacroAssembler final : public RegExpMacroAssembler
227 : {
228 : public:
229 : InterpretedRegExpMacroAssembler(JSContext* cx, LifoAlloc* alloc, size_t numSavedRegisters);
230 : ~InterpretedRegExpMacroAssembler();
231 :
232 : // Inherited virtual methods.
233 : RegExpCode GenerateCode(JSContext* cx, bool match_only);
234 : void AdvanceCurrentPosition(int by);
235 : void AdvanceRegister(int reg, int by);
236 : void Backtrack();
237 : void Bind(jit::Label* label);
238 : void CheckAtStart(jit::Label* on_at_start);
239 : void CheckCharacter(unsigned c, jit::Label* on_equal);
240 : void CheckCharacterAfterAnd(unsigned c, unsigned and_with, jit::Label* on_equal);
241 : void CheckCharacterGT(char16_t limit, jit::Label* on_greater);
242 : void CheckCharacterLT(char16_t limit, jit::Label* on_less);
243 : void CheckGreedyLoop(jit::Label* on_tos_equals_current_position);
244 : void CheckNotAtStart(jit::Label* on_not_at_start);
245 : void CheckNotBackReference(int start_reg, jit::Label* on_no_match);
246 : void CheckNotBackReferenceIgnoreCase(int start_reg, jit::Label* on_no_match, bool unicode);
247 : void CheckNotCharacter(unsigned c, jit::Label* on_not_equal);
248 : void CheckNotCharacterAfterAnd(unsigned c, unsigned and_with, jit::Label* on_not_equal);
249 : void CheckNotCharacterAfterMinusAnd(char16_t c, char16_t minus, char16_t and_with,
250 : jit::Label* on_not_equal);
251 : void CheckCharacterInRange(char16_t from, char16_t to,
252 : jit::Label* on_in_range);
253 : void CheckCharacterNotInRange(char16_t from, char16_t to,
254 : jit::Label* on_not_in_range);
255 : void CheckBitInTable(RegExpShared::JitCodeTable table, jit::Label* on_bit_set);
256 : void JumpOrBacktrack(jit::Label* to);
257 : void Fail();
258 : void IfRegisterGE(int reg, int comparand, jit::Label* if_ge);
259 : void IfRegisterLT(int reg, int comparand, jit::Label* if_lt);
260 : void IfRegisterEqPos(int reg, jit::Label* if_eq);
261 : void LoadCurrentCharacter(int cp_offset, jit::Label* on_end_of_input,
262 : bool check_bounds = true, int characters = 1);
263 : void PopCurrentPosition();
264 : void PopRegister(int register_index);
265 : void PushCurrentPosition();
266 : void PushRegister(int register_index, StackCheckFlag check_stack_limit);
267 : void ReadCurrentPositionFromRegister(int reg);
268 : void ReadBacktrackStackPointerFromRegister(int reg);
269 : void SetCurrentPositionFromEnd(int by);
270 : void SetRegister(int register_index, int to);
271 : bool Succeed();
272 : void WriteCurrentPositionToRegister(int reg, int cp_offset);
273 : void ClearRegisters(int reg_from, int reg_to);
274 : void WriteBacktrackStackPointerToRegister(int reg);
275 : void PushBacktrack(jit::Label* label);
276 : void BindBacktrack(jit::Label* label);
277 :
278 : // The byte-code interpreter checks on each push anyway.
279 0 : int stack_limit_slack() { return 1; }
280 :
281 : private:
282 : void Expand();
283 :
284 : // Code and bitmap emission.
285 : void EmitOrLink(jit::Label* label);
286 : void Emit32(uint32_t x);
287 : void Emit16(uint32_t x);
288 : void Emit8(uint32_t x);
289 : void Emit(uint32_t bc, uint32_t arg);
290 :
291 : jit::Label backtrack_;
292 :
293 : // The program counter.
294 : int pc_;
295 :
296 : int advance_current_start_;
297 : int advance_current_offset_;
298 : int advance_current_end_;
299 :
300 : static const int kInvalidPC = -1;
301 :
302 : uint8_t* buffer_;
303 : int length_;
304 : };
305 :
306 : } } // namespace js::irregexp
307 :
308 : #endif // V8_REGEXP_MACRO_ASSEMBLER_H_
|