Line data Source code
1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 : * vim: set ts=8 sts=4 et sw=4 tw=99:
3 : * This Source Code Form is subject to the terms of the Mozilla Public
4 : * License, v. 2.0. If a copy of the MPL was not distributed with this
5 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 :
7 : #ifndef jit_x86_shared_MacroAssembler_x86_shared_h
8 : #define jit_x86_shared_MacroAssembler_x86_shared_h
9 :
10 : #include "mozilla/Casting.h"
11 :
12 : #if defined(JS_CODEGEN_X86)
13 : # include "jit/x86/Assembler-x86.h"
14 : #elif defined(JS_CODEGEN_X64)
15 : # include "jit/x64/Assembler-x64.h"
16 : #endif
17 :
18 : #ifdef DEBUG
19 : #define CHECK_BYTEREG(reg) \
20 : JS_BEGIN_MACRO \
21 : AllocatableGeneralRegisterSet byteRegs(Registers::SingleByteRegs); \
22 : MOZ_ASSERT(byteRegs.has(reg)); \
23 : JS_END_MACRO
24 : #define CHECK_BYTEREGS(r1, r2) \
25 : JS_BEGIN_MACRO \
26 : AllocatableGeneralRegisterSet byteRegs(Registers::SingleByteRegs); \
27 : MOZ_ASSERT(byteRegs.has(r1)); \
28 : MOZ_ASSERT(byteRegs.has(r2)); \
29 : JS_END_MACRO
30 : #else
31 : #define CHECK_BYTEREG(reg) (void)0
32 : #define CHECK_BYTEREGS(r1, r2) (void)0
33 : #endif
34 :
35 : namespace js {
36 : namespace jit {
37 :
38 : class MacroAssembler;
39 :
40 4503 : class MacroAssemblerX86Shared : public Assembler
41 : {
42 : private:
43 : // Perform a downcast. Should be removed by Bug 996602.
44 : MacroAssembler& asMasm();
45 : const MacroAssembler& asMasm() const;
46 :
47 : public:
48 : typedef Vector<CodeOffset, 0, SystemAllocPolicy> UsesVector;
49 :
50 : protected:
51 :
52 : // For Double, Float and SimdData, define the move ctors explicitly so that
53 : // MSVC knows to use them instead of copying these data structures.
54 : template<class T>
55 0 : struct Constant {
56 : typedef T Pod;
57 :
58 : T value;
59 : UsesVector uses;
60 :
61 0 : explicit Constant(const T& value) : value(value) {}
62 0 : Constant(Constant<T>&& other) : value(other.value), uses(mozilla::Move(other.uses)) {}
63 : explicit Constant(const Constant<T>&) = delete;
64 : };
65 :
66 : // Containers use SystemAllocPolicy since wasm releases memory after each
67 : // function is compiled, and these need to live until after all functions
68 : // are compiled.
69 : using Double = Constant<double>;
70 : Vector<Double, 0, SystemAllocPolicy> doubles_;
71 : typedef HashMap<double, size_t, DefaultHasher<double>, SystemAllocPolicy> DoubleMap;
72 : DoubleMap doubleMap_;
73 :
74 : using Float = Constant<float>;
75 : Vector<Float, 0, SystemAllocPolicy> floats_;
76 : typedef HashMap<float, size_t, DefaultHasher<float>, SystemAllocPolicy> FloatMap;
77 : FloatMap floatMap_;
78 :
79 0 : struct SimdData : public Constant<SimdConstant> {
80 0 : explicit SimdData(SimdConstant d) : Constant<SimdConstant>(d) {}
81 0 : SimdData(SimdData&& d) : Constant<SimdConstant>(mozilla::Move(d)) {}
82 : explicit SimdData(const SimdData&) = delete;
83 : SimdConstant::Type type() const { return value.type(); }
84 : };
85 :
86 : Vector<SimdData, 0, SystemAllocPolicy> simds_;
87 : typedef HashMap<SimdConstant, size_t, SimdConstant, SystemAllocPolicy> SimdMap;
88 : SimdMap simdMap_;
89 :
90 : template<class T, class Map>
91 : T* getConstant(const typename T::Pod& value, Map& map, Vector<T, 0, SystemAllocPolicy>& vec);
92 :
93 : Float* getFloat(float f);
94 : Double* getDouble(double d);
95 : SimdData* getSimdData(const SimdConstant& v);
96 :
97 : public:
98 : using Assembler::call;
99 :
100 4503 : MacroAssemblerX86Shared()
101 4503 : { }
102 :
103 : bool asmMergeWith(const MacroAssemblerX86Shared& other);
104 :
105 : // Evaluate srcDest = minmax<isMax>{Float32,Double}(srcDest, second).
106 : // Checks for NaN if canBeNaN is true.
107 : void minMaxDouble(FloatRegister srcDest, FloatRegister second, bool canBeNaN, bool isMax);
108 : void minMaxFloat32(FloatRegister srcDest, FloatRegister second, bool canBeNaN, bool isMax);
109 :
110 11 : void compareDouble(DoubleCondition cond, FloatRegister lhs, FloatRegister rhs) {
111 11 : if (cond & DoubleConditionBitInvert)
112 0 : vucomisd(lhs, rhs);
113 : else
114 11 : vucomisd(rhs, lhs);
115 11 : }
116 :
117 0 : void compareFloat(DoubleCondition cond, FloatRegister lhs, FloatRegister rhs) {
118 0 : if (cond & DoubleConditionBitInvert)
119 0 : vucomiss(lhs, rhs);
120 : else
121 0 : vucomiss(rhs, lhs);
122 0 : }
123 :
124 : void branchNegativeZero(FloatRegister reg, Register scratch, Label* label, bool maybeNonZero = true);
125 : void branchNegativeZeroFloat32(FloatRegister reg, Register scratch, Label* label);
126 :
127 6062 : void move32(Imm32 imm, Register dest) {
128 : // Use the ImmWord version of mov to register, which has special
129 : // optimizations. Casting to uint32_t here ensures that the value
130 : // is zero-extended.
131 6062 : mov(ImmWord(uint32_t(imm.value)), dest);
132 6062 : }
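     : // (On x64, for example, the uint32_t cast keeps Imm32(-1) from being
     : // sign-extended into a 64-bit immediate, letting the assembler use the
     : // shorter zero-extending 32-bit movl encoding.)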
133 : void move32(Imm32 imm, const Operand& dest) {
134 : movl(imm, dest);
135 : }
136 248 : void move32(Register src, Register dest) {
137 248 : movl(src, dest);
138 248 : }
139 118 : void move32(Register src, const Operand& dest) {
140 118 : movl(src, dest);
141 118 : }
142 3248 : void test32(Register lhs, Register rhs) {
143 3248 : testl(rhs, lhs);
144 3248 : }
145 : void test32(const Address& addr, Imm32 imm) {
146 : testl(imm, Operand(addr));
147 : }
148 7960 : void test32(const Operand lhs, Imm32 imm) {
149 7960 : testl(imm, lhs);
150 7960 : }
151 199 : void test32(Register lhs, Imm32 rhs) {
152 199 : testl(rhs, lhs);
153 199 : }
154 7659 : void cmp32(Register lhs, Imm32 rhs) {
155 7659 : cmpl(rhs, lhs);
156 7659 : }
157 221 : void cmp32(Register lhs, Register rhs) {
158 221 : cmpl(rhs, lhs);
159 221 : }
160 0 : void cmp32(const Address& lhs, Register rhs) {
161 0 : cmp32(Operand(lhs), rhs);
162 0 : }
163 0 : void cmp32(const Address& lhs, Imm32 rhs) {
164 0 : cmp32(Operand(lhs), rhs);
165 0 : }
166 1658 : void cmp32(const Operand& lhs, Imm32 rhs) {
167 1658 : cmpl(rhs, lhs);
168 1658 : }
169 101 : void cmp32(const Operand& lhs, Register rhs) {
170 101 : cmpl(rhs, lhs);
171 101 : }
172 8 : void cmp32(Register lhs, const Operand& rhs) {
173 8 : cmpl(rhs, lhs);
174 8 : }
175 : CodeOffset cmp32WithPatch(Register lhs, Imm32 rhs) {
176 : return cmplWithPatch(rhs, lhs);
177 : }
178 : void atomic_inc32(const Operand& addr) {
179 : lock_incl(addr);
180 : }
181 : void atomic_dec32(const Operand& addr) {
182 : lock_decl(addr);
183 : }
184 :
185 : template <typename T>
186 0 : void atomicFetchAdd8SignExtend(Register src, const T& mem, Register temp, Register output) {
187 0 : CHECK_BYTEREGS(src, output);
188 0 : if (src != output)
189 0 : movl(src, output);
190 0 : lock_xaddb(output, Operand(mem));
191 0 : movsbl(output, output);
192 0 : }
193 :
194 : template <typename T>
195 0 : void atomicFetchAdd8ZeroExtend(Register src, const T& mem, Register temp, Register output) {
196 0 : CHECK_BYTEREGS(src, output);
197 0 : MOZ_ASSERT(temp == InvalidReg);
198 0 : if (src != output)
199 0 : movl(src, output);
200 0 : lock_xaddb(output, Operand(mem));
201 0 : movzbl(output, output);
202 0 : }
203 :
204 : template <typename T>
205 0 : void atomicFetchAdd8SignExtend(Imm32 src, const T& mem, Register temp, Register output) {
206 0 : CHECK_BYTEREG(output);
207 0 : MOZ_ASSERT(temp == InvalidReg);
208 0 : movb(src, output);
209 0 : lock_xaddb(output, Operand(mem));
210 0 : movsbl(output, output);
211 0 : }
212 :
213 : template <typename T>
214 0 : void atomicFetchAdd8ZeroExtend(Imm32 src, const T& mem, Register temp, Register output) {
215 0 : CHECK_BYTEREG(output);
216 0 : MOZ_ASSERT(temp == InvalidReg);
217 0 : movb(src, output);
218 0 : lock_xaddb(output, Operand(mem));
219 0 : movzbl(output, output);
220 0 : }
221 :
222 : template <typename T>
223 0 : void atomicFetchAdd16SignExtend(Register src, const T& mem, Register temp, Register output) {
224 0 : MOZ_ASSERT(temp == InvalidReg);
225 0 : if (src != output)
226 0 : movl(src, output);
227 0 : lock_xaddw(output, Operand(mem));
228 0 : movswl(output, output);
229 0 : }
230 :
231 : template <typename T>
232 0 : void atomicFetchAdd16ZeroExtend(Register src, const T& mem, Register temp, Register output) {
233 0 : MOZ_ASSERT(temp == InvalidReg);
234 0 : if (src != output)
235 0 : movl(src, output);
236 0 : lock_xaddw(output, Operand(mem));
237 0 : movzwl(output, output);
238 0 : }
239 :
240 : template <typename T>
241 0 : void atomicFetchAdd16SignExtend(Imm32 src, const T& mem, Register temp, Register output) {
242 0 : MOZ_ASSERT(temp == InvalidReg);
243 0 : movl(src, output);
244 0 : lock_xaddw(output, Operand(mem));
245 0 : movswl(output, output);
246 0 : }
247 :
248 : template <typename T>
249 0 : void atomicFetchAdd16ZeroExtend(Imm32 src, const T& mem, Register temp, Register output) {
250 0 : MOZ_ASSERT(temp == InvalidReg);
251 0 : movl(src, output);
252 0 : lock_xaddw(output, Operand(mem));
253 0 : movzwl(output, output);
254 0 : }
255 :
256 : template <typename T>
257 0 : void atomicFetchAdd32(Register src, const T& mem, Register temp, Register output) {
258 0 : MOZ_ASSERT(temp == InvalidReg);
259 0 : if (src != output)
260 0 : movl(src, output);
261 0 : lock_xaddl(output, Operand(mem));
262 0 : }
263 :
264 : template <typename T>
265 0 : void atomicFetchAdd32(Imm32 src, const T& mem, Register temp, Register output) {
266 0 : MOZ_ASSERT(temp == InvalidReg);
267 0 : movl(src, output);
268 0 : lock_xaddl(output, Operand(mem));
269 0 : }
270 :
271 : template <typename T>
272 0 : void atomicFetchSub8SignExtend(Register src, const T& mem, Register temp, Register output) {
273 0 : CHECK_BYTEREGS(src, output);
274 0 : MOZ_ASSERT(temp == InvalidReg);
275 0 : if (src != output)
276 0 : movl(src, output);
277 0 : negl(output);
278 0 : lock_xaddb(output, Operand(mem));
279 0 : movsbl(output, output);
280 0 : }
281 :
282 : template <typename T>
283 0 : void atomicFetchSub8ZeroExtend(Register src, const T& mem, Register temp, Register output) {
284 0 : CHECK_BYTEREGS(src, output);
285 0 : MOZ_ASSERT(temp == InvalidReg);
286 0 : if (src != output)
287 0 : movl(src, output);
288 0 : negl(output);
289 0 : lock_xaddb(output, Operand(mem));
290 0 : movzbl(output, output);
291 0 : }
292 :
293 : template <typename T>
294 0 : void atomicFetchSub8SignExtend(Imm32 src, const T& mem, Register temp, Register output) {
295 0 : CHECK_BYTEREG(output);
296 0 : MOZ_ASSERT(temp == InvalidReg);
297 0 : movb(Imm32(-src.value), output);
298 0 : lock_xaddb(output, Operand(mem));
299 0 : movsbl(output, output);
300 0 : }
301 :
302 : template <typename T>
303 0 : void atomicFetchSub8ZeroExtend(Imm32 src, const T& mem, Register temp, Register output) {
304 0 : CHECK_BYTEREG(output);
305 0 : MOZ_ASSERT(temp == InvalidReg);
306 0 : movb(Imm32(-src.value), output);
307 0 : lock_xaddb(output, Operand(mem));
308 0 : movzbl(output, output);
309 0 : }
310 :
311 : template <typename T>
312 0 : void atomicFetchSub16SignExtend(Register src, const T& mem, Register temp, Register output) {
313 0 : MOZ_ASSERT(temp == InvalidReg);
314 0 : if (src != output)
315 0 : movl(src, output);
316 0 : negl(output);
317 0 : lock_xaddw(output, Operand(mem));
318 0 : movswl(output, output);
319 0 : }
320 :
321 : template <typename T>
322 0 : void atomicFetchSub16ZeroExtend(Register src, const T& mem, Register temp, Register output) {
323 0 : MOZ_ASSERT(temp == InvalidReg);
324 0 : if (src != output)
325 0 : movl(src, output);
326 0 : negl(output);
327 0 : lock_xaddw(output, Operand(mem));
328 0 : movzwl(output, output);
329 0 : }
330 :
331 : template <typename T>
332 0 : void atomicFetchSub16SignExtend(Imm32 src, const T& mem, Register temp, Register output) {
333 0 : MOZ_ASSERT(temp == InvalidReg);
334 0 : movl(Imm32(-src.value), output);
335 0 : lock_xaddw(output, Operand(mem));
336 0 : movswl(output, output);
337 0 : }
338 :
339 : template <typename T>
340 0 : void atomicFetchSub16ZeroExtend(Imm32 src, const T& mem, Register temp, Register output) {
341 0 : MOZ_ASSERT(temp == InvalidReg);
342 0 : movl(Imm32(-src.value), output);
343 0 : lock_xaddw(output, Operand(mem));
344 0 : movzwl(output, output);
345 0 : }
346 :
347 : template <typename T>
348 0 : void atomicFetchSub32(Register src, const T& mem, Register temp, Register output) {
349 0 : MOZ_ASSERT(temp == InvalidReg);
350 0 : if (src != output)
351 0 : movl(src, output);
352 0 : negl(output);
353 0 : lock_xaddl(output, Operand(mem));
354 0 : }
355 :
356 : template <typename T>
357 0 : void atomicFetchSub32(Imm32 src, const T& mem, Register temp, Register output) {
358 0 : movl(Imm32(-src.value), output);
359 0 : lock_xaddl(output, Operand(mem));
360 0 : }
361 :
362 : // Requires output == eax.
363 : #define ATOMIC_BITOP_BODY(LOAD, OP, LOCK_CMPXCHG) \
364 : MOZ_ASSERT(output == eax); \
365 : LOAD(Operand(mem), eax); \
366 : Label again; \
367 : bind(&again); \
368 : movl(eax, temp); \
369 : OP(src, temp); \
370 : LOCK_CMPXCHG(temp, Operand(mem)); \
371 : j(NonZero, &again);
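     : // Illustrative expansion, e.g. for atomicFetchAnd32(src, mem, temp, eax):
     : //
     : //       movl  mem, eax
     : //   again:
     : //       movl  eax, temp
     : //       andl  src, temp
     : //       lock cmpxchgl temp, mem   ; ZF set iff mem still held eax's value
     : //       jnz   again               ; lost a race: eax now holds the fresh value
     : //
     : // so on exit eax holds the value the location had before the update.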
372 :
373 : template <typename S, typename T>
374 0 : void atomicFetchAnd8SignExtend(const S& src, const T& mem, Register temp, Register output) {
375 0 : ATOMIC_BITOP_BODY(movb, andl, lock_cmpxchgb)
376 0 : CHECK_BYTEREG(temp);
377 0 : movsbl(eax, eax);
378 0 : }
379 : template <typename S, typename T>
380 0 : void atomicFetchAnd8ZeroExtend(const S& src, const T& mem, Register temp, Register output) {
381 0 : ATOMIC_BITOP_BODY(movb, andl, lock_cmpxchgb)
382 0 : CHECK_BYTEREG(temp);
383 0 : movzbl(eax, eax);
384 0 : }
385 : template <typename S, typename T>
386 0 : void atomicFetchAnd16SignExtend(const S& src, const T& mem, Register temp, Register output) {
387 0 : ATOMIC_BITOP_BODY(movw, andl, lock_cmpxchgw)
388 0 : movswl(eax, eax);
389 0 : }
390 : template <typename S, typename T>
391 0 : void atomicFetchAnd16ZeroExtend(const S& src, const T& mem, Register temp, Register output) {
392 0 : ATOMIC_BITOP_BODY(movw, andl, lock_cmpxchgw)
393 0 : movzwl(eax, eax);
394 0 : }
395 : template <typename S, typename T>
396 0 : void atomicFetchAnd32(const S& src, const T& mem, Register temp, Register output) {
397 0 : ATOMIC_BITOP_BODY(movl, andl, lock_cmpxchgl)
398 0 : }
399 :
400 : template <typename S, typename T>
401 0 : void atomicFetchOr8SignExtend(const S& src, const T& mem, Register temp, Register output) {
402 0 : ATOMIC_BITOP_BODY(movb, orl, lock_cmpxchgb)
403 0 : CHECK_BYTEREG(temp);
404 0 : movsbl(eax, eax);
405 0 : }
406 : template <typename S, typename T>
407 0 : void atomicFetchOr8ZeroExtend(const S& src, const T& mem, Register temp, Register output) {
408 0 : ATOMIC_BITOP_BODY(movb, orl, lock_cmpxchgb)
409 0 : CHECK_BYTEREG(temp);
410 0 : movzbl(eax, eax);
411 0 : }
412 : template <typename S, typename T>
413 0 : void atomicFetchOr16SignExtend(const S& src, const T& mem, Register temp, Register output) {
414 0 : ATOMIC_BITOP_BODY(movw, orl, lock_cmpxchgw)
415 0 : movswl(eax, eax);
416 0 : }
417 : template <typename S, typename T>
418 0 : void atomicFetchOr16ZeroExtend(const S& src, const T& mem, Register temp, Register output) {
419 0 : ATOMIC_BITOP_BODY(movw, orl, lock_cmpxchgw)
420 0 : movzwl(eax, eax);
421 0 : }
422 : template <typename S, typename T>
423 0 : void atomicFetchOr32(const S& src, const T& mem, Register temp, Register output) {
424 0 : ATOMIC_BITOP_BODY(movl, orl, lock_cmpxchgl)
425 0 : }
426 :
427 : template <typename S, typename T>
428 0 : void atomicFetchXor8SignExtend(const S& src, const T& mem, Register temp, Register output) {
429 0 : ATOMIC_BITOP_BODY(movb, xorl, lock_cmpxchgb)
430 0 : CHECK_BYTEREG(temp);
431 0 : movsbl(eax, eax);
432 0 : }
433 : template <typename S, typename T>
434 0 : void atomicFetchXor8ZeroExtend(const S& src, const T& mem, Register temp, Register output) {
435 0 : ATOMIC_BITOP_BODY(movb, xorl, lock_cmpxchgb)
436 0 : CHECK_BYTEREG(temp);
437 0 : movzbl(eax, eax);
438 0 : }
439 : template <typename S, typename T>
440 0 : void atomicFetchXor16SignExtend(const S& src, const T& mem, Register temp, Register output) {
441 0 : ATOMIC_BITOP_BODY(movw, xorl, lock_cmpxchgw)
442 0 : movswl(eax, eax);
443 0 : }
444 : template <typename S, typename T>
445 0 : void atomicFetchXor16ZeroExtend(const S& src, const T& mem, Register temp, Register output) {
446 0 : ATOMIC_BITOP_BODY(movw, xorl, lock_cmpxchgw)
447 0 : movzwl(eax, eax);
448 0 : }
449 : template <typename S, typename T>
450 0 : void atomicFetchXor32(const S& src, const T& mem, Register temp, Register output) {
451 0 : ATOMIC_BITOP_BODY(movl, xorl, lock_cmpxchgl)
452 0 : }
453 :
454 : #undef ATOMIC_BITOP_BODY
455 :
456 : // S is Register or Imm32; T is Address or BaseIndex.
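     : // For example, atomicAdd32(Imm32(1), Address(ptr, 0)) emits (roughly)
     : //     lock addl $1, 0(ptr)
     : // where "ptr" stands for whatever base register the caller supplies.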
457 :
458 : template <typename S, typename T>
459 0 : void atomicAdd8(const S& src, const T& mem) {
460 0 : lock_addb(src, Operand(mem));
461 0 : }
462 : template <typename S, typename T>
463 0 : void atomicAdd16(const S& src, const T& mem) {
464 0 : lock_addw(src, Operand(mem));
465 0 : }
466 : template <typename S, typename T>
467 0 : void atomicAdd32(const S& src, const T& mem) {
468 0 : lock_addl(src, Operand(mem));
469 0 : }
470 : template <typename S, typename T>
471 0 : void atomicSub8(const S& src, const T& mem) {
472 0 : lock_subb(src, Operand(mem));
473 0 : }
474 : template <typename S, typename T>
475 0 : void atomicSub16(const S& src, const T& mem) {
476 0 : lock_subw(src, Operand(mem));
477 0 : }
478 : template <typename S, typename T>
479 0 : void atomicSub32(const S& src, const T& mem) {
480 0 : lock_subl(src, Operand(mem));
481 0 : }
482 : template <typename S, typename T>
483 0 : void atomicAnd8(const S& src, const T& mem) {
484 0 : lock_andb(src, Operand(mem));
485 0 : }
486 : template <typename S, typename T>
487 0 : void atomicAnd16(const S& src, const T& mem) {
488 0 : lock_andw(src, Operand(mem));
489 0 : }
490 : template <typename S, typename T>
491 0 : void atomicAnd32(const S& src, const T& mem) {
492 0 : lock_andl(src, Operand(mem));
493 0 : }
494 : template <typename S, typename T>
495 0 : void atomicOr8(const S& src, const T& mem) {
496 0 : lock_orb(src, Operand(mem));
497 0 : }
498 : template <typename S, typename T>
499 0 : void atomicOr16(const S& src, const T& mem) {
500 0 : lock_orw(src, Operand(mem));
501 0 : }
502 : template <typename S, typename T>
503 0 : void atomicOr32(const S& src, const T& mem) {
504 0 : lock_orl(src, Operand(mem));
505 0 : }
506 : template <typename S, typename T>
507 0 : void atomicXor8(const S& src, const T& mem) {
508 0 : lock_xorb(src, Operand(mem));
509 0 : }
510 : template <typename S, typename T>
511 0 : void atomicXor16(const S& src, const T& mem) {
512 0 : lock_xorw(src, Operand(mem));
513 0 : }
514 : template <typename S, typename T>
515 0 : void atomicXor32(const S& src, const T& mem) {
516 0 : lock_xorl(src, Operand(mem));
517 0 : }
518 :
519 0 : void storeLoadFence() {
520 : // This implementation follows Linux.
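     : // The lock-prefixed add of zero to (esp) leaves the data unchanged but,
     : // like any locked read-modify-write on x86, acts as a full store-load
     : // barrier on CPUs that lack mfence.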
521 0 : if (HasSSE2())
522 0 : masm.mfence();
523 : else
524 0 : lock_addl(Imm32(0), Operand(Address(esp, 0)));
525 0 : }
526 :
527 : void branch16(Condition cond, Register lhs, Register rhs, Label* label) {
528 : cmpw(rhs, lhs);
529 : j(cond, label);
530 : }
531 : void branchTest16(Condition cond, Register lhs, Register rhs, Label* label) {
532 : testw(rhs, lhs);
533 : j(cond, label);
534 : }
535 :
536 6902 : void jump(Label* label) {
537 6902 : jmp(label);
538 6902 : }
539 1119 : void jump(JitCode* code) {
540 1119 : jmp(code);
541 1119 : }
542 : void jump(RepatchLabel* label) {
543 : jmp(label);
544 : }
545 191 : void jump(Register reg) {
546 191 : jmp(Operand(reg));
547 191 : }
548 71 : void jump(const Address& addr) {
549 71 : jmp(Operand(addr));
550 71 : }
551 0 : void jump(wasm::TrapDesc target) {
552 0 : jmp(target);
553 0 : }
554 :
555 58 : void convertInt32ToDouble(Register src, FloatRegister dest) {
556 : // vcvtsi2sd and friends write only part of their output register, which
557 : // causes slowdowns on out-of-order processors. Explicitly break
558 : // dependencies with vxorpd (and vxorps elsewhere), which are handled
559 : // specially in modern CPUs, for this purpose. See sections 8.14, 9.8,
560 : // 10.8, 12.9, 13.16, 14.14, and 15.8 of Agner's Microarchitecture
561 : // document.
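     : // The emitted sequence is therefore roughly:
     : //     vxorpd dest, dest, dest   ; break the false dependency
     : //     vcvtsi2sd src, dest, dest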
562 58 : zeroDouble(dest);
563 58 : vcvtsi2sd(src, dest, dest);
564 58 : }
565 0 : void convertInt32ToDouble(const Address& src, FloatRegister dest) {
566 0 : convertInt32ToDouble(Operand(src), dest);
567 0 : }
568 0 : void convertInt32ToDouble(const BaseIndex& src, FloatRegister dest) {
569 0 : convertInt32ToDouble(Operand(src), dest);
570 0 : }
571 0 : void convertInt32ToDouble(const Operand& src, FloatRegister dest) {
572 : // Clear the output register first to break dependencies; see above.
573 0 : zeroDouble(dest);
574 0 : vcvtsi2sd(Operand(src), dest, dest);
575 0 : }
576 0 : void convertInt32ToFloat32(Register src, FloatRegister dest) {
577 : // Clear the output register first to break dependencies; see above.
578 0 : zeroFloat32(dest);
579 0 : vcvtsi2ss(src, dest, dest);
580 0 : }
581 : void convertInt32ToFloat32(const Address& src, FloatRegister dest) {
582 : convertInt32ToFloat32(Operand(src), dest);
583 : }
584 : void convertInt32ToFloat32(const Operand& src, FloatRegister dest) {
585 : // Clear the output register first to break dependencies; see above.
586 : zeroFloat32(dest);
587 : vcvtsi2ss(src, dest, dest);
588 : }
589 1 : Condition testDoubleTruthy(bool truthy, FloatRegister reg) {
590 2 : ScratchDoubleScope scratch(asMasm());
591 1 : zeroDouble(scratch);
592 1 : vucomisd(reg, scratch);
593 2 : return truthy ? NonZero : Zero;
594 : }
595 :
596 : // Class which ensures that registers used in byte ops are compatible with
597 : // such instructions, even if the original register passed in wasn't. This
598 : // only applies to x86, as on x64 all registers are valid single byte regs.
599 : // This doesn't lead to great code but helps to simplify code generation.
600 : //
601 : // Note that this can currently only be used in cases where the register is
602 : // read from by the guarded instruction, not written to.
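     : // For example (see store8 below): on x86, a value arriving in esi or edi
     : // has no single-byte encoding, so the guard pushes one of eax/ebx/ecx/edx,
     : // copies the value into it for the byte op, and restores it in the dtor.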
603 : class AutoEnsureByteRegister {
604 : MacroAssemblerX86Shared* masm;
605 : Register original_;
606 : Register substitute_;
607 :
608 : public:
609 : template <typename T>
610 11 : AutoEnsureByteRegister(MacroAssemblerX86Shared* masm, T address, Register reg)
611 11 : : masm(masm), original_(reg)
612 : {
613 11 : AllocatableGeneralRegisterSet singleByteRegs(Registers::SingleByteRegs);
614 11 : if (singleByteRegs.has(reg)) {
615 11 : substitute_ = reg;
616 : } else {
617 0 : MOZ_ASSERT(address.base != StackPointer);
618 0 : do {
619 0 : substitute_ = singleByteRegs.takeAny();
620 0 : } while (Operand(address).containsReg(substitute_));
621 :
622 0 : masm->push(substitute_);
623 0 : masm->mov(reg, substitute_);
624 : }
625 11 : }
626 :
627 22 : ~AutoEnsureByteRegister() {
628 11 : if (original_ != substitute_)
629 0 : masm->pop(substitute_);
630 11 : }
631 :
632 11 : Register reg() {
633 11 : return substitute_;
634 : }
635 : };
636 :
637 0 : void load8ZeroExtend(const Operand& src, Register dest) {
638 0 : movzbl(src, dest);
639 0 : }
640 124 : void load8ZeroExtend(const Address& src, Register dest) {
641 124 : movzbl(Operand(src), dest);
642 124 : }
643 532 : void load8ZeroExtend(const BaseIndex& src, Register dest) {
644 532 : movzbl(Operand(src), dest);
645 532 : }
646 : void load8SignExtend(const Operand& src, Register dest) {
647 : movsbl(src, dest);
648 : }
649 0 : void load8SignExtend(const Address& src, Register dest) {
650 0 : movsbl(Operand(src), dest);
651 0 : }
652 0 : void load8SignExtend(const BaseIndex& src, Register dest) {
653 0 : movsbl(Operand(src), dest);
654 0 : }
655 : template <typename T>
656 4 : void store8(Imm32 src, const T& dest) {
657 4 : movb(src, Operand(dest));
658 4 : }
659 : template <typename T>
660 11 : void store8(Register src, const T& dest) {
661 22 : AutoEnsureByteRegister ensure(this, dest, src);
662 11 : movb(ensure.reg(), Operand(dest));
663 11 : }
664 : template <typename T>
665 0 : void compareExchange8ZeroExtend(const T& mem, Register oldval, Register newval, Register output) {
666 0 : MOZ_ASSERT(output == eax);
667 0 : CHECK_BYTEREG(newval);
668 0 : if (oldval != output)
669 0 : movl(oldval, output);
670 0 : lock_cmpxchgb(newval, Operand(mem));
671 0 : movzbl(output, output);
672 0 : }
673 : template <typename T>
674 0 : void compareExchange8SignExtend(const T& mem, Register oldval, Register newval, Register output) {
675 0 : MOZ_ASSERT(output == eax);
676 0 : CHECK_BYTEREG(newval);
677 0 : if (oldval != output)
678 0 : movl(oldval, output);
679 0 : lock_cmpxchgb(newval, Operand(mem));
680 0 : movsbl(output, output);
681 0 : }
682 : template <typename T>
683 0 : void atomicExchange8ZeroExtend(const T& mem, Register value, Register output) {
684 0 : if (value != output)
685 0 : movl(value, output);
686 0 : xchgb(output, Operand(mem));
687 0 : movzbl(output, output);
688 0 : }
689 : template <typename T>
690 0 : void atomicExchange8SignExtend(const T& mem, Register value, Register output) {
691 0 : if (value != output)
692 0 : movl(value, output);
693 0 : xchgb(output, Operand(mem));
694 0 : movsbl(output, output);
695 0 : }
696 0 : void load16ZeroExtend(const Operand& src, Register dest) {
697 0 : movzwl(src, dest);
698 0 : }
699 126 : void load16ZeroExtend(const Address& src, Register dest) {
700 126 : movzwl(Operand(src), dest);
701 126 : }
702 73 : void load16ZeroExtend(const BaseIndex& src, Register dest) {
703 73 : movzwl(Operand(src), dest);
704 73 : }
705 : template <typename S, typename T>
706 40 : void store16(const S& src, const T& dest) {
707 40 : movw(src, Operand(dest));
708 40 : }
709 : template <typename T>
710 0 : void compareExchange16ZeroExtend(const T& mem, Register oldval, Register newval, Register output) {
711 0 : MOZ_ASSERT(output == eax);
712 0 : if (oldval != output)
713 0 : movl(oldval, output);
714 0 : lock_cmpxchgw(newval, Operand(mem));
715 0 : movzwl(output, output);
716 0 : }
717 : template <typename T>
718 0 : void compareExchange16SignExtend(const T& mem, Register oldval, Register newval, Register output) {
719 0 : MOZ_ASSERT(output == eax);
720 0 : if (oldval != output)
721 0 : movl(oldval, output);
722 0 : lock_cmpxchgw(newval, Operand(mem));
723 0 : movswl(output, output);
724 0 : }
725 : template <typename T>
726 0 : void atomicExchange16ZeroExtend(const T& mem, Register value, Register output) {
727 0 : if (value != output)
728 0 : movl(value, output);
729 0 : xchgw(output, Operand(mem));
730 0 : movzwl(output, output);
731 0 : }
732 : template <typename T>
733 0 : void atomicExchange16SignExtend(const T& mem, Register value, Register output) {
734 0 : if (value != output)
735 0 : movl(value, output);
736 0 : xchgw(output, Operand(mem));
737 0 : movswl(output, output);
738 0 : }
739 : void load16SignExtend(const Operand& src, Register dest) {
740 : movswl(src, dest);
741 : }
742 0 : void load16SignExtend(const Address& src, Register dest) {
743 0 : movswl(Operand(src), dest);
744 0 : }
745 0 : void load16SignExtend(const BaseIndex& src, Register dest) {
746 0 : movswl(Operand(src), dest);
747 0 : }
748 1639 : void load32(const Address& address, Register dest) {
749 1639 : movl(Operand(address), dest);
750 1639 : }
751 51 : void load32(const BaseIndex& src, Register dest) {
752 51 : movl(Operand(src), dest);
753 51 : }
754 0 : void load32(const Operand& src, Register dest) {
755 0 : movl(src, dest);
756 0 : }
757 : template <typename S, typename T>
758 6588 : void store32(const S& src, const T& dest) {
759 6588 : movl(src, Operand(dest));
760 6588 : }
761 : template <typename T>
762 0 : void compareExchange32(const T& mem, Register oldval, Register newval, Register output) {
763 0 : MOZ_ASSERT(output == eax);
764 0 : if (oldval != output)
765 0 : movl(oldval, output);
766 0 : lock_cmpxchgl(newval, Operand(mem));
767 0 : }
768 : template <typename T>
769 0 : void atomicExchange32(const T& mem, Register value, Register output) {
770 0 : if (value != output)
771 0 : movl(value, output);
772 0 : xchgl(output, Operand(mem));
773 0 : }
774 : template <typename S, typename T>
775 : void store32_NoSecondScratch(const S& src, const T& dest) {
776 : store32(src, dest);
777 : }
778 14 : void loadDouble(const Address& src, FloatRegister dest) {
779 14 : vmovsd(src, dest);
780 14 : }
781 0 : void loadDouble(const BaseIndex& src, FloatRegister dest) {
782 0 : vmovsd(src, dest);
783 0 : }
784 0 : void loadDouble(const Operand& src, FloatRegister dest) {
785 0 : switch (src.kind()) {
786 : case Operand::MEM_REG_DISP:
787 0 : loadDouble(src.toAddress(), dest);
788 0 : break;
789 : case Operand::MEM_SCALE:
790 0 : loadDouble(src.toBaseIndex(), dest);
791 0 : break;
792 : default:
793 0 : MOZ_CRASH("unexpected operand kind");
794 : }
795 0 : }
796 0 : void moveDouble(FloatRegister src, FloatRegister dest) {
797 : // Use vmovapd instead of vmovsd to avoid dependencies.
798 0 : vmovapd(src, dest);
799 0 : }
800 59 : void zeroDouble(FloatRegister reg) {
801 59 : vxorpd(reg, reg, reg);
802 59 : }
803 0 : void zeroFloat32(FloatRegister reg) {
804 0 : vxorps(reg, reg, reg);
805 0 : }
806 0 : void convertFloat32ToDouble(FloatRegister src, FloatRegister dest) {
807 0 : vcvtss2sd(src, dest, dest);
808 0 : }
809 0 : void convertDoubleToFloat32(FloatRegister src, FloatRegister dest) {
810 0 : vcvtsd2ss(src, dest, dest);
811 0 : }
812 :
813 0 : void loadInt32x4(const Address& addr, FloatRegister dest) {
814 0 : vmovdqa(Operand(addr), dest);
815 0 : }
816 0 : void loadFloat32x4(const Address& addr, FloatRegister dest) {
817 0 : vmovaps(Operand(addr), dest);
818 0 : }
819 0 : void storeInt32x4(FloatRegister src, const Address& addr) {
820 0 : vmovdqa(src, Operand(addr));
821 0 : }
822 0 : void storeFloat32x4(FloatRegister src, const Address& addr) {
823 0 : vmovaps(src, Operand(addr));
824 0 : }
825 :
826 0 : void convertFloat32x4ToInt32x4(FloatRegister src, FloatRegister dest) {
827 : // Note that if the conversion fails (because the converted
828 : // result is larger than the maximum signed int32, less than the
829 : // least signed int32, or is NaN), this will return the undefined
830 : // integer value (0x80000000).
831 0 : vcvttps2dq(src, dest);
832 0 : }
833 0 : void convertInt32x4ToFloat32x4(FloatRegister src, FloatRegister dest) {
834 0 : vcvtdq2ps(src, dest);
835 0 : }
836 :
837 0 : void bitwiseAndSimd128(const Operand& src, FloatRegister dest) {
838 : // TODO Using the "ps" variant for all types incurs a domain crossing
839 : // penalty for integer types and double.
840 0 : vandps(src, dest, dest);
841 0 : }
842 0 : void bitwiseAndNotSimd128(const Operand& src, FloatRegister dest) {
843 0 : vandnps(src, dest, dest);
844 0 : }
845 0 : void bitwiseOrSimd128(const Operand& src, FloatRegister dest) {
846 0 : vorps(src, dest, dest);
847 0 : }
848 0 : void bitwiseXorSimd128(const Operand& src, FloatRegister dest) {
849 0 : vxorps(src, dest, dest);
850 0 : }
851 0 : void zeroSimd128Float(FloatRegister dest) {
852 0 : vxorps(dest, dest, dest);
853 0 : }
854 0 : void zeroSimd128Int(FloatRegister dest) {
855 0 : vpxor(dest, dest, dest);
856 0 : }
857 :
858 : template <class T, class Reg> inline void loadScalar(const Operand& src, Reg dest);
859 : template <class T, class Reg> inline void storeScalar(Reg src, const Address& dest);
860 : template <class T> inline void loadAlignedVector(const Address& src, FloatRegister dest);
861 : template <class T> inline void storeAlignedVector(FloatRegister src, const Address& dest);
862 :
863 0 : void loadInt32x1(const Address& src, FloatRegister dest) {
864 0 : vmovd(Operand(src), dest);
865 0 : }
866 0 : void loadInt32x1(const BaseIndex& src, FloatRegister dest) {
867 0 : vmovd(Operand(src), dest);
868 0 : }
869 0 : void loadInt32x2(const Address& src, FloatRegister dest) {
870 0 : vmovq(Operand(src), dest);
871 0 : }
872 0 : void loadInt32x2(const BaseIndex& src, FloatRegister dest) {
873 0 : vmovq(Operand(src), dest);
874 0 : }
875 0 : void loadInt32x3(const BaseIndex& src, FloatRegister dest) {
876 0 : BaseIndex srcZ(src);
877 0 : srcZ.offset += 2 * sizeof(int32_t);
878 :
879 0 : ScratchSimd128Scope scratch(asMasm());
880 0 : vmovq(Operand(src), dest);
881 0 : vmovd(Operand(srcZ), scratch);
882 0 : vmovlhps(scratch, dest, dest);
883 0 : }
884 0 : void loadInt32x3(const Address& src, FloatRegister dest) {
885 0 : Address srcZ(src);
886 0 : srcZ.offset += 2 * sizeof(int32_t);
887 :
888 0 : ScratchSimd128Scope scratch(asMasm());
889 0 : vmovq(Operand(src), dest);
890 0 : vmovd(Operand(srcZ), scratch);
891 0 : vmovlhps(scratch, dest, dest);
892 0 : }
893 :
894 0 : void loadAlignedSimd128Int(const Address& src, FloatRegister dest) {
895 0 : vmovdqa(Operand(src), dest);
896 0 : }
897 0 : void loadAlignedSimd128Int(const Operand& src, FloatRegister dest) {
898 0 : vmovdqa(src, dest);
899 0 : }
900 0 : void storeAlignedSimd128Int(FloatRegister src, const Address& dest) {
901 0 : vmovdqa(src, Operand(dest));
902 0 : }
903 0 : void moveSimd128Int(FloatRegister src, FloatRegister dest) {
904 0 : vmovdqa(src, dest);
905 0 : }
906 0 : FloatRegister reusedInputInt32x4(FloatRegister src, FloatRegister dest) {
907 0 : if (HasAVX())
908 0 : return src;
909 0 : moveSimd128Int(src, dest);
910 0 : return dest;
911 : }
912 : FloatRegister reusedInputAlignedInt32x4(const Operand& src, FloatRegister dest) {
913 : if (HasAVX() && src.kind() == Operand::FPREG)
914 : return FloatRegister::FromCode(src.fpu());
915 : loadAlignedSimd128Int(src, dest);
916 : return dest;
917 : }
918 0 : void loadUnalignedSimd128Int(const Address& src, FloatRegister dest) {
919 0 : vmovdqu(Operand(src), dest);
920 0 : }
921 0 : void loadUnalignedSimd128Int(const BaseIndex& src, FloatRegister dest) {
922 0 : vmovdqu(Operand(src), dest);
923 0 : }
924 0 : void loadUnalignedSimd128Int(const Operand& src, FloatRegister dest) {
925 0 : vmovdqu(src, dest);
926 0 : }
927 :
928 0 : void storeInt32x1(FloatRegister src, const Address& dest) {
929 0 : vmovd(src, Operand(dest));
930 0 : }
931 0 : void storeInt32x1(FloatRegister src, const BaseIndex& dest) {
932 0 : vmovd(src, Operand(dest));
933 0 : }
934 0 : void storeInt32x2(FloatRegister src, const Address& dest) {
935 0 : vmovq(src, Operand(dest));
936 0 : }
937 0 : void storeInt32x2(FloatRegister src, const BaseIndex& dest) {
938 0 : vmovq(src, Operand(dest));
939 0 : }
940 0 : void storeInt32x3(FloatRegister src, const Address& dest) {
941 0 : Address destZ(dest);
942 0 : destZ.offset += 2 * sizeof(int32_t);
943 0 : vmovq(src, Operand(dest));
944 0 : ScratchSimd128Scope scratch(asMasm());
945 0 : vmovhlps(src, scratch, scratch);
946 0 : vmovd(scratch, Operand(destZ));
947 0 : }
948 0 : void storeInt32x3(FloatRegister src, const BaseIndex& dest) {
949 0 : BaseIndex destZ(dest);
950 0 : destZ.offset += 2 * sizeof(int32_t);
951 0 : vmovq(src, Operand(dest));
952 0 : ScratchSimd128Scope scratch(asMasm());
953 0 : vmovhlps(src, scratch, scratch);
954 0 : vmovd(scratch, Operand(destZ));
955 0 : }
956 :
957 0 : void storeUnalignedSimd128Int(FloatRegister src, const Address& dest) {
958 0 : vmovdqu(src, Operand(dest));
959 0 : }
960 0 : void storeUnalignedSimd128Int(FloatRegister src, const BaseIndex& dest) {
961 0 : vmovdqu(src, Operand(dest));
962 0 : }
963 0 : void storeUnalignedSimd128Int(FloatRegister src, const Operand& dest) {
964 0 : vmovdqu(src, dest);
965 0 : }
966 0 : void packedEqualInt32x4(const Operand& src, FloatRegister dest) {
967 0 : vpcmpeqd(src, dest, dest);
968 0 : }
969 0 : void packedGreaterThanInt32x4(const Operand& src, FloatRegister dest) {
970 0 : vpcmpgtd(src, dest, dest);
971 0 : }
972 : void packedAddInt8(const Operand& src, FloatRegister dest) {
973 : vpaddb(src, dest, dest);
974 : }
975 0 : void packedSubInt8(const Operand& src, FloatRegister dest) {
976 0 : vpsubb(src, dest, dest);
977 0 : }
978 : void packedAddInt16(const Operand& src, FloatRegister dest) {
979 : vpaddw(src, dest, dest);
980 : }
981 0 : void packedSubInt16(const Operand& src, FloatRegister dest) {
982 0 : vpsubw(src, dest, dest);
983 0 : }
984 : void packedAddInt32(const Operand& src, FloatRegister dest) {
985 : vpaddd(src, dest, dest);
986 : }
987 0 : void packedSubInt32(const Operand& src, FloatRegister dest) {
988 0 : vpsubd(src, dest, dest);
989 0 : }
990 0 : void packedRcpApproximationFloat32x4(const Operand& src, FloatRegister dest) {
991 : // This function only computes an approximation of the result; it might
992 : // need fixing up if the spec requires a given precision for this operation.
993 : // TODO See also bug 1068028.
994 0 : vrcpps(src, dest);
995 0 : }
996 0 : void packedRcpSqrtApproximationFloat32x4(const Operand& src, FloatRegister dest) {
997 : // TODO See comment above. See also bug 1068028.
998 0 : vrsqrtps(src, dest);
999 0 : }
1000 0 : void packedSqrtFloat32x4(const Operand& src, FloatRegister dest) {
1001 0 : vsqrtps(src, dest);
1002 0 : }
1003 :
1004 0 : void packedLeftShiftByScalarInt16x8(FloatRegister src, FloatRegister dest) {
1005 0 : vpsllw(src, dest, dest);
1006 0 : }
1007 0 : void packedLeftShiftByScalarInt16x8(Imm32 count, FloatRegister dest) {
1008 0 : vpsllw(count, dest, dest);
1009 0 : }
1010 0 : void packedRightShiftByScalarInt16x8(FloatRegister src, FloatRegister dest) {
1011 0 : vpsraw(src, dest, dest);
1012 0 : }
1013 0 : void packedRightShiftByScalarInt16x8(Imm32 count, FloatRegister dest) {
1014 0 : vpsraw(count, dest, dest);
1015 0 : }
1016 0 : void packedUnsignedRightShiftByScalarInt16x8(FloatRegister src, FloatRegister dest) {
1017 0 : vpsrlw(src, dest, dest);
1018 0 : }
1019 0 : void packedUnsignedRightShiftByScalarInt16x8(Imm32 count, FloatRegister dest) {
1020 0 : vpsrlw(count, dest, dest);
1021 0 : }
1022 :
1023 0 : void packedLeftShiftByScalarInt32x4(FloatRegister src, FloatRegister dest) {
1024 0 : vpslld(src, dest, dest);
1025 0 : }
1026 0 : void packedLeftShiftByScalarInt32x4(Imm32 count, FloatRegister dest) {
1027 0 : vpslld(count, dest, dest);
1028 0 : }
1029 0 : void packedRightShiftByScalarInt32x4(FloatRegister src, FloatRegister dest) {
1030 0 : vpsrad(src, dest, dest);
1031 0 : }
1032 0 : void packedRightShiftByScalarInt32x4(Imm32 count, FloatRegister dest) {
1033 0 : vpsrad(count, dest, dest);
1034 0 : }
1035 0 : void packedUnsignedRightShiftByScalarInt32x4(FloatRegister src, FloatRegister dest) {
1036 0 : vpsrld(src, dest, dest);
1037 0 : }
1038 0 : void packedUnsignedRightShiftByScalarInt32x4(Imm32 count, FloatRegister dest) {
1039 0 : vpsrld(count, dest, dest);
1040 0 : }
1041 :
1042 0 : void loadFloat32x3(const Address& src, FloatRegister dest) {
1043 0 : Address srcZ(src);
1044 0 : srcZ.offset += 2 * sizeof(float);
1045 0 : vmovsd(src, dest);
1046 0 : ScratchSimd128Scope scratch(asMasm());
1047 0 : vmovss(srcZ, scratch);
1048 0 : vmovlhps(scratch, dest, dest);
1049 0 : }
1050 0 : void loadFloat32x3(const BaseIndex& src, FloatRegister dest) {
1051 0 : BaseIndex srcZ(src);
1052 0 : srcZ.offset += 2 * sizeof(float);
1053 0 : vmovsd(src, dest);
1054 0 : ScratchSimd128Scope scratch(asMasm());
1055 0 : vmovss(srcZ, scratch);
1056 0 : vmovlhps(scratch, dest, dest);
1057 0 : }
1058 :
1059 0 : void loadAlignedSimd128Float(const Address& src, FloatRegister dest) {
1060 0 : vmovaps(Operand(src), dest);
1061 0 : }
1062 0 : void loadAlignedSimd128Float(const Operand& src, FloatRegister dest) {
1063 0 : vmovaps(src, dest);
1064 0 : }
1065 :
1066 0 : void storeAlignedSimd128Float(FloatRegister src, const Address& dest) {
1067 0 : vmovaps(src, Operand(dest));
1068 0 : }
1069 0 : void moveSimd128Float(FloatRegister src, FloatRegister dest) {
1070 0 : vmovaps(src, dest);
1071 0 : }
1072 0 : FloatRegister reusedInputFloat32x4(FloatRegister src, FloatRegister dest) {
1073 0 : if (HasAVX())
1074 0 : return src;
1075 0 : moveSimd128Float(src, dest);
1076 0 : return dest;
1077 : }
1078 0 : FloatRegister reusedInputAlignedFloat32x4(const Operand& src, FloatRegister dest) {
1079 0 : if (HasAVX() && src.kind() == Operand::FPREG)
1080 0 : return FloatRegister::FromCode(src.fpu());
1081 0 : loadAlignedSimd128Float(src, dest);
1082 0 : return dest;
1083 : }
1084 165534 : void loadUnalignedSimd128Float(const Address& src, FloatRegister dest) {
1085 165534 : vmovups(Operand(src), dest);
1086 165522 : }
1087 0 : void loadUnalignedSimd128Float(const BaseIndex& src, FloatRegister dest) {
1088 0 : vmovdqu(Operand(src), dest);
1089 0 : }
1090 0 : void loadUnalignedSimd128Float(const Operand& src, FloatRegister dest) {
1091 0 : vmovups(src, dest);
1092 0 : }
1093 165665 : void storeUnalignedSimd128Float(FloatRegister src, const Address& dest) {
1094 165665 : vmovups(src, Operand(dest));
1095 165663 : }
1096 0 : void storeUnalignedSimd128Float(FloatRegister src, const BaseIndex& dest) {
1097 0 : vmovups(src, Operand(dest));
1098 0 : }
1099 0 : void storeUnalignedSimd128Float(FloatRegister src, const Operand& dest) {
1100 0 : vmovups(src, dest);
1101 0 : }
1102 0 : void packedAddFloat32(const Operand& src, FloatRegister dest) {
1103 0 : vaddps(src, dest, dest);
1104 0 : }
1105 : void packedSubFloat32(const Operand& src, FloatRegister dest) {
1106 : vsubps(src, dest, dest);
1107 : }
1108 : void packedMulFloat32(const Operand& src, FloatRegister dest) {
1109 : vmulps(src, dest, dest);
1110 : }
1111 : void packedDivFloat32(const Operand& src, FloatRegister dest) {
1112 : vdivps(src, dest, dest);
1113 : }
1114 :
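     : // Packs four 2-bit lane selectors into the 8-bit immediate expected by
     : // vpshufd/vshufps; e.g. ComputeShuffleMask(3, 2, 1, 0) == 0b00011011 (0x1B),
     : // which reverses the four 32-bit lanes.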
1115 0 : static uint32_t ComputeShuffleMask(uint32_t x = 0, uint32_t y = 1,
1116 : uint32_t z = 2, uint32_t w = 3)
1117 : {
1118 0 : MOZ_ASSERT(x < 4 && y < 4 && z < 4 && w < 4);
1119 0 : uint32_t r = (w << 6) | (z << 4) | (y << 2) | (x << 0);
1120 0 : MOZ_ASSERT(r < 256);
1121 0 : return r;
1122 : }
1123 :
1124 0 : void shuffleInt32(uint32_t mask, FloatRegister src, FloatRegister dest) {
1125 0 : vpshufd(mask, src, dest);
1126 0 : }
1127 0 : void moveLowInt32(FloatRegister src, Register dest) {
1128 0 : vmovd(src, dest);
1129 0 : }
1130 :
1131 0 : void moveHighPairToLowPairFloat32(FloatRegister src, FloatRegister dest) {
1132 0 : vmovhlps(src, dest, dest);
1133 0 : }
1134 0 : void shuffleFloat32(uint32_t mask, FloatRegister src, FloatRegister dest) {
1135 : // The shuffle instruction on x86 takes two lanes from the dest operand
1136 : // and two lanes from the src operand. To simplify things, just
1137 : // clobber the output with the input and apply the instruction
1138 : // afterwards.
1139 : // Note: this is useAtStart-safe because src isn't read afterwards.
1140 0 : FloatRegister srcCopy = reusedInputFloat32x4(src, dest);
1141 0 : vshufps(mask, srcCopy, srcCopy, dest);
1142 0 : }
1143 : void shuffleMix(uint32_t mask, const Operand& src, FloatRegister dest) {
1144 : // Note this uses vshufps, which incurs a cross-domain penalty on CPUs
1145 : // where that applies, but that's the way clang and gcc do it.
1146 : vshufps(mask, src, dest, dest);
1147 : }
1148 :
1149 : void moveFloatAsDouble(Register src, FloatRegister dest) {
1150 : vmovd(src, dest);
1151 : vcvtss2sd(dest, dest, dest);
1152 : }
1153 : void loadFloatAsDouble(const Address& src, FloatRegister dest) {
1154 : vmovss(src, dest);
1155 : vcvtss2sd(dest, dest, dest);
1156 : }
1157 : void loadFloatAsDouble(const BaseIndex& src, FloatRegister dest) {
1158 : vmovss(src, dest);
1159 : vcvtss2sd(dest, dest, dest);
1160 : }
1161 : void loadFloatAsDouble(const Operand& src, FloatRegister dest) {
1162 : loadFloat32(src, dest);
1163 : vcvtss2sd(dest, dest, dest);
1164 : }
1165 0 : void loadFloat32(const Address& src, FloatRegister dest) {
1166 0 : vmovss(src, dest);
1167 0 : }
1168 0 : void loadFloat32(const BaseIndex& src, FloatRegister dest) {
1169 0 : vmovss(src, dest);
1170 0 : }
1171 0 : void loadFloat32(const Operand& src, FloatRegister dest) {
1172 0 : switch (src.kind()) {
1173 : case Operand::MEM_REG_DISP:
1174 0 : loadFloat32(src.toAddress(), dest);
1175 0 : break;
1176 : case Operand::MEM_SCALE:
1177 0 : loadFloat32(src.toBaseIndex(), dest);
1178 0 : break;
1179 : default:
1180 0 : MOZ_CRASH("unexpected operand kind");
1181 : }
1182 0 : }
1183 0 : void moveFloat32(FloatRegister src, FloatRegister dest) {
1184 : // Use vmovaps instead of vmovss to avoid dependencies.
1185 0 : vmovaps(src, dest);
1186 0 : }
1187 :
1188 : // Checks whether a double is representable as a 32-bit integer. If so, the
1189 : // integer is written to the output register. Otherwise, jumps to the given
1190 : // failure label. This function overwrites the scratch float register.
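     : // (Sketch of the check: truncate with vcvttsd2si, convert the result back
     : // to double and compare with the original; NaN makes vucomisd set PF, which
     : // the Parity branch catches, and any inexact or out-of-range input fails
     : // the NotEqual compare.)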
1191 26 : void convertDoubleToInt32(FloatRegister src, Register dest, Label* fail,
1192 : bool negativeZeroCheck = true)
1193 : {
1194 : // Check for -0.0
1195 26 : if (negativeZeroCheck)
1196 0 : branchNegativeZero(src, dest, fail);
1197 :
1198 52 : ScratchDoubleScope scratch(asMasm());
1199 26 : vcvttsd2si(src, dest);
1200 26 : convertInt32ToDouble(dest, scratch);
1201 26 : vucomisd(scratch, src);
1202 26 : j(Assembler::Parity, fail);
1203 26 : j(Assembler::NotEqual, fail);
1204 26 : }
1205 :
1206 : // Checks whether a float32 is representable as a 32-bit integer. If so, the
1207 : // integer is written to the output register. Otherwise, jumps to the given
1208 : // failure label. This function overwrites the scratch float register.
1209 0 : void convertFloat32ToInt32(FloatRegister src, Register dest, Label* fail,
1210 : bool negativeZeroCheck = true)
1211 : {
1212 : // Check for -0.0
1213 0 : if (negativeZeroCheck)
1214 0 : branchNegativeZeroFloat32(src, dest, fail);
1215 :
1216 0 : ScratchFloat32Scope scratch(asMasm());
1217 0 : vcvttss2si(src, dest);
1218 0 : convertInt32ToFloat32(dest, scratch);
1219 0 : vucomiss(scratch, src);
1220 0 : j(Assembler::Parity, fail);
1221 0 : j(Assembler::NotEqual, fail);
1222 0 : }
1223 :
1224 : inline void clampIntToUint8(Register reg);
1225 :
1226 0 : bool maybeInlineDouble(double d, FloatRegister dest) {
1227 : // Loading zero with xor is specially optimized in hardware.
1228 0 : if (mozilla::IsPositiveZero(d)) {
1229 0 : zeroDouble(dest);
1230 0 : return true;
1231 : }
1232 :
1233 : // It is also possible to load several common constants using vpcmpeqw
1234 : // to get all ones and then vpsllq and vpsrlq to get zeros at the ends,
1235 : // as described in "13.4 Generating constants" of
1236 : // "2. Optimizing subroutines in assembly language" by Agner Fog, and as
1237 : // previously implemented here. However, with x86 and x64 both using
1238 : // constant pool loads for double constants, this is probably only
1239 : // worthwhile in cases where a load is likely to be delayed.
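     : // (For instance, the classic sequence
     : //     pcmpeqw xmm, xmm ; psllq $54, xmm ; psrlq $2, xmm
     : // materializes 0x3FF0000000000000 == 1.0 without a memory load.)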
1240 :
1241 0 : return false;
1242 : }
1243 :
1244 0 : bool maybeInlineFloat(float f, FloatRegister dest) {
1245 : // See comment above
1246 0 : if (mozilla::IsPositiveZero(f)) {
1247 0 : zeroFloat32(dest);
1248 0 : return true;
1249 : }
1250 0 : return false;
1251 : }
1252 :
1253 0 : bool maybeInlineSimd128Int(const SimdConstant& v, const FloatRegister& dest) {
1254 0 : static const SimdConstant zero = SimdConstant::SplatX4(0);
1255 0 : static const SimdConstant minusOne = SimdConstant::SplatX4(-1);
1256 0 : if (v == zero) {
1257 0 : zeroSimd128Int(dest);
1258 0 : return true;
1259 : }
1260 0 : if (v == minusOne) {
1261 0 : vpcmpeqw(Operand(dest), dest, dest);
1262 0 : return true;
1263 : }
1264 0 : return false;
1265 : }
1266 0 : bool maybeInlineSimd128Float(const SimdConstant& v, const FloatRegister& dest) {
1267 0 : static const SimdConstant zero = SimdConstant::SplatX4(0.f);
1268 0 : if (v == zero) {
1269 : // This won't get inlined if the SimdConstant v contains -0 in any
1270 : // lane, as operator== here does a memcmp.
1271 0 : zeroSimd128Float(dest);
1272 0 : return true;
1273 : }
1274 0 : return false;
1275 : }
1276 :
1277 26 : void convertBoolToInt32(Register source, Register dest) {
1278 : // Note that C++ bool is only 1 byte, so zero extend it to clear the
1279 : // higher-order bits.
1280 26 : movzbl(source, dest);
1281 26 : }
1282 :
1283 35 : void emitSet(Assembler::Condition cond, Register dest,
1284 : Assembler::NaNCond ifNaN = Assembler::NaN_HandledByCond) {
1285 35 : if (AllocatableGeneralRegisterSet(Registers::SingleByteRegs).has(dest)) {
1286 : // If the register we're defining is a single byte register,
1287 : // take advantage of the setCC instruction
1288 35 : setCC(cond, dest);
1289 35 : movzbl(dest, dest);
1290 :
1291 35 : if (ifNaN != Assembler::NaN_HandledByCond) {
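     : // An unordered compare (NaN operand) leaves PF set; in that case fall
     : // through to the mov below and force the caller's requested NaN result,
     : // otherwise skip it via the NoParity jump.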
1292 0 : Label noNaN;
1293 0 : j(Assembler::NoParity, &noNaN);
1294 0 : mov(ImmWord(ifNaN == Assembler::NaN_IsTrue), dest);
1295 0 : bind(&noNaN);
1296 : }
1297 : } else {
1298 0 : Label end;
1299 0 : Label ifFalse;
1300 :
1301 0 : if (ifNaN == Assembler::NaN_IsFalse)
1302 0 : j(Assembler::Parity, &ifFalse);
1303 : // Note a subtlety here: FLAGS is live at this point, and the
1304 : // mov interface doesn't guarantee to preserve FLAGS. Use
1305 : // movl instead of mov, because the movl instruction
1306 : // preserves FLAGS.
1307 0 : movl(Imm32(1), dest);
1308 0 : j(cond, &end);
1309 0 : if (ifNaN == Assembler::NaN_IsTrue)
1310 0 : j(Assembler::Parity, &end);
1311 0 : bind(&ifFalse);
1312 0 : mov(ImmWord(0), dest);
1313 :
1314 0 : bind(&end);
1315 : }
1316 35 : }
1317 :
1318 : // Emit a JMP that can be toggled to a CMP. See ToggleToJmp(), ToggleToCmp().
1319 2514 : CodeOffset toggledJump(Label* label) {
1320 2514 : CodeOffset offset(size());
1321 2514 : jump(label);
1322 2514 : return offset;
1323 : }
1324 :
1325 : template <typename T>
1326 1357 : void computeEffectiveAddress(const T& address, Register dest) {
1327 1357 : lea(Operand(address), dest);
1328 1357 : }
1329 :
1330 93 : void checkStackAlignment() {
1331 : // Exists for ARM compatibility.
1332 93 : }
1333 :
1334 35 : CodeOffset labelForPatch() {
1335 35 : return CodeOffset(size());
1336 : }
1337 :
1338 80 : void abiret() {
1339 80 : ret();
1340 80 : }
1341 :
1342 : template<typename T>
1343 : void compareExchangeToTypedIntArray(Scalar::Type arrayType, const T& mem, Register oldval, Register newval,
1344 : Register temp, AnyRegister output);
1345 :
1346 : template<typename T>
1347 : void atomicExchangeToTypedIntArray(Scalar::Type arrayType, const T& mem, Register value,
1348 : Register temp, AnyRegister output);
1349 :
1350 : protected:
1351 : bool buildOOLFakeExitFrame(void* fakeReturnAddr);
1352 : };
1353 :
1354 : // Specialize for float to use movaps. Use movdqa for everything else.
1355 : template <>
1356 : inline void
1357 0 : MacroAssemblerX86Shared::loadAlignedVector<float>(const Address& src, FloatRegister dest)
1358 : {
1359 0 : loadAlignedSimd128Float(src, dest);
1360 0 : }
1361 :
1362 : template <typename T>
1363 : inline void
1364 0 : MacroAssemblerX86Shared::loadAlignedVector(const Address& src, FloatRegister dest)
1365 : {
1366 0 : loadAlignedSimd128Int(src, dest);
1367 0 : }
1368 :
1369 : // Specialize for float to use movaps. Use movdqa for everything else.
1370 : template <>
1371 : inline void
1372 0 : MacroAssemblerX86Shared::storeAlignedVector<float>(FloatRegister src, const Address& dest)
1373 : {
1374 0 : storeAlignedSimd128Float(src, dest);
1375 0 : }
1376 :
1377 : template <typename T>
1378 : inline void
1379 0 : MacroAssemblerX86Shared::storeAlignedVector(FloatRegister src, const Address& dest)
1380 : {
1381 0 : storeAlignedSimd128Int(src, dest);
1382 0 : }
1383 :
1384 : template <> inline void
1385 0 : MacroAssemblerX86Shared::loadScalar<int8_t>(const Operand& src, Register dest) {
1386 0 : load8ZeroExtend(src, dest);
1387 0 : }
1388 : template <> inline void
1389 0 : MacroAssemblerX86Shared::loadScalar<int16_t>(const Operand& src, Register dest) {
1390 0 : load16ZeroExtend(src, dest);
1391 0 : }
1392 : template <> inline void
1393 0 : MacroAssemblerX86Shared::loadScalar<int32_t>(const Operand& src, Register dest) {
1394 0 : load32(src, dest);
1395 0 : }
1396 : template <> inline void
1397 0 : MacroAssemblerX86Shared::loadScalar<float>(const Operand& src, FloatRegister dest) {
1398 0 : loadFloat32(src, dest);
1399 0 : }
1400 :
1401 : template <> inline void
1402 0 : MacroAssemblerX86Shared::storeScalar<int8_t>(Register src, const Address& dest) {
1403 0 : store8(src, dest);
1404 0 : }
1405 : template <> inline void
1406 0 : MacroAssemblerX86Shared::storeScalar<int16_t>(Register src, const Address& dest) {
1407 0 : store16(src, dest);
1408 0 : }
1409 : template <> inline void
1410 0 : MacroAssemblerX86Shared::storeScalar<int32_t>(Register src, const Address& dest) {
1411 0 : store32(src, dest);
1412 0 : }
1413 : template <> inline void
1414 0 : MacroAssemblerX86Shared::storeScalar<float>(FloatRegister src, const Address& dest) {
1415 0 : vmovss(src, dest);
1416 0 : }
1417 :
1418 : } // namespace jit
1419 : } // namespace js
1420 :
1421 : #undef CHECK_BYTEREG
1422 : #undef CHECK_BYTEREGS
1423 :
1424 : #endif /* jit_x86_shared_MacroAssembler_x86_shared_h */
|