Line data Source code
1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 : * vim: set ts=8 sts=4 et sw=4 tw=99:
3 : * This Source Code Form is subject to the terms of the Mozilla Public
4 : * License, v. 2.0. If a copy of the MPL was not distributed with this
5 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 :
7 : #include "jit/x86-shared/CodeGenerator-x86-shared.h"
8 :
9 : #include "mozilla/DebugOnly.h"
10 : #include "mozilla/MathAlgorithms.h"
11 :
12 : #include "jsmath.h"
13 :
14 : #include "jit/JitCompartment.h"
15 : #include "jit/JitFrames.h"
16 : #include "jit/Linker.h"
17 : #include "jit/RangeAnalysis.h"
18 : #include "vm/TraceLogging.h"
19 :
20 : #include "jit/MacroAssembler-inl.h"
21 : #include "jit/shared/CodeGenerator-shared-inl.h"
22 :
23 : using namespace js;
24 : using namespace js::jit;
25 :
26 : using mozilla::Abs;
27 : using mozilla::BitwiseCast;
28 : using mozilla::DebugOnly;
29 : using mozilla::FloatingPoint;
30 : using mozilla::FloorLog2;
31 : using mozilla::NegativeInfinity;
32 : using mozilla::SpecificNaN;
33 :
34 : using JS::GenericNaN;
35 :
36 : namespace js {
37 : namespace jit {
38 :
39 8 : CodeGeneratorX86Shared::CodeGeneratorX86Shared(MIRGenerator* gen, LIRGraph* graph, MacroAssembler* masm)
40 8 : : CodeGeneratorShared(gen, graph, masm)
41 : {
42 8 : }
43 :
44 : #ifdef JS_PUNBOX64
45 : Operand
46 0 : CodeGeneratorX86Shared::ToOperandOrRegister64(const LInt64Allocation input)
47 : {
48 0 : return ToOperand(input.value());
49 : }
50 : #else
51 : Register64
52 : CodeGeneratorX86Shared::ToOperandOrRegister64(const LInt64Allocation input)
53 : {
54 : return ToRegister64(input);
55 : }
56 : #endif
57 :
58 : void
59 370 : OutOfLineBailout::accept(CodeGeneratorX86Shared* codegen)
60 : {
61 370 : codegen->visitOutOfLineBailout(this);
62 370 : }
63 :
64 : void
65 78 : CodeGeneratorX86Shared::emitBranch(Assembler::Condition cond, MBasicBlock* mirTrue,
66 : MBasicBlock* mirFalse, Assembler::NaNCond ifNaN)
67 : {
68 78 : if (ifNaN == Assembler::NaN_IsFalse)
69 0 : jumpToBlock(mirFalse, Assembler::Parity);
70 78 : else if (ifNaN == Assembler::NaN_IsTrue)
71 0 : jumpToBlock(mirTrue, Assembler::Parity);
72 :
73 78 : if (isNextBlock(mirFalse->lir())) {
74 31 : jumpToBlock(mirTrue, cond);
75 : } else {
76 47 : jumpToBlock(mirFalse, Assembler::InvertCondition(cond));
77 47 : jumpToBlock(mirTrue);
78 : }
79 78 : }
80 :
81 : void
82 0 : CodeGeneratorX86Shared::visitDouble(LDouble* ins)
83 : {
84 0 : const LDefinition* out = ins->getDef(0);
85 0 : masm.loadConstantDouble(ins->getDouble(), ToFloatRegister(out));
86 0 : }
87 :
88 : void
89 0 : CodeGeneratorX86Shared::visitFloat32(LFloat32* ins)
90 : {
91 0 : const LDefinition* out = ins->getDef(0);
92 0 : masm.loadConstantFloat32(ins->getFloat(), ToFloatRegister(out));
93 0 : }
94 :
95 : void
96 55 : CodeGeneratorX86Shared::visitTestIAndBranch(LTestIAndBranch* test)
97 : {
98 55 : Register input = ToRegister(test->input());
99 55 : masm.test32(input, input);
100 55 : emitBranch(Assembler::NonZero, test->ifTrue(), test->ifFalse());
101 55 : }
102 :
103 : void
104 0 : CodeGeneratorX86Shared::visitTestDAndBranch(LTestDAndBranch* test)
105 : {
106 0 : const LAllocation* opd = test->input();
107 :
108 : // vucomisd flags:
109 : // Z P C
110 : // ---------
111 : // NaN 1 1 1
112 : // > 0 0 0
113 : // < 0 0 1
114 : // = 1 0 0
115 : //
116 : // NaN is falsey, so comparing against 0 and then using the Z flag is
117 : // enough to determine which branch to take.
118 0 : ScratchDoubleScope scratch(masm);
119 0 : masm.zeroDouble(scratch);
120 0 : masm.vucomisd(scratch, ToFloatRegister(opd));
121 0 : emitBranch(Assembler::NotEqual, test->ifTrue(), test->ifFalse());
122 0 : }
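// A minimal host-side sketch (not part of the generated code) of the
// truthiness the sequence above implements: comparing against zero and
// branching on the Z flag makes NaN and +/-0 take the false branch. The
// helper name doubleIsTruthy is hypothetical.
static bool doubleIsTruthy(double d) {
    return d == d && d != 0.0;   // false for NaN (d != d) and for +/-0
}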
123 :
124 : void
125 0 : CodeGeneratorX86Shared::visitTestFAndBranch(LTestFAndBranch* test)
126 : {
127 0 : const LAllocation* opd = test->input();
128 : // vucomiss flags are the same as doubles; see comment above
129 : {
130 0 : ScratchFloat32Scope scratch(masm);
131 0 : masm.zeroFloat32(scratch);
132 0 : masm.vucomiss(scratch, ToFloatRegister(opd));
133 : }
134 0 : emitBranch(Assembler::NotEqual, test->ifTrue(), test->ifFalse());
135 0 : }
136 :
137 : void
138 0 : CodeGeneratorX86Shared::visitBitAndAndBranch(LBitAndAndBranch* baab)
139 : {
140 0 : if (baab->right()->isConstant())
141 0 : masm.test32(ToRegister(baab->left()), Imm32(ToInt32(baab->right())));
142 : else
143 0 : masm.test32(ToRegister(baab->left()), ToRegister(baab->right()));
144 0 : emitBranch(baab->cond(), baab->ifTrue(), baab->ifFalse());
145 0 : }
146 :
147 : void
148 45 : CodeGeneratorX86Shared::emitCompare(MCompare::CompareType type, const LAllocation* left, const LAllocation* right)
149 : {
150 : #ifdef JS_CODEGEN_X64
151 45 : if (type == MCompare::Compare_Object || type == MCompare::Compare_Symbol) {
152 0 : masm.cmpPtr(ToRegister(left), ToOperand(right));
153 0 : return;
154 : }
155 : #endif
156 :
157 45 : if (right->isConstant())
158 37 : masm.cmp32(ToRegister(left), Imm32(ToInt32(right)));
159 : else
160 8 : masm.cmp32(ToRegister(left), ToOperand(right));
161 : }
162 :
163 : void
164 22 : CodeGeneratorX86Shared::visitCompare(LCompare* comp)
165 : {
166 22 : MCompare* mir = comp->mir();
167 22 : emitCompare(mir->compareType(), comp->left(), comp->right());
168 22 : masm.emitSet(JSOpToCondition(mir->compareType(), comp->jsop()), ToRegister(comp->output()));
169 22 : }
170 :
171 : void
172 23 : CodeGeneratorX86Shared::visitCompareAndBranch(LCompareAndBranch* comp)
173 : {
174 23 : MCompare* mir = comp->cmpMir();
175 23 : emitCompare(mir->compareType(), comp->left(), comp->right());
176 23 : Assembler::Condition cond = JSOpToCondition(mir->compareType(), comp->jsop());
177 23 : emitBranch(cond, comp->ifTrue(), comp->ifFalse());
178 23 : }
179 :
180 : void
181 0 : CodeGeneratorX86Shared::visitCompareD(LCompareD* comp)
182 : {
183 0 : FloatRegister lhs = ToFloatRegister(comp->left());
184 0 : FloatRegister rhs = ToFloatRegister(comp->right());
185 :
186 0 : Assembler::DoubleCondition cond = JSOpToDoubleCondition(comp->mir()->jsop());
187 :
188 0 : Assembler::NaNCond nanCond = Assembler::NaNCondFromDoubleCondition(cond);
189 0 : if (comp->mir()->operandsAreNeverNaN())
190 0 : nanCond = Assembler::NaN_HandledByCond;
191 :
192 0 : masm.compareDouble(cond, lhs, rhs);
193 0 : masm.emitSet(Assembler::ConditionFromDoubleCondition(cond), ToRegister(comp->output()), nanCond);
194 0 : }
195 :
196 : void
197 0 : CodeGeneratorX86Shared::visitCompareF(LCompareF* comp)
198 : {
199 0 : FloatRegister lhs = ToFloatRegister(comp->left());
200 0 : FloatRegister rhs = ToFloatRegister(comp->right());
201 :
202 0 : Assembler::DoubleCondition cond = JSOpToDoubleCondition(comp->mir()->jsop());
203 :
204 0 : Assembler::NaNCond nanCond = Assembler::NaNCondFromDoubleCondition(cond);
205 0 : if (comp->mir()->operandsAreNeverNaN())
206 0 : nanCond = Assembler::NaN_HandledByCond;
207 :
208 0 : masm.compareFloat(cond, lhs, rhs);
209 0 : masm.emitSet(Assembler::ConditionFromDoubleCondition(cond), ToRegister(comp->output()), nanCond);
210 0 : }
211 :
212 : void
213 0 : CodeGeneratorX86Shared::visitNotI(LNotI* ins)
214 : {
215 0 : masm.cmp32(ToRegister(ins->input()), Imm32(0));
216 0 : masm.emitSet(Assembler::Equal, ToRegister(ins->output()));
217 0 : }
218 :
219 : void
220 0 : CodeGeneratorX86Shared::visitNotD(LNotD* ins)
221 : {
222 0 : FloatRegister opd = ToFloatRegister(ins->input());
223 :
224 : // Not returns true if the input is a NaN. We don't have to worry about
225 : // it if we know the input is never NaN though.
226 0 : Assembler::NaNCond nanCond = Assembler::NaN_IsTrue;
227 0 : if (ins->mir()->operandIsNeverNaN())
228 0 : nanCond = Assembler::NaN_HandledByCond;
229 :
230 0 : ScratchDoubleScope scratch(masm);
231 0 : masm.zeroDouble(scratch);
232 0 : masm.compareDouble(Assembler::DoubleEqualOrUnordered, opd, scratch);
233 0 : masm.emitSet(Assembler::Equal, ToRegister(ins->output()), nanCond);
234 0 : }
235 :
236 : void
237 0 : CodeGeneratorX86Shared::visitNotF(LNotF* ins)
238 : {
239 0 : FloatRegister opd = ToFloatRegister(ins->input());
240 :
241 : // Not returns true if the input is a NaN. We don't have to worry about
242 : // it if we know the input is never NaN though.
243 0 : Assembler::NaNCond nanCond = Assembler::NaN_IsTrue;
244 0 : if (ins->mir()->operandIsNeverNaN())
245 0 : nanCond = Assembler::NaN_HandledByCond;
246 :
247 0 : ScratchFloat32Scope scratch(masm);
248 0 : masm.zeroFloat32(scratch);
249 0 : masm.compareFloat(Assembler::DoubleEqualOrUnordered, opd, scratch);
250 0 : masm.emitSet(Assembler::Equal, ToRegister(ins->output()), nanCond);
251 0 : }
252 :
253 : void
254 0 : CodeGeneratorX86Shared::visitCompareDAndBranch(LCompareDAndBranch* comp)
255 : {
256 0 : FloatRegister lhs = ToFloatRegister(comp->left());
257 0 : FloatRegister rhs = ToFloatRegister(comp->right());
258 :
259 0 : Assembler::DoubleCondition cond = JSOpToDoubleCondition(comp->cmpMir()->jsop());
260 :
261 0 : Assembler::NaNCond nanCond = Assembler::NaNCondFromDoubleCondition(cond);
262 0 : if (comp->cmpMir()->operandsAreNeverNaN())
263 0 : nanCond = Assembler::NaN_HandledByCond;
264 :
265 0 : masm.compareDouble(cond, lhs, rhs);
266 0 : emitBranch(Assembler::ConditionFromDoubleCondition(cond), comp->ifTrue(), comp->ifFalse(), nanCond);
267 0 : }
268 :
269 : void
270 0 : CodeGeneratorX86Shared::visitCompareFAndBranch(LCompareFAndBranch* comp)
271 : {
272 0 : FloatRegister lhs = ToFloatRegister(comp->left());
273 0 : FloatRegister rhs = ToFloatRegister(comp->right());
274 :
275 0 : Assembler::DoubleCondition cond = JSOpToDoubleCondition(comp->cmpMir()->jsop());
276 :
277 0 : Assembler::NaNCond nanCond = Assembler::NaNCondFromDoubleCondition(cond);
278 0 : if (comp->cmpMir()->operandsAreNeverNaN())
279 0 : nanCond = Assembler::NaN_HandledByCond;
280 :
281 0 : masm.compareFloat(cond, lhs, rhs);
282 0 : emitBranch(Assembler::ConditionFromDoubleCondition(cond), comp->ifTrue(), comp->ifFalse(), nanCond);
283 0 : }
284 :
285 : void
286 0 : CodeGeneratorX86Shared::visitWasmStackArg(LWasmStackArg* ins)
287 : {
288 0 : const MWasmStackArg* mir = ins->mir();
289 0 : Address dst(StackPointer, mir->spOffset());
290 0 : if (ins->arg()->isConstant()) {
291 0 : masm.storePtr(ImmWord(ToInt32(ins->arg())), dst);
292 0 : } else if (ins->arg()->isGeneralReg()) {
293 0 : masm.storePtr(ToRegister(ins->arg()), dst);
294 : } else {
295 0 : switch (mir->input()->type()) {
296 : case MIRType::Double:
297 0 : masm.storeDouble(ToFloatRegister(ins->arg()), dst);
298 0 : return;
299 : case MIRType::Float32:
300 0 : masm.storeFloat32(ToFloatRegister(ins->arg()), dst);
301 0 : return;
302 : // StackPointer is SIMD-aligned and ABIArgGenerator guarantees
303 : // stack offsets are SIMD-aligned.
304 : case MIRType::Int32x4:
305 : case MIRType::Bool32x4:
306 0 : masm.storeAlignedSimd128Int(ToFloatRegister(ins->arg()), dst);
307 0 : return;
308 : case MIRType::Float32x4:
309 0 : masm.storeAlignedSimd128Float(ToFloatRegister(ins->arg()), dst);
310 0 : return;
311 0 : default: break;
312 : }
313 0 : MOZ_MAKE_COMPILER_ASSUME_IS_UNREACHABLE("unexpected mir type in WasmStackArg");
314 : }
315 : }
316 :
317 : void
318 0 : CodeGeneratorX86Shared::visitWasmStackArgI64(LWasmStackArgI64* ins)
319 : {
320 0 : const MWasmStackArg* mir = ins->mir();
321 0 : Address dst(StackPointer, mir->spOffset());
322 0 : if (IsConstant(ins->arg()))
323 0 : masm.store64(Imm64(ToInt64(ins->arg())), dst);
324 : else
325 0 : masm.store64(ToRegister64(ins->arg()), dst);
326 0 : }
327 :
328 : void
329 0 : CodeGeneratorX86Shared::visitWasmSelect(LWasmSelect* ins)
330 : {
331 0 : MIRType mirType = ins->mir()->type();
332 :
333 0 : Register cond = ToRegister(ins->condExpr());
334 0 : Operand falseExpr = ToOperand(ins->falseExpr());
335 :
336 0 : masm.test32(cond, cond);
337 :
338 0 : if (mirType == MIRType::Int32) {
339 0 : Register out = ToRegister(ins->output());
340 0 : MOZ_ASSERT(ToRegister(ins->trueExpr()) == out, "true expr input is reused for output");
341 0 : masm.cmovz(falseExpr, out);
342 0 : return;
343 : }
344 :
345 0 : FloatRegister out = ToFloatRegister(ins->output());
346 0 : MOZ_ASSERT(ToFloatRegister(ins->trueExpr()) == out, "true expr input is reused for output");
347 :
348 0 : Label done;
349 0 : masm.j(Assembler::NonZero, &done);
350 :
351 0 : if (mirType == MIRType::Float32) {
352 0 : if (falseExpr.kind() == Operand::FPREG)
353 0 : masm.moveFloat32(ToFloatRegister(ins->falseExpr()), out);
354 : else
355 0 : masm.loadFloat32(falseExpr, out);
356 0 : } else if (mirType == MIRType::Double) {
357 0 : if (falseExpr.kind() == Operand::FPREG)
358 0 : masm.moveDouble(ToFloatRegister(ins->falseExpr()), out);
359 : else
360 0 : masm.loadDouble(falseExpr, out);
361 : } else {
362 0 : MOZ_CRASH("unhandled type in visitWasmSelect!");
363 : }
364 :
365 0 : masm.bind(&done);
366 0 : return;
367 : }
368 :
369 : void
370 0 : CodeGeneratorX86Shared::visitWasmReinterpret(LWasmReinterpret* lir)
371 : {
372 0 : MOZ_ASSERT(gen->compilingWasm());
373 0 : MWasmReinterpret* ins = lir->mir();
374 :
375 0 : MIRType to = ins->type();
376 : #ifdef DEBUG
377 0 : MIRType from = ins->input()->type();
378 : #endif
379 :
380 0 : switch (to) {
381 : case MIRType::Int32:
382 0 : MOZ_ASSERT(from == MIRType::Float32);
383 0 : masm.vmovd(ToFloatRegister(lir->input()), ToRegister(lir->output()));
384 0 : break;
385 : case MIRType::Float32:
386 0 : MOZ_ASSERT(from == MIRType::Int32);
387 0 : masm.vmovd(ToRegister(lir->input()), ToFloatRegister(lir->output()));
388 0 : break;
389 : case MIRType::Double:
390 : case MIRType::Int64:
391 0 : MOZ_CRASH("not handled by this LIR opcode");
392 : default:
393 0 : MOZ_CRASH("unexpected WasmReinterpret");
394 : }
395 0 : }
396 :
397 : void
398 0 : CodeGeneratorX86Shared::visitOutOfLineLoadTypedArrayOutOfBounds(OutOfLineLoadTypedArrayOutOfBounds* ool)
399 : {
400 0 : switch (ool->viewType()) {
401 : case Scalar::Int64:
402 : case Scalar::Float32x4:
403 : case Scalar::Int8x16:
404 : case Scalar::Int16x8:
405 : case Scalar::Int32x4:
406 : case Scalar::MaxTypedArrayViewType:
407 0 : MOZ_CRASH("unexpected array type");
408 : case Scalar::Float32:
409 0 : masm.loadConstantFloat32(float(GenericNaN()), ool->dest().fpu());
410 0 : break;
411 : case Scalar::Float64:
412 0 : masm.loadConstantDouble(GenericNaN(), ool->dest().fpu());
413 0 : break;
414 : case Scalar::Int8:
415 : case Scalar::Uint8:
416 : case Scalar::Int16:
417 : case Scalar::Uint16:
418 : case Scalar::Int32:
419 : case Scalar::Uint32:
420 : case Scalar::Uint8Clamped:
421 0 : Register destReg = ool->dest().gpr();
422 0 : masm.mov(ImmWord(0), destReg);
423 0 : break;
424 : }
425 0 : masm.jmp(ool->rejoin());
426 0 : }
427 :
428 : void
429 0 : CodeGeneratorX86Shared::visitWasmAddOffset(LWasmAddOffset* lir)
430 : {
431 0 : MWasmAddOffset* mir = lir->mir();
432 0 : Register base = ToRegister(lir->base());
433 0 : Register out = ToRegister(lir->output());
434 :
435 0 : if (base != out)
436 0 : masm.move32(base, out);
437 0 : masm.add32(Imm32(mir->offset()), out);
438 :
439 0 : masm.j(Assembler::CarrySet, trap(mir, wasm::Trap::OutOfBounds));
440 0 : }
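// Host-side sketch (not JIT output) of the bounds check above: the 32-bit add
// sets the carry flag exactly when base + offset wraps modulo 2^32, and that
// wrap must trap as wasm::Trap::OutOfBounds. The helper name
// wasmAddOffsetWouldTrap is hypothetical.
#include <cstdint>

static bool wasmAddOffsetWouldTrap(uint32_t base, uint32_t offset) {
    uint32_t sum = base + offset;   // wraps modulo 2^32
    return sum < base;              // true exactly when the add carried out
}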
441 :
442 : void
443 0 : CodeGeneratorX86Shared::visitWasmTruncateToInt32(LWasmTruncateToInt32* lir)
444 : {
445 0 : FloatRegister input = ToFloatRegister(lir->input());
446 0 : Register output = ToRegister(lir->output());
447 :
448 0 : MWasmTruncateToInt32* mir = lir->mir();
449 0 : MIRType inputType = mir->input()->type();
450 :
451 0 : MOZ_ASSERT(inputType == MIRType::Double || inputType == MIRType::Float32);
452 :
453 0 : auto* ool = new (alloc()) OutOfLineWasmTruncateCheck(mir, input);
454 0 : addOutOfLineCode(ool, mir);
455 :
456 0 : Label* oolEntry = ool->entry();
457 0 : if (mir->isUnsigned()) {
458 0 : if (inputType == MIRType::Double)
459 0 : masm.wasmTruncateDoubleToUInt32(input, output, oolEntry);
460 0 : else if (inputType == MIRType::Float32)
461 0 : masm.wasmTruncateFloat32ToUInt32(input, output, oolEntry);
462 : else
463 0 : MOZ_CRASH("unexpected type");
464 0 : return;
465 : }
466 :
467 0 : if (inputType == MIRType::Double)
468 0 : masm.wasmTruncateDoubleToInt32(input, output, oolEntry);
469 0 : else if (inputType == MIRType::Float32)
470 0 : masm.wasmTruncateFloat32ToInt32(input, output, oolEntry);
471 : else
472 0 : MOZ_CRASH("unexpected type");
473 :
474 0 : masm.bind(ool->rejoin());
475 : }
476 :
477 : bool
478 8 : CodeGeneratorX86Shared::generateOutOfLineCode()
479 : {
480 8 : if (!CodeGeneratorShared::generateOutOfLineCode())
481 0 : return false;
482 :
483 8 : if (deoptLabel_.used()) {
484 : // All non-table-based bailouts will go here.
485 8 : masm.bind(&deoptLabel_);
486 :
487 : // Push the frame size, so the handler can recover the IonScript.
488 8 : masm.push(Imm32(frameSize()));
489 :
490 8 : JitCode* handler = gen->jitRuntime()->getGenericBailoutHandler();
491 8 : masm.jmp(ImmPtr(handler->raw()), Relocation::JITCODE);
492 : }
493 :
494 8 : return !masm.oom();
495 : }
496 :
497 : class BailoutJump {
498 : Assembler::Condition cond_;
499 :
500 : public:
501 91 : explicit BailoutJump(Assembler::Condition cond) : cond_(cond)
502 91 : { }
503 : #ifdef JS_CODEGEN_X86
504 : void operator()(MacroAssembler& masm, uint8_t* code) const {
505 : masm.j(cond_, ImmPtr(code), Relocation::HARDCODED);
506 : }
507 : #endif
508 91 : void operator()(MacroAssembler& masm, Label* label) const {
509 91 : masm.j(cond_, label);
510 91 : }
511 : };
512 :
513 : class BailoutLabel {
514 : Label* label_;
515 :
516 : public:
517 279 : explicit BailoutLabel(Label* label) : label_(label)
518 279 : { }
519 : #ifdef JS_CODEGEN_X86
520 : void operator()(MacroAssembler& masm, uint8_t* code) const {
521 : masm.retarget(label_, ImmPtr(code), Relocation::HARDCODED);
522 : }
523 : #endif
524 279 : void operator()(MacroAssembler& masm, Label* label) const {
525 279 : masm.retarget(label_, label);
526 279 : }
527 : };
528 :
529 : template <typename T> void
530 370 : CodeGeneratorX86Shared::bailout(const T& binder, LSnapshot* snapshot)
531 : {
532 370 : encode(snapshot);
533 :
534 : // Though the assembler doesn't track all frame pushes, at least make sure
535 : // the known value makes sense. We can't use bailout tables if the stack
536 : // isn't properly aligned to the static frame size.
537 370 : MOZ_ASSERT_IF(frameClass_ != FrameSizeClass::None() && deoptTable_,
538 : frameClass_.frameSize() == masm.framePushed());
539 :
540 : #ifdef JS_CODEGEN_X86
541 : // On x64, bailout tables are pointless, because 16 extra bytes are
542 : // reserved per external jump, whereas it takes only 10 bytes to encode a
543 : // non-table based bailout.

544 : if (assignBailoutId(snapshot)) {
545 : binder(masm, deoptTable_->raw() + snapshot->bailoutId() * BAILOUT_TABLE_ENTRY_SIZE);
546 : return;
547 : }
548 : #endif
549 :
550 : // We could not use a jump table, either because all bailout IDs were
551 : // reserved, or a jump table is not optimal for this frame size or
552 : // platform. Whatever, we will generate a lazy bailout.
553 : //
554 : // All bailout code is associated with the bytecodeSite of the block we are
555 : // bailing out from.
556 370 : InlineScriptTree* tree = snapshot->mir()->block()->trackedTree();
557 370 : OutOfLineBailout* ool = new(alloc()) OutOfLineBailout(snapshot);
558 370 : addOutOfLineCode(ool, new(alloc()) BytecodeSite(tree, tree->script()->code()));
559 :
560 370 : binder(masm, ool->entry());
561 370 : }
562 :
563 : void
564 91 : CodeGeneratorX86Shared::bailoutIf(Assembler::Condition condition, LSnapshot* snapshot)
565 : {
566 91 : bailout(BailoutJump(condition), snapshot);
567 91 : }
568 :
569 : void
570 0 : CodeGeneratorX86Shared::bailoutIf(Assembler::DoubleCondition condition, LSnapshot* snapshot)
571 : {
572 0 : MOZ_ASSERT(Assembler::NaNCondFromDoubleCondition(condition) == Assembler::NaN_HandledByCond);
573 0 : bailoutIf(Assembler::ConditionFromDoubleCondition(condition), snapshot);
574 0 : }
575 :
576 : void
577 279 : CodeGeneratorX86Shared::bailoutFrom(Label* label, LSnapshot* snapshot)
578 : {
579 279 : MOZ_ASSERT(label->used() && !label->bound());
580 279 : bailout(BailoutLabel(label), snapshot);
581 279 : }
582 :
583 : void
584 19 : CodeGeneratorX86Shared::bailout(LSnapshot* snapshot)
585 : {
586 38 : Label label;
587 19 : masm.jump(&label);
588 19 : bailoutFrom(&label, snapshot);
589 19 : }
590 :
591 : void
592 370 : CodeGeneratorX86Shared::visitOutOfLineBailout(OutOfLineBailout* ool)
593 : {
594 370 : masm.push(Imm32(ool->snapshot()->snapshotOffset()));
595 370 : masm.jmp(&deoptLabel_);
596 370 : }
597 :
598 : void
599 0 : CodeGeneratorX86Shared::visitMinMaxD(LMinMaxD* ins)
600 : {
601 0 : FloatRegister first = ToFloatRegister(ins->first());
602 0 : FloatRegister second = ToFloatRegister(ins->second());
603 : #ifdef DEBUG
604 0 : FloatRegister output = ToFloatRegister(ins->output());
605 0 : MOZ_ASSERT(first == output);
606 : #endif
607 :
608 0 : bool handleNaN = !ins->mir()->range() || ins->mir()->range()->canBeNaN();
609 :
610 0 : if (ins->mir()->isMax())
611 0 : masm.maxDouble(second, first, handleNaN);
612 : else
613 0 : masm.minDouble(second, first, handleNaN);
614 0 : }
615 :
616 : void
617 0 : CodeGeneratorX86Shared::visitMinMaxF(LMinMaxF* ins)
618 : {
619 0 : FloatRegister first = ToFloatRegister(ins->first());
620 0 : FloatRegister second = ToFloatRegister(ins->second());
621 : #ifdef DEBUG
622 0 : FloatRegister output = ToFloatRegister(ins->output());
623 0 : MOZ_ASSERT(first == output);
624 : #endif
625 :
626 0 : bool handleNaN = !ins->mir()->range() || ins->mir()->range()->canBeNaN();
627 :
628 0 : if (ins->mir()->isMax())
629 0 : masm.maxFloat32(second, first, handleNaN);
630 : else
631 0 : masm.minFloat32(second, first, handleNaN);
632 0 : }
633 :
634 : void
635 0 : CodeGeneratorX86Shared::visitAbsD(LAbsD* ins)
636 : {
637 0 : FloatRegister input = ToFloatRegister(ins->input());
638 0 : MOZ_ASSERT(input == ToFloatRegister(ins->output()));
639 : // Load a value which is all ones except for the sign bit.
640 0 : ScratchDoubleScope scratch(masm);
641 0 : masm.loadConstantDouble(SpecificNaN<double>(0, FloatingPoint<double>::kSignificandBits), scratch);
642 0 : masm.vandpd(scratch, input, input);
643 0 : }
644 :
645 : void
646 0 : CodeGeneratorX86Shared::visitAbsF(LAbsF* ins)
647 : {
648 0 : FloatRegister input = ToFloatRegister(ins->input());
649 0 : MOZ_ASSERT(input == ToFloatRegister(ins->output()));
650 : // Same trick as visitAbsD above.
651 0 : ScratchFloat32Scope scratch(masm);
652 0 : masm.loadConstantFloat32(SpecificNaN<float>(0, FloatingPoint<float>::kSignificandBits), scratch);
653 0 : masm.vandps(scratch, input, input);
654 0 : }
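// Host-side sketch of the mask used by visitAbsD/visitAbsF:
// SpecificNaN<double>(0, kSignificandBits) is the bit pattern
// 0x7FFFFFFFFFFFFFFF, so the vandpd/vandps simply clears the IEEE sign bit.
// The helper name absViaSignMask is hypothetical.
#include <cstdint>
#include <cstring>

static double absViaSignMask(double x) {
    uint64_t bits;
    std::memcpy(&bits, &x, sizeof bits);
    bits &= 0x7FFFFFFFFFFFFFFFULL;   // all ones except the sign bit
    std::memcpy(&x, &bits, sizeof x);
    return x;
}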
655 :
656 : void
657 0 : CodeGeneratorX86Shared::visitClzI(LClzI* ins)
658 : {
659 0 : Register input = ToRegister(ins->input());
660 0 : Register output = ToRegister(ins->output());
661 0 : bool knownNotZero = ins->mir()->operandIsNeverZero();
662 :
663 0 : masm.clz32(input, output, knownNotZero);
664 0 : }
665 :
666 : void
667 0 : CodeGeneratorX86Shared::visitCtzI(LCtzI* ins)
668 : {
669 0 : Register input = ToRegister(ins->input());
670 0 : Register output = ToRegister(ins->output());
671 0 : bool knownNotZero = ins->mir()->operandIsNeverZero();
672 :
673 0 : masm.ctz32(input, output, knownNotZero);
674 0 : }
675 :
676 : void
677 0 : CodeGeneratorX86Shared::visitPopcntI(LPopcntI* ins)
678 : {
679 0 : Register input = ToRegister(ins->input());
680 0 : Register output = ToRegister(ins->output());
681 0 : Register temp = ins->temp()->isBogusTemp() ? InvalidReg : ToRegister(ins->temp());
682 :
683 0 : masm.popcnt32(input, output, temp);
684 0 : }
685 :
686 : void
687 0 : CodeGeneratorX86Shared::visitSqrtD(LSqrtD* ins)
688 : {
689 0 : FloatRegister input = ToFloatRegister(ins->input());
690 0 : FloatRegister output = ToFloatRegister(ins->output());
691 0 : masm.vsqrtsd(input, output, output);
692 0 : }
693 :
694 : void
695 0 : CodeGeneratorX86Shared::visitSqrtF(LSqrtF* ins)
696 : {
697 0 : FloatRegister input = ToFloatRegister(ins->input());
698 0 : FloatRegister output = ToFloatRegister(ins->output());
699 0 : masm.vsqrtss(input, output, output);
700 0 : }
701 :
702 : void
703 0 : CodeGeneratorX86Shared::visitPowHalfD(LPowHalfD* ins)
704 : {
705 0 : FloatRegister input = ToFloatRegister(ins->input());
706 0 : FloatRegister output = ToFloatRegister(ins->output());
707 :
708 0 : ScratchDoubleScope scratch(masm);
709 :
710 0 : Label done, sqrt;
711 :
712 0 : if (!ins->mir()->operandIsNeverNegativeInfinity()) {
713 : // Branch if not -Infinity.
714 0 : masm.loadConstantDouble(NegativeInfinity<double>(), scratch);
715 :
716 0 : Assembler::DoubleCondition cond = Assembler::DoubleNotEqualOrUnordered;
717 0 : if (ins->mir()->operandIsNeverNaN())
718 0 : cond = Assembler::DoubleNotEqual;
719 0 : masm.branchDouble(cond, input, scratch, &sqrt);
720 :
721 : // Math.pow(-Infinity, 0.5) == Infinity.
722 0 : masm.zeroDouble(output);
723 0 : masm.subDouble(scratch, output);
724 0 : masm.jump(&done);
725 :
726 0 : masm.bind(&sqrt);
727 : }
728 :
729 0 : if (!ins->mir()->operandIsNeverNegativeZero()) {
730 : // Math.pow(-0, 0.5) == 0 == Math.pow(0, 0.5). Adding 0 converts any -0 to 0.
731 0 : masm.zeroDouble(scratch);
732 0 : masm.addDouble(input, scratch);
733 0 : masm.vsqrtsd(scratch, output, output);
734 : } else {
735 0 : masm.vsqrtsd(input, output, output);
736 : }
737 :
738 0 : masm.bind(&done);
739 0 : }
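// Host-side sketch (not JIT output) of the special cases handled above,
// assuming the usual JS semantics for Math.pow(x, 0.5). The helper name
// powHalfRef is hypothetical.
#include <cmath>
#include <limits>

static double powHalfRef(double x) {
    // Math.pow(-Infinity, 0.5) == +Infinity, whereas sqrt(-Infinity) is NaN,
    // hence the explicit branch before the square root.
    if (x == -std::numeric_limits<double>::infinity())
        return std::numeric_limits<double>::infinity();
    // Math.pow(-0, 0.5) == +0, but IEEE sqrt(-0.0) returns -0.0. Adding +0.0
    // first turns -0.0 into +0.0, which is what the addDouble above does.
    return std::sqrt(x + 0.0);
}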
740 :
741 : class OutOfLineUndoALUOperation : public OutOfLineCodeBase<CodeGeneratorX86Shared>
742 : {
743 : LInstruction* ins_;
744 :
745 : public:
746 1 : explicit OutOfLineUndoALUOperation(LInstruction* ins)
747 1 : : ins_(ins)
748 1 : { }
749 :
750 1 : virtual void accept(CodeGeneratorX86Shared* codegen) {
751 1 : codegen->visitOutOfLineUndoALUOperation(this);
752 1 : }
753 2 : LInstruction* ins() const {
754 2 : return ins_;
755 : }
756 : };
757 :
758 : void
759 8 : CodeGeneratorX86Shared::visitAddI(LAddI* ins)
760 : {
761 8 : if (ins->rhs()->isConstant())
762 8 : masm.addl(Imm32(ToInt32(ins->rhs())), ToOperand(ins->lhs()));
763 : else
764 0 : masm.addl(ToOperand(ins->rhs()), ToRegister(ins->lhs()));
765 :
766 8 : if (ins->snapshot()) {
767 1 : if (ins->recoversInput()) {
768 1 : OutOfLineUndoALUOperation* ool = new(alloc()) OutOfLineUndoALUOperation(ins);
769 1 : addOutOfLineCode(ool, ins->mir());
770 1 : masm.j(Assembler::Overflow, ool->entry());
771 : } else {
772 0 : bailoutIf(Assembler::Overflow, ins->snapshot());
773 : }
774 : }
775 8 : }
776 :
777 : void
778 0 : CodeGeneratorX86Shared::visitAddI64(LAddI64* lir)
779 : {
780 0 : const LInt64Allocation lhs = lir->getInt64Operand(LAddI64::Lhs);
781 0 : const LInt64Allocation rhs = lir->getInt64Operand(LAddI64::Rhs);
782 :
783 0 : MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs));
784 :
785 0 : if (IsConstant(rhs)) {
786 0 : masm.add64(Imm64(ToInt64(rhs)), ToRegister64(lhs));
787 0 : return;
788 : }
789 :
790 0 : masm.add64(ToOperandOrRegister64(rhs), ToRegister64(lhs));
791 : }
792 :
793 : void
794 0 : CodeGeneratorX86Shared::visitSubI(LSubI* ins)
795 : {
796 0 : if (ins->rhs()->isConstant())
797 0 : masm.subl(Imm32(ToInt32(ins->rhs())), ToOperand(ins->lhs()));
798 : else
799 0 : masm.subl(ToOperand(ins->rhs()), ToRegister(ins->lhs()));
800 :
801 0 : if (ins->snapshot()) {
802 0 : if (ins->recoversInput()) {
803 0 : OutOfLineUndoALUOperation* ool = new(alloc()) OutOfLineUndoALUOperation(ins);
804 0 : addOutOfLineCode(ool, ins->mir());
805 0 : masm.j(Assembler::Overflow, ool->entry());
806 : } else {
807 0 : bailoutIf(Assembler::Overflow, ins->snapshot());
808 : }
809 : }
810 0 : }
811 :
812 : void
813 0 : CodeGeneratorX86Shared::visitSubI64(LSubI64* lir)
814 : {
815 0 : const LInt64Allocation lhs = lir->getInt64Operand(LSubI64::Lhs);
816 0 : const LInt64Allocation rhs = lir->getInt64Operand(LSubI64::Rhs);
817 :
818 0 : MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs));
819 :
820 0 : if (IsConstant(rhs)) {
821 0 : masm.sub64(Imm64(ToInt64(rhs)), ToRegister64(lhs));
822 0 : return;
823 : }
824 :
825 0 : masm.sub64(ToOperandOrRegister64(rhs), ToRegister64(lhs));
826 : }
827 :
828 : void
829 1 : CodeGeneratorX86Shared::visitOutOfLineUndoALUOperation(OutOfLineUndoALUOperation* ool)
830 : {
831 1 : LInstruction* ins = ool->ins();
832 1 : Register reg = ToRegister(ins->getDef(0));
833 :
834 2 : DebugOnly<LAllocation*> lhs = ins->getOperand(0);
835 1 : LAllocation* rhs = ins->getOperand(1);
836 :
837 1 : MOZ_ASSERT(reg == ToRegister(lhs));
838 1 : MOZ_ASSERT_IF(rhs->isGeneralReg(), reg != ToRegister(rhs));
839 :
840 : // Undo the effect of the ALU operation, which was performed on the output
841 : // register and overflowed. Writing to the output register clobbered an
842 : // input reg, and the original value of the input needs to be recovered
843 : // to satisfy the constraint imposed by any RECOVERED_INPUT operands to
844 : // the bailout snapshot.
845 :
846 1 : if (rhs->isConstant()) {
847 1 : Imm32 constant(ToInt32(rhs));
848 1 : if (ins->isAddI())
849 1 : masm.subl(constant, reg);
850 : else
851 0 : masm.addl(constant, reg);
852 : } else {
853 0 : if (ins->isAddI())
854 0 : masm.subl(ToOperand(rhs), reg);
855 : else
856 0 : masm.addl(ToOperand(rhs), reg);
857 : }
858 :
859 1 : bailout(ool->ins()->snapshot());
860 1 : }
861 :
862 : class MulNegativeZeroCheck : public OutOfLineCodeBase<CodeGeneratorX86Shared>
863 : {
864 : LMulI* ins_;
865 :
866 : public:
867 0 : explicit MulNegativeZeroCheck(LMulI* ins)
868 0 : : ins_(ins)
869 0 : { }
870 :
871 0 : virtual void accept(CodeGeneratorX86Shared* codegen) {
872 0 : codegen->visitMulNegativeZeroCheck(this);
873 0 : }
874 0 : LMulI* ins() const {
875 0 : return ins_;
876 : }
877 : };
878 :
879 : void
880 0 : CodeGeneratorX86Shared::visitMulI(LMulI* ins)
881 : {
882 0 : const LAllocation* lhs = ins->lhs();
883 0 : const LAllocation* rhs = ins->rhs();
884 0 : MMul* mul = ins->mir();
885 0 : MOZ_ASSERT_IF(mul->mode() == MMul::Integer, !mul->canBeNegativeZero() && !mul->canOverflow());
886 :
887 0 : if (rhs->isConstant()) {
888 : // Bailout on -0.0
889 0 : int32_t constant = ToInt32(rhs);
890 0 : if (mul->canBeNegativeZero() && constant <= 0) {
891 0 : Assembler::Condition bailoutCond = (constant == 0) ? Assembler::Signed : Assembler::Equal;
892 0 : masm.test32(ToRegister(lhs), ToRegister(lhs));
893 0 : bailoutIf(bailoutCond, ins->snapshot());
894 : }
895 :
896 0 : switch (constant) {
897 : case -1:
898 0 : masm.negl(ToOperand(lhs));
899 0 : break;
900 : case 0:
901 0 : masm.xorl(ToOperand(lhs), ToRegister(lhs));
902 0 : return; // escape overflow check;
903 : case 1:
904 : // nop
905 0 : return; // escape overflow check;
906 : case 2:
907 0 : masm.addl(ToOperand(lhs), ToRegister(lhs));
908 0 : break;
909 : default:
910 0 : if (!mul->canOverflow() && constant > 0) {
911 : // Use shift if cannot overflow and constant is power of 2
912 0 : int32_t shift = FloorLog2(constant);
913 0 : if ((1 << shift) == constant) {
914 0 : masm.shll(Imm32(shift), ToRegister(lhs));
915 0 : return;
916 : }
917 : }
918 0 : masm.imull(Imm32(ToInt32(rhs)), ToRegister(lhs));
919 : }
920 :
921 : // Bailout on overflow
922 0 : if (mul->canOverflow())
923 0 : bailoutIf(Assembler::Overflow, ins->snapshot());
924 : } else {
925 0 : masm.imull(ToOperand(rhs), ToRegister(lhs));
926 :
927 : // Bailout on overflow
928 0 : if (mul->canOverflow())
929 0 : bailoutIf(Assembler::Overflow, ins->snapshot());
930 :
931 0 : if (mul->canBeNegativeZero()) {
932 : // Jump to an OOL path if the result is 0.
933 0 : MulNegativeZeroCheck* ool = new(alloc()) MulNegativeZeroCheck(ins);
934 0 : addOutOfLineCode(ool, mul);
935 :
936 0 : masm.test32(ToRegister(lhs), ToRegister(lhs));
937 0 : masm.j(Assembler::Zero, ool->entry());
938 0 : masm.bind(ool->rejoin());
939 : }
940 : }
941 : }
942 :
943 : void
944 0 : CodeGeneratorX86Shared::visitMulI64(LMulI64* lir)
945 : {
946 0 : const LInt64Allocation lhs = lir->getInt64Operand(LMulI64::Lhs);
947 0 : const LInt64Allocation rhs = lir->getInt64Operand(LMulI64::Rhs);
948 :
949 0 : MOZ_ASSERT(ToRegister64(lhs) == ToOutRegister64(lir));
950 :
951 0 : if (IsConstant(rhs)) {
952 0 : int64_t constant = ToInt64(rhs);
953 0 : switch (constant) {
954 : case -1:
955 0 : masm.neg64(ToRegister64(lhs));
956 0 : return;
957 : case 0:
958 0 : masm.xor64(ToRegister64(lhs), ToRegister64(lhs));
959 0 : return;
960 : case 1:
961 : // nop
962 0 : return;
963 : case 2:
964 0 : masm.add64(ToRegister64(lhs), ToRegister64(lhs));
965 0 : return;
966 : default:
967 0 : if (constant > 0) {
968 : // Use shift if constant is power of 2.
969 0 : int32_t shift = mozilla::FloorLog2(constant);
970 0 : if (int64_t(1) << shift == constant) {
971 0 : masm.lshift64(Imm32(shift), ToRegister64(lhs));
972 0 : return;
973 : }
974 : }
975 0 : Register temp = ToTempRegisterOrInvalid(lir->temp());
976 0 : masm.mul64(Imm64(constant), ToRegister64(lhs), temp);
977 : }
978 : } else {
979 0 : Register temp = ToTempRegisterOrInvalid(lir->temp());
980 0 : masm.mul64(ToOperandOrRegister64(rhs), ToRegister64(lhs), temp);
981 : }
982 : }
983 :
984 : class ReturnZero : public OutOfLineCodeBase<CodeGeneratorX86Shared>
985 : {
986 : Register reg_;
987 :
988 : public:
989 0 : explicit ReturnZero(Register reg)
990 0 : : reg_(reg)
991 0 : { }
992 :
993 0 : virtual void accept(CodeGeneratorX86Shared* codegen) {
994 0 : codegen->visitReturnZero(this);
995 0 : }
996 0 : Register reg() const {
997 0 : return reg_;
998 : }
999 : };
1000 :
1001 : void
1002 0 : CodeGeneratorX86Shared::visitReturnZero(ReturnZero* ool)
1003 : {
1004 0 : masm.mov(ImmWord(0), ool->reg());
1005 0 : masm.jmp(ool->rejoin());
1006 0 : }
1007 :
1008 : void
1009 0 : CodeGeneratorX86Shared::visitUDivOrMod(LUDivOrMod* ins)
1010 : {
1011 0 : Register lhs = ToRegister(ins->lhs());
1012 0 : Register rhs = ToRegister(ins->rhs());
1013 0 : Register output = ToRegister(ins->output());
1014 :
1015 0 : MOZ_ASSERT_IF(lhs != rhs, rhs != eax);
1016 0 : MOZ_ASSERT(rhs != edx);
1017 0 : MOZ_ASSERT_IF(output == eax, ToRegister(ins->remainder()) == edx);
1018 :
1019 0 : ReturnZero* ool = nullptr;
1020 :
1021 : // Put the lhs in eax.
1022 0 : if (lhs != eax)
1023 0 : masm.mov(lhs, eax);
1024 :
1025 : // Prevent divide by zero.
1026 0 : if (ins->canBeDivideByZero()) {
1027 0 : masm.test32(rhs, rhs);
1028 0 : if (ins->mir()->isTruncated()) {
1029 0 : if (ins->trapOnError()) {
1030 0 : masm.j(Assembler::Zero, trap(ins, wasm::Trap::IntegerDivideByZero));
1031 : } else {
1032 0 : ool = new(alloc()) ReturnZero(output);
1033 0 : masm.j(Assembler::Zero, ool->entry());
1034 : }
1035 : } else {
1036 0 : bailoutIf(Assembler::Zero, ins->snapshot());
1037 : }
1038 : }
1039 :
1040 : // Zero extend the lhs into edx to make (edx:eax), since udiv is 64-bit.
1041 0 : masm.mov(ImmWord(0), edx);
1042 0 : masm.udiv(rhs);
1043 :
1044 : // If the remainder is > 0, bailout since this must be a double.
1045 0 : if (ins->mir()->isDiv() && !ins->mir()->toDiv()->canTruncateRemainder()) {
1046 0 : Register remainder = ToRegister(ins->remainder());
1047 0 : masm.test32(remainder, remainder);
1048 0 : bailoutIf(Assembler::NonZero, ins->snapshot());
1049 : }
1050 :
1051 : // Unsigned div or mod can return a value that's not a signed int32.
1052 : // If our users aren't expecting that, bail.
1053 0 : if (!ins->mir()->isTruncated()) {
1054 0 : masm.test32(output, output);
1055 0 : bailoutIf(Assembler::Signed, ins->snapshot());
1056 : }
1057 :
1058 0 : if (ool) {
1059 0 : addOutOfLineCode(ool, ins->mir());
1060 0 : masm.bind(ool->rejoin());
1061 : }
1062 0 : }
1063 :
1064 : void
1065 0 : CodeGeneratorX86Shared::visitUDivOrModConstant(LUDivOrModConstant *ins) {
1066 0 : Register lhs = ToRegister(ins->numerator());
1067 0 : Register output = ToRegister(ins->output());
1068 0 : uint32_t d = ins->denominator();
1069 :
1070 : // This emits the division answer into edx or the modulus answer into eax.
1071 0 : MOZ_ASSERT(output == eax || output == edx);
1072 0 : MOZ_ASSERT(lhs != eax && lhs != edx);
1073 0 : bool isDiv = (output == edx);
1074 :
1075 0 : if (d == 0) {
1076 0 : if (ins->mir()->isTruncated()) {
1077 0 : if (ins->trapOnError())
1078 0 : masm.jump(trap(ins, wasm::Trap::IntegerDivideByZero));
1079 : else
1080 0 : masm.xorl(output, output);
1081 : } else {
1082 0 : bailout(ins->snapshot());
1083 : }
1084 0 : return;
1085 : }
1086 :
1087 : // The denominator isn't a power of 2 (see LDivPowTwoI and LModPowTwoI).
1088 0 : MOZ_ASSERT((d & (d - 1)) != 0);
1089 :
1090 0 : ReciprocalMulConstants rmc = computeDivisionConstants(d, /* maxLog = */ 32);
1091 :
1092 : // We first compute (M * n) >> 32, where M = rmc.multiplier.
1093 0 : masm.movl(Imm32(rmc.multiplier), eax);
1094 0 : masm.umull(lhs);
1095 0 : if (rmc.multiplier > UINT32_MAX) {
1096 : // M >= 2^32 and shift == 0 is impossible, as d >= 2 implies that
1097 : // ((M * n) >> (32 + shift)) >= n > floor(n/d) whenever n >= d, contradicting
1098 : // the proof of correctness in computeDivisionConstants.
1099 0 : MOZ_ASSERT(rmc.shiftAmount > 0);
1100 0 : MOZ_ASSERT(rmc.multiplier < (int64_t(1) << 33));
1101 :
1102 : // We actually computed edx = ((uint32_t(M) * n) >> 32) instead. Since
1103 : // (M * n) >> (32 + shift) is the same as (edx + n) >> shift, we can
1104 : // correct for the overflow. This case is a bit trickier than the signed
1105 : // case, though, as the (edx + n) addition itself can overflow; however,
1106 : // note that (edx + n) >> shift == (((n - edx) >> 1) + edx) >> (shift - 1),
1107 : // which is overflow-free. See Hacker's Delight, section 10-8 for details.
1108 :
1109 : // Compute (n - edx) >> 1 into eax.
1110 0 : masm.movl(lhs, eax);
1111 0 : masm.subl(edx, eax);
1112 0 : masm.shrl(Imm32(1), eax);
1113 :
1114 : // Finish the computation.
1115 0 : masm.addl(eax, edx);
1116 0 : masm.shrl(Imm32(rmc.shiftAmount - 1), edx);
1117 : } else {
1118 0 : masm.shrl(Imm32(rmc.shiftAmount), edx);
1119 : }
1120 :
1121 : // We now have the truncated division value in edx. If we're
1122 : // computing a modulus or checking whether the division resulted
1123 : // in an integer, we need to multiply the obtained value by d and
1124 : // finish the computation/check.
1125 0 : if (!isDiv) {
1126 0 : masm.imull(Imm32(d), edx, edx);
1127 0 : masm.movl(lhs, eax);
1128 0 : masm.subl(edx, eax);
1129 :
1130 : // The final result of the modulus op, just computed above by the
1131 : // sub instruction, can be a number in the range [2^31, 2^32). If
1132 : // this is the case and the modulus is not truncated, we must bail
1133 : // out.
1134 0 : if (!ins->mir()->isTruncated())
1135 0 : bailoutIf(Assembler::Signed, ins->snapshot());
1136 0 : } else if (!ins->mir()->isTruncated()) {
1137 0 : masm.imull(Imm32(d), edx, eax);
1138 0 : masm.cmpl(lhs, eax);
1139 0 : bailoutIf(Assembler::NotEqual, ins->snapshot());
1140 : }
1141 : }
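// Host-side sketch (not JIT output) of the unsigned reciprocal multiplication
// above, assuming (M, shift) come from computeDivisionConstants(d, 32) with
// M < 2^33 and shift >= 1 whenever M >= 2^32. The helper name udivByConstRef
// and its parameters are hypothetical.
#include <cstdint>

static uint32_t udivByConstRef(uint32_t n, uint64_t M, unsigned shift) {
    // edx after umull: the high 32 bits of uint32_t(M) * n.
    uint32_t hi = uint32_t((uint64_t(uint32_t(M)) * n) >> 32);
    if (M > UINT32_MAX) {
        // Want (hi + n) >> shift, rewritten as (((n - hi) >> 1) + hi) >> (shift - 1)
        // so the intermediate sum cannot overflow 32 bits (Hacker's Delight 10-8).
        uint32_t half = (n - hi) >> 1;
        return (half + hi) >> (shift - 1);
    }
    return hi >> shift;   // floor(n / d)
}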
1142 :
1143 : void
1144 0 : CodeGeneratorX86Shared::visitMulNegativeZeroCheck(MulNegativeZeroCheck* ool)
1145 : {
1146 0 : LMulI* ins = ool->ins();
1147 0 : Register result = ToRegister(ins->output());
1148 0 : Operand lhsCopy = ToOperand(ins->lhsCopy());
1149 0 : Operand rhs = ToOperand(ins->rhs());
1150 0 : MOZ_ASSERT_IF(lhsCopy.kind() == Operand::REG, lhsCopy.reg() != result.code());
1151 :
1152 : // Result is -0 if lhs or rhs is negative.
1153 0 : masm.movl(lhsCopy, result);
1154 0 : masm.orl(rhs, result);
1155 0 : bailoutIf(Assembler::Signed, ins->snapshot());
1156 :
1157 0 : masm.mov(ImmWord(0), result);
1158 0 : masm.jmp(ool->rejoin());
1159 0 : }
1160 :
1161 : void
1162 0 : CodeGeneratorX86Shared::visitDivPowTwoI(LDivPowTwoI* ins)
1163 : {
1164 0 : Register lhs = ToRegister(ins->numerator());
1165 0 : DebugOnly<Register> output = ToRegister(ins->output());
1166 :
1167 0 : int32_t shift = ins->shift();
1168 0 : bool negativeDivisor = ins->negativeDivisor();
1169 0 : MDiv* mir = ins->mir();
1170 :
1171 : // We use defineReuseInput so these should always be the same, which is
1172 : // convenient since all of our instructions here are two-address.
1173 0 : MOZ_ASSERT(lhs == output);
1174 :
1175 0 : if (!mir->isTruncated() && negativeDivisor) {
1176 : // 0 divided by a negative number must return a double.
1177 0 : masm.test32(lhs, lhs);
1178 0 : bailoutIf(Assembler::Zero, ins->snapshot());
1179 : }
1180 :
1181 0 : if (shift) {
1182 0 : if (!mir->isTruncated()) {
1183 : // If the remainder is != 0, bailout since this must be a double.
1184 0 : masm.test32(lhs, Imm32(UINT32_MAX >> (32 - shift)));
1185 0 : bailoutIf(Assembler::NonZero, ins->snapshot());
1186 : }
1187 :
1188 0 : if (mir->isUnsigned()) {
1189 0 : masm.shrl(Imm32(shift), lhs);
1190 : } else {
1191 : // Adjust the value so that shifting produces a correctly
1192 : // rounded result when the numerator is negative. See 10-1
1193 : // "Signed Division by a Known Power of 2" in Henry
1194 : // S. Warren, Jr.'s Hacker's Delight.
1195 0 : if (mir->canBeNegativeDividend()) {
1196 0 : Register lhsCopy = ToRegister(ins->numeratorCopy());
1197 0 : MOZ_ASSERT(lhsCopy != lhs);
1198 0 : if (shift > 1)
1199 0 : masm.sarl(Imm32(31), lhs);
1200 0 : masm.shrl(Imm32(32 - shift), lhs);
1201 0 : masm.addl(lhsCopy, lhs);
1202 : }
1203 0 : masm.sarl(Imm32(shift), lhs);
1204 :
1205 0 : if (negativeDivisor)
1206 0 : masm.negl(lhs);
1207 : }
1208 0 : return;
1209 : }
1210 :
1211 0 : if (negativeDivisor) {
1212 : // INT32_MIN / -1 overflows.
1213 0 : masm.negl(lhs);
1214 0 : if (!mir->isTruncated())
1215 0 : bailoutIf(Assembler::Overflow, ins->snapshot());
1216 0 : else if (mir->trapOnError())
1217 0 : masm.j(Assembler::Overflow, trap(mir, wasm::Trap::IntegerOverflow));
1218 0 : } else if (mir->isUnsigned() && !mir->isTruncated()) {
1219 : // Unsigned division by 1 can overflow if output is not
1220 : // truncated.
1221 0 : masm.test32(lhs, lhs);
1222 0 : bailoutIf(Assembler::Signed, ins->snapshot());
1223 : }
1224 : }
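// Host-side sketch (not JIT output) of the signed rounding adjustment above
// for a divisor of 1 << shift with 1 <= shift <= 31 (Hacker's Delight,
// "Signed Division by a Known Power of 2"). Assumes the compiler's signed
// right shift is arithmetic, as the generated sarl is. The helper name
// sdivPow2Ref is hypothetical.
#include <cstdint>

static int32_t sdivPow2Ref(int32_t x, unsigned shift) {
    // The sarl/shrl pair above materializes this bias: (1 << shift) - 1 for a
    // negative dividend, 0 otherwise, so the arithmetic shift truncates
    // toward zero instead of toward negative infinity.
    int32_t bias = (x < 0) ? int32_t((uint32_t(1) << shift) - 1) : 0;
    return (x + bias) >> shift;
}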
1225 :
1226 : void
1227 0 : CodeGeneratorX86Shared::visitDivOrModConstantI(LDivOrModConstantI* ins) {
1228 0 : Register lhs = ToRegister(ins->numerator());
1229 0 : Register output = ToRegister(ins->output());
1230 0 : int32_t d = ins->denominator();
1231 :
1232 : // This emits the division answer into edx or the modulus answer into eax.
1233 0 : MOZ_ASSERT(output == eax || output == edx);
1234 0 : MOZ_ASSERT(lhs != eax && lhs != edx);
1235 0 : bool isDiv = (output == edx);
1236 :
1237 : // The absolute value of the denominator isn't a power of 2 (see LDivPowTwoI
1238 : // and LModPowTwoI).
1239 0 : MOZ_ASSERT((Abs(d) & (Abs(d) - 1)) != 0);
1240 :
1241 : // We will first divide by Abs(d), and negate the answer if d is negative.
1242 : // If desired, this can be avoided by generalizing computeDivisionConstants.
1243 0 : ReciprocalMulConstants rmc = computeDivisionConstants(Abs(d), /* maxLog = */ 31);
1244 :
1245 : // We first compute (M * n) >> 32, where M = rmc.multiplier.
1246 0 : masm.movl(Imm32(rmc.multiplier), eax);
1247 0 : masm.imull(lhs);
1248 0 : if (rmc.multiplier > INT32_MAX) {
1249 0 : MOZ_ASSERT(rmc.multiplier < (int64_t(1) << 32));
1250 :
1251 : // We actually computed edx = ((int32_t(M) * n) >> 32) instead. Since
1252 : // (M * n) >> 32 is the same as (edx + n), we can correct for the overflow.
1253 : // (edx + n) can't overflow, as n and edx have opposite signs because int32_t(M)
1254 : // is negative.
1255 0 : masm.addl(lhs, edx);
1256 : }
1257 : // (M * n) >> (32 + shift) is the truncated division answer if n is non-negative,
1258 : // as proved in the comments of computeDivisionConstants. We must add 1 later if n is
1259 : // negative to get the right answer in all cases.
1260 0 : masm.sarl(Imm32(rmc.shiftAmount), edx);
1261 :
1262 : // We'll subtract -1 instead of adding 1, because (n < 0 ? -1 : 0) can be
1263 : // computed with just a sign-extending shift of 31 bits.
1264 0 : if (ins->canBeNegativeDividend()) {
1265 0 : masm.movl(lhs, eax);
1266 0 : masm.sarl(Imm32(31), eax);
1267 0 : masm.subl(eax, edx);
1268 : }
1269 :
1270 : // After this, edx contains the correct truncated division result.
1271 0 : if (d < 0)
1272 0 : masm.negl(edx);
1273 :
1274 0 : if (!isDiv) {
1275 0 : masm.imull(Imm32(-d), edx, eax);
1276 0 : masm.addl(lhs, eax);
1277 : }
1278 :
1279 0 : if (!ins->mir()->isTruncated()) {
1280 0 : if (isDiv) {
1281 : // This is a division op. Multiply the obtained value by d to check if
1282 : // the correct answer is an integer. This cannot overflow, since |d| > 1.
1283 0 : masm.imull(Imm32(d), edx, eax);
1284 0 : masm.cmp32(lhs, eax);
1285 0 : bailoutIf(Assembler::NotEqual, ins->snapshot());
1286 :
1287 : // If lhs is zero and the divisor is negative, the answer should have
1288 : // been -0.
1289 0 : if (d < 0) {
1290 0 : masm.test32(lhs, lhs);
1291 0 : bailoutIf(Assembler::Zero, ins->snapshot());
1292 : }
1293 0 : } else if (ins->canBeNegativeDividend()) {
1294 : // This is a mod op. If the computed value is zero and lhs
1295 : // is negative, the answer should have been -0.
1296 0 : Label done;
1297 :
1298 0 : masm.cmp32(lhs, Imm32(0));
1299 0 : masm.j(Assembler::GreaterThanOrEqual, &done);
1300 :
1301 0 : masm.test32(eax, eax);
1302 0 : bailoutIf(Assembler::Zero, ins->snapshot());
1303 :
1304 0 : masm.bind(&done);
1305 : }
1306 : }
1307 0 : }
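// Host-side sketch (not JIT output) of the signed reciprocal multiplication
// above, assuming (M, shift) come from computeDivisionConstants(Abs(d), 31)
// with M < 2^32, and an arithmetic signed right shift. The helper name
// sdivByConstRef is hypothetical.
#include <cstdint>

static int32_t sdivByConstRef(int32_t n, int64_t M, unsigned shift, int32_t d) {
    // edx after imull: the high 32 bits of the signed product int32_t(M) * n.
    int32_t hi = int32_t((int64_t(int32_t(M)) * n) >> 32);
    if (M > INT32_MAX)
        hi += n;               // correct for having multiplied by int32_t(M) = M - 2^32;
                               // hi and n have opposite signs here, so no overflow.
    int32_t q = hi >> shift;   // floor((M * n) / 2^(32 + shift))
    if (n < 0)
        q += 1;                // the floor above undershoots by one for negative n
    return (d < 0) ? -q : q;   // we divided by Abs(d); fix the sign last
}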
1308 :
1309 : void
1310 0 : CodeGeneratorX86Shared::visitDivI(LDivI* ins)
1311 : {
1312 0 : Register remainder = ToRegister(ins->remainder());
1313 0 : Register lhs = ToRegister(ins->lhs());
1314 0 : Register rhs = ToRegister(ins->rhs());
1315 0 : Register output = ToRegister(ins->output());
1316 :
1317 0 : MDiv* mir = ins->mir();
1318 :
1319 0 : MOZ_ASSERT_IF(lhs != rhs, rhs != eax);
1320 0 : MOZ_ASSERT(rhs != edx);
1321 0 : MOZ_ASSERT(remainder == edx);
1322 0 : MOZ_ASSERT(output == eax);
1323 :
1324 0 : Label done;
1325 0 : ReturnZero* ool = nullptr;
1326 :
1327 : // Put the lhs in eax, for either the negative overflow case or the regular
1328 : // divide case.
1329 0 : if (lhs != eax)
1330 0 : masm.mov(lhs, eax);
1331 :
1332 : // Handle divide by zero.
1333 0 : if (mir->canBeDivideByZero()) {
1334 0 : masm.test32(rhs, rhs);
1335 0 : if (mir->trapOnError()) {
1336 0 : masm.j(Assembler::Zero, trap(mir, wasm::Trap::IntegerDivideByZero));
1337 0 : } else if (mir->canTruncateInfinities()) {
1338 : // Truncated division by zero is zero (Infinity|0 == 0)
1339 0 : if (!ool)
1340 0 : ool = new(alloc()) ReturnZero(output);
1341 0 : masm.j(Assembler::Zero, ool->entry());
1342 : } else {
1343 0 : MOZ_ASSERT(mir->fallible());
1344 0 : bailoutIf(Assembler::Zero, ins->snapshot());
1345 : }
1346 : }
1347 :
1348 : // Handle an integer overflow exception from -2147483648 / -1.
1349 0 : if (mir->canBeNegativeOverflow()) {
1350 0 : Label notmin;
1351 0 : masm.cmp32(lhs, Imm32(INT32_MIN));
1352 0 : masm.j(Assembler::NotEqual, &notmin);
1353 0 : masm.cmp32(rhs, Imm32(-1));
1354 0 : if (mir->trapOnError()) {
1355 0 : masm.j(Assembler::Equal, trap(mir, wasm::Trap::IntegerOverflow));
1356 0 : } else if (mir->canTruncateOverflow()) {
1357 : // (-INT32_MIN)|0 == INT32_MIN and INT32_MIN is already in the
1358 : // output register (lhs == eax).
1359 0 : masm.j(Assembler::Equal, &done);
1360 : } else {
1361 0 : MOZ_ASSERT(mir->fallible());
1362 0 : bailoutIf(Assembler::Equal, ins->snapshot());
1363 : }
1364 0 : masm.bind(&notmin);
1365 : }
1366 :
1367 : // Handle negative 0.
1368 0 : if (!mir->canTruncateNegativeZero() && mir->canBeNegativeZero()) {
1369 0 : Label nonzero;
1370 0 : masm.test32(lhs, lhs);
1371 0 : masm.j(Assembler::NonZero, &nonzero);
1372 0 : masm.cmp32(rhs, Imm32(0));
1373 0 : bailoutIf(Assembler::LessThan, ins->snapshot());
1374 0 : masm.bind(&nonzero);
1375 : }
1376 :
1377 : // Sign extend the lhs into edx to make (edx:eax), since idiv is 64-bit.
1378 0 : if (lhs != eax)
1379 0 : masm.mov(lhs, eax);
1380 0 : masm.cdq();
1381 0 : masm.idiv(rhs);
1382 :
1383 0 : if (!mir->canTruncateRemainder()) {
1384 : // If the remainder is > 0, bailout since this must be a double.
1385 0 : masm.test32(remainder, remainder);
1386 0 : bailoutIf(Assembler::NonZero, ins->snapshot());
1387 : }
1388 :
1389 0 : masm.bind(&done);
1390 :
1391 0 : if (ool) {
1392 0 : addOutOfLineCode(ool, mir);
1393 0 : masm.bind(ool->rejoin());
1394 : }
1395 0 : }
1396 :
1397 : void
1398 0 : CodeGeneratorX86Shared::visitModPowTwoI(LModPowTwoI* ins)
1399 : {
1400 0 : Register lhs = ToRegister(ins->getOperand(0));
1401 0 : int32_t shift = ins->shift();
1402 :
1403 0 : Label negative;
1404 :
1405 0 : if (!ins->mir()->isUnsigned() && ins->mir()->canBeNegativeDividend()) {
1406 : // Switch based on sign of the lhs.
1407 : // Positive numbers are just a bitmask
1408 0 : masm.branchTest32(Assembler::Signed, lhs, lhs, &negative);
1409 : }
1410 :
1411 0 : masm.andl(Imm32((uint32_t(1) << shift) - 1), lhs);
1412 :
1413 0 : if (!ins->mir()->isUnsigned() && ins->mir()->canBeNegativeDividend()) {
1414 0 : Label done;
1415 0 : masm.jump(&done);
1416 :
1417 : // Negative numbers need a negate, bitmask, negate
1418 0 : masm.bind(&negative);
1419 :
1420 : // Unlike in the visitModI case, we are not computing the mod by means of a
1421 : // division. Therefore, the divisor = -1 case isn't problematic (the andl
1422 : // always returns 0, which is what we expect).
1423 : //
1424 : // The negl instruction overflows if lhs == INT32_MIN, but this is also not
1425 : // a problem: shift is at most 31, and so the andl also always returns 0.
1426 0 : masm.negl(lhs);
1427 0 : masm.andl(Imm32((uint32_t(1) << shift) - 1), lhs);
1428 0 : masm.negl(lhs);
1429 :
1430 : // Since a%b has the same sign as b, and a is negative in this branch,
1431 : // an answer of 0 means the correct result is actually -0. Bail out.
1432 0 : if (!ins->mir()->isTruncated())
1433 0 : bailoutIf(Assembler::Zero, ins->snapshot());
1434 0 : masm.bind(&done);
1435 : }
1436 0 : }
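// Host-side sketch (not JIT output) of the negate/mask/negate sequence above,
// assuming 1 <= shift <= 31. The helper name smodPow2Ref is hypothetical.
#include <cstdint>

static int32_t smodPow2Ref(int32_t x, unsigned shift) {
    uint32_t mask = (uint32_t(1) << shift) - 1;
    if (x >= 0)
        return int32_t(uint32_t(x) & mask);   // non-negative: plain bitmask
    // Negative: mask |x| and restore the sign so the result has the sign of
    // the dividend. A zero result here really means -0, which is why the
    // non-truncated path above bails out on Assembler::Zero.
    uint32_t absx = 0u - uint32_t(x);         // well-defined, even for INT32_MIN
    return -int32_t(absx & mask);
}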
1437 :
1438 : class ModOverflowCheck : public OutOfLineCodeBase<CodeGeneratorX86Shared>
1439 : {
1440 : Label done_;
1441 : LModI* ins_;
1442 : Register rhs_;
1443 :
1444 : public:
1445 0 : explicit ModOverflowCheck(LModI* ins, Register rhs)
1446 0 : : ins_(ins), rhs_(rhs)
1447 0 : { }
1448 :
1449 0 : virtual void accept(CodeGeneratorX86Shared* codegen) {
1450 0 : codegen->visitModOverflowCheck(this);
1451 0 : }
1452 0 : Label* done() {
1453 0 : return &done_;
1454 : }
1455 0 : LModI* ins() const {
1456 0 : return ins_;
1457 : }
1458 0 : Register rhs() const {
1459 0 : return rhs_;
1460 : }
1461 : };
1462 :
1463 : void
1464 0 : CodeGeneratorX86Shared::visitModOverflowCheck(ModOverflowCheck* ool)
1465 : {
1466 0 : masm.cmp32(ool->rhs(), Imm32(-1));
1467 0 : if (ool->ins()->mir()->isTruncated()) {
1468 0 : masm.j(Assembler::NotEqual, ool->rejoin());
1469 0 : masm.mov(ImmWord(0), edx);
1470 0 : masm.jmp(ool->done());
1471 : } else {
1472 0 : bailoutIf(Assembler::Equal, ool->ins()->snapshot());
1473 0 : masm.jmp(ool->rejoin());
1474 : }
1475 0 : }
1476 :
1477 : void
1478 0 : CodeGeneratorX86Shared::visitModI(LModI* ins)
1479 : {
1480 0 : Register remainder = ToRegister(ins->remainder());
1481 0 : Register lhs = ToRegister(ins->lhs());
1482 0 : Register rhs = ToRegister(ins->rhs());
1483 :
1484 : // Required to use idiv.
1485 0 : MOZ_ASSERT_IF(lhs != rhs, rhs != eax);
1486 0 : MOZ_ASSERT(rhs != edx);
1487 0 : MOZ_ASSERT(remainder == edx);
1488 0 : MOZ_ASSERT(ToRegister(ins->getTemp(0)) == eax);
1489 :
1490 0 : Label done;
1491 0 : ReturnZero* ool = nullptr;
1492 0 : ModOverflowCheck* overflow = nullptr;
1493 :
1494 : // Set up eax in preparation for doing a div.
1495 0 : if (lhs != eax)
1496 0 : masm.mov(lhs, eax);
1497 :
1498 0 : MMod* mir = ins->mir();
1499 :
1500 : // Prevent divide by zero.
1501 0 : if (mir->canBeDivideByZero()) {
1502 0 : masm.test32(rhs, rhs);
1503 0 : if (mir->isTruncated()) {
1504 0 : if (mir->trapOnError()) {
1505 0 : masm.j(Assembler::Zero, trap(mir, wasm::Trap::IntegerDivideByZero));
1506 : } else {
1507 0 : if (!ool)
1508 0 : ool = new(alloc()) ReturnZero(edx);
1509 0 : masm.j(Assembler::Zero, ool->entry());
1510 : }
1511 : } else {
1512 0 : bailoutIf(Assembler::Zero, ins->snapshot());
1513 : }
1514 : }
1515 :
1516 0 : Label negative;
1517 :
1518 : // Switch based on sign of the lhs.
1519 0 : if (mir->canBeNegativeDividend())
1520 0 : masm.branchTest32(Assembler::Signed, lhs, lhs, &negative);
1521 :
1522 : // If lhs >= 0 then remainder = lhs % rhs. The remainder must be positive.
1523 : {
1524 : // Check if rhs is a power-of-two.
1525 0 : if (mir->canBePowerOfTwoDivisor()) {
1526 0 : MOZ_ASSERT(rhs != remainder);
1527 :
1528 : // Rhs y is a power-of-two if (y & (y-1)) == 0. Note that if
1529 : // y is any negative number other than INT32_MIN, both y and
1530 : // y-1 will have the sign bit set so these are never optimized
1531 : // as powers-of-two. If y is INT32_MIN, y-1 will be INT32_MAX
1532 : // and because lhs >= 0 at this point, lhs & INT32_MAX returns
1533 : // the correct value.
1534 0 : Label notPowerOfTwo;
1535 0 : masm.mov(rhs, remainder);
1536 0 : masm.subl(Imm32(1), remainder);
1537 0 : masm.branchTest32(Assembler::NonZero, remainder, rhs, &notPowerOfTwo);
1538 : {
1539 0 : masm.andl(lhs, remainder);
1540 0 : masm.jmp(&done);
1541 : }
1542 0 : masm.bind(&notPowerOfTwo);
1543 : }
1544 :
1545 : // Since lhs >= 0, the sign-extension will be 0
1546 0 : masm.mov(ImmWord(0), edx);
1547 0 : masm.idiv(rhs);
1548 : }
1549 :
1550 : // Otherwise, we have to beware of two special cases:
1551 0 : if (mir->canBeNegativeDividend()) {
1552 0 : masm.jump(&done);
1553 :
1554 0 : masm.bind(&negative);
1555 :
1556 : // Prevent an integer overflow exception from -2147483648 % -1
1557 0 : Label notmin;
1558 0 : masm.cmp32(lhs, Imm32(INT32_MIN));
1559 0 : overflow = new(alloc()) ModOverflowCheck(ins, rhs);
1560 0 : masm.j(Assembler::Equal, overflow->entry());
1561 0 : masm.bind(overflow->rejoin());
1562 0 : masm.cdq();
1563 0 : masm.idiv(rhs);
1564 :
1565 0 : if (!mir->isTruncated()) {
1566 : // A remainder of 0 means that the rval must be -0, which is a double.
1567 0 : masm.test32(remainder, remainder);
1568 0 : bailoutIf(Assembler::Zero, ins->snapshot());
1569 : }
1570 : }
1571 :
1572 0 : masm.bind(&done);
1573 :
1574 0 : if (overflow) {
1575 0 : addOutOfLineCode(overflow, mir);
1576 0 : masm.bind(overflow->done());
1577 : }
1578 :
1579 0 : if (ool) {
1580 0 : addOutOfLineCode(ool, mir);
1581 0 : masm.bind(ool->rejoin());
1582 : }
1583 0 : }
1584 :
1585 : void
1586 0 : CodeGeneratorX86Shared::visitBitNotI(LBitNotI* ins)
1587 : {
1588 0 : const LAllocation* input = ins->getOperand(0);
1589 0 : MOZ_ASSERT(!input->isConstant());
1590 :
1591 0 : masm.notl(ToOperand(input));
1592 0 : }
1593 :
1594 : void
1595 0 : CodeGeneratorX86Shared::visitBitOpI(LBitOpI* ins)
1596 : {
1597 0 : const LAllocation* lhs = ins->getOperand(0);
1598 0 : const LAllocation* rhs = ins->getOperand(1);
1599 :
1600 0 : switch (ins->bitop()) {
1601 : case JSOP_BITOR:
1602 0 : if (rhs->isConstant())
1603 0 : masm.orl(Imm32(ToInt32(rhs)), ToOperand(lhs));
1604 : else
1605 0 : masm.orl(ToOperand(rhs), ToRegister(lhs));
1606 0 : break;
1607 : case JSOP_BITXOR:
1608 0 : if (rhs->isConstant())
1609 0 : masm.xorl(Imm32(ToInt32(rhs)), ToOperand(lhs));
1610 : else
1611 0 : masm.xorl(ToOperand(rhs), ToRegister(lhs));
1612 0 : break;
1613 : case JSOP_BITAND:
1614 0 : if (rhs->isConstant())
1615 0 : masm.andl(Imm32(ToInt32(rhs)), ToOperand(lhs));
1616 : else
1617 0 : masm.andl(ToOperand(rhs), ToRegister(lhs));
1618 0 : break;
1619 : default:
1620 0 : MOZ_CRASH("unexpected binary opcode");
1621 : }
1622 0 : }
1623 :
1624 : void
1625 0 : CodeGeneratorX86Shared::visitBitOpI64(LBitOpI64* lir)
1626 : {
1627 0 : const LInt64Allocation lhs = lir->getInt64Operand(LBitOpI64::Lhs);
1628 0 : const LInt64Allocation rhs = lir->getInt64Operand(LBitOpI64::Rhs);
1629 :
1630 0 : MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs));
1631 :
1632 0 : switch (lir->bitop()) {
1633 : case JSOP_BITOR:
1634 0 : if (IsConstant(rhs))
1635 0 : masm.or64(Imm64(ToInt64(rhs)), ToRegister64(lhs));
1636 : else
1637 0 : masm.or64(ToOperandOrRegister64(rhs), ToRegister64(lhs));
1638 0 : break;
1639 : case JSOP_BITXOR:
1640 0 : if (IsConstant(rhs))
1641 0 : masm.xor64(Imm64(ToInt64(rhs)), ToRegister64(lhs));
1642 : else
1643 0 : masm.xor64(ToOperandOrRegister64(rhs), ToRegister64(lhs));
1644 0 : break;
1645 : case JSOP_BITAND:
1646 0 : if (IsConstant(rhs))
1647 0 : masm.and64(Imm64(ToInt64(rhs)), ToRegister64(lhs));
1648 : else
1649 0 : masm.and64(ToOperandOrRegister64(rhs), ToRegister64(lhs));
1650 0 : break;
1651 : default:
1652 0 : MOZ_CRASH("unexpected binary opcode");
1653 : }
1654 0 : }
1655 :
1656 : void
1657 0 : CodeGeneratorX86Shared::visitShiftI(LShiftI* ins)
1658 : {
1659 0 : Register lhs = ToRegister(ins->lhs());
1660 0 : const LAllocation* rhs = ins->rhs();
1661 :
1662 0 : if (rhs->isConstant()) {
1663 0 : int32_t shift = ToInt32(rhs) & 0x1F;
1664 0 : switch (ins->bitop()) {
1665 : case JSOP_LSH:
1666 0 : if (shift)
1667 0 : masm.shll(Imm32(shift), lhs);
1668 0 : break;
1669 : case JSOP_RSH:
1670 0 : if (shift)
1671 0 : masm.sarl(Imm32(shift), lhs);
1672 0 : break;
1673 : case JSOP_URSH:
1674 0 : if (shift) {
1675 0 : masm.shrl(Imm32(shift), lhs);
1676 0 : } else if (ins->mir()->toUrsh()->fallible()) {
1677 : // x >>> 0 can overflow.
1678 0 : masm.test32(lhs, lhs);
1679 0 : bailoutIf(Assembler::Signed, ins->snapshot());
1680 : }
1681 0 : break;
1682 : default:
1683 0 : MOZ_CRASH("Unexpected shift op");
1684 : }
1685 : } else {
1686 0 : MOZ_ASSERT(ToRegister(rhs) == ecx);
1687 0 : switch (ins->bitop()) {
1688 : case JSOP_LSH:
1689 0 : masm.shll_cl(lhs);
1690 0 : break;
1691 : case JSOP_RSH:
1692 0 : masm.sarl_cl(lhs);
1693 0 : break;
1694 : case JSOP_URSH:
1695 0 : masm.shrl_cl(lhs);
1696 0 : if (ins->mir()->toUrsh()->fallible()) {
1697 : // x >>> 0 can overflow.
1698 0 : masm.test32(lhs, lhs);
1699 0 : bailoutIf(Assembler::Signed, ins->snapshot());
1700 : }
1701 0 : break;
1702 : default:
1703 0 : MOZ_CRASH("Unexpected shift op");
1704 : }
1705 : }
1706 0 : }
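
// A hedged scalar sketch of why JSOP_URSH is fallible above (the helper name
// jsUrsh is illustrative, not an identifier in this file): JS evaluates
// x >>> s as an unsigned 32-bit value, so the result can exceed INT32_MAX
// even when the shift count is zero.
//
//     // Double-valued result of the JS expression x >>> s.
//     double jsUrsh(int32_t x, int32_t s) {
//         return double(uint32_t(x) >> (uint32_t(s) & 31));
//     }
//
// e.g. jsUrsh(-1, 0) == 4294967295.0, which does not fit in an int32, so the
// sign test after the shift bails out instead of keeping an Int32 result.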
1707 :
1708 : void
1709 0 : CodeGeneratorX86Shared::visitShiftI64(LShiftI64* lir)
1710 : {
1711 0 : const LInt64Allocation lhs = lir->getInt64Operand(LShiftI64::Lhs);
1712 0 : LAllocation* rhs = lir->getOperand(LShiftI64::Rhs);
1713 :
1714 0 : MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs));
1715 :
1716 0 : if (rhs->isConstant()) {
1717 0 : int32_t shift = int32_t(rhs->toConstant()->toInt64() & 0x3F);
1718 0 : switch (lir->bitop()) {
1719 : case JSOP_LSH:
1720 0 : if (shift)
1721 0 : masm.lshift64(Imm32(shift), ToRegister64(lhs));
1722 0 : break;
1723 : case JSOP_RSH:
1724 0 : if (shift)
1725 0 : masm.rshift64Arithmetic(Imm32(shift), ToRegister64(lhs));
1726 0 : break;
1727 : case JSOP_URSH:
1728 0 : if (shift)
1729 0 : masm.rshift64(Imm32(shift), ToRegister64(lhs));
1730 0 : break;
1731 : default:
1732 0 : MOZ_CRASH("Unexpected shift op");
1733 : }
1734 0 : return;
1735 : }
1736 :
1737 0 : MOZ_ASSERT(ToRegister(rhs) == ecx);
1738 0 : switch (lir->bitop()) {
1739 : case JSOP_LSH:
1740 0 : masm.lshift64(ecx, ToRegister64(lhs));
1741 0 : break;
1742 : case JSOP_RSH:
1743 0 : masm.rshift64Arithmetic(ecx, ToRegister64(lhs));
1744 0 : break;
1745 : case JSOP_URSH:
1746 0 : masm.rshift64(ecx, ToRegister64(lhs));
1747 0 : break;
1748 : default:
1749 0 : MOZ_CRASH("Unexpected shift op");
1750 : }
1751 : }
1752 :
1753 : void
1754 0 : CodeGeneratorX86Shared::visitUrshD(LUrshD* ins)
1755 : {
1756 0 : Register lhs = ToRegister(ins->lhs());
1757 0 : MOZ_ASSERT(ToRegister(ins->temp()) == lhs);
1758 :
1759 0 : const LAllocation* rhs = ins->rhs();
1760 0 : FloatRegister out = ToFloatRegister(ins->output());
1761 :
1762 0 : if (rhs->isConstant()) {
1763 0 : int32_t shift = ToInt32(rhs) & 0x1F;
1764 0 : if (shift)
1765 0 : masm.shrl(Imm32(shift), lhs);
1766 : } else {
1767 0 : MOZ_ASSERT(ToRegister(rhs) == ecx);
1768 0 : masm.shrl_cl(lhs);
1769 : }
1770 :
1771 0 : masm.convertUInt32ToDouble(lhs, out);
1772 0 : }
1773 :
1774 : Operand
1775 125 : CodeGeneratorX86Shared::ToOperand(const LAllocation& a)
1776 : {
1777 125 : if (a.isGeneralReg())
1778 93 : return Operand(a.toGeneralReg()->reg());
1779 32 : if (a.isFloatReg())
1780 0 : return Operand(a.toFloatReg()->reg());
1781 32 : return Operand(masm.getStackPointer(), ToStackOffset(&a));
1782 : }
1783 :
1784 : Operand
1785 125 : CodeGeneratorX86Shared::ToOperand(const LAllocation* a)
1786 : {
1787 125 : return ToOperand(*a);
1788 : }
1789 :
1790 : Operand
1791 0 : CodeGeneratorX86Shared::ToOperand(const LDefinition* def)
1792 : {
1793 0 : return ToOperand(def->output());
1794 : }
1795 :
1796 : MoveOperand
1797 1108 : CodeGeneratorX86Shared::toMoveOperand(LAllocation a) const
1798 : {
1799 1108 : if (a.isGeneralReg())
1800 566 : return MoveOperand(ToRegister(a));
1801 542 : if (a.isFloatReg())
1802 0 : return MoveOperand(ToFloatRegister(a));
1803 542 : return MoveOperand(StackPointer, ToStackOffset(a));
1804 : }
1805 :
1806 : class OutOfLineTableSwitch : public OutOfLineCodeBase<CodeGeneratorX86Shared>
1807 : {
1808 : MTableSwitch* mir_;
1809 : CodeLabel jumpLabel_;
1810 :
1811 0 : void accept(CodeGeneratorX86Shared* codegen) {
1812 0 : codegen->visitOutOfLineTableSwitch(this);
1813 0 : }
1814 :
1815 : public:
1816 0 : explicit OutOfLineTableSwitch(MTableSwitch* mir)
1817 0 : : mir_(mir)
1818 0 : {}
1819 :
1820 0 : MTableSwitch* mir() const {
1821 0 : return mir_;
1822 : }
1823 :
1824 0 : CodeLabel* jumpLabel() {
1825 0 : return &jumpLabel_;
1826 : }
1827 : };
1828 :
1829 : void
1830 0 : CodeGeneratorX86Shared::visitOutOfLineTableSwitch(OutOfLineTableSwitch* ool)
1831 : {
1832 0 : MTableSwitch* mir = ool->mir();
1833 :
1834 0 : masm.haltingAlign(sizeof(void*));
1835 0 : masm.use(ool->jumpLabel()->target());
1836 0 : masm.addCodeLabel(*ool->jumpLabel());
1837 :
1838 0 : for (size_t i = 0; i < mir->numCases(); i++) {
1839 0 : LBlock* caseblock = skipTrivialBlocks(mir->getCase(i))->lir();
1840 0 : Label* caseheader = caseblock->label();
1841 0 : uint32_t caseoffset = caseheader->offset();
1842 :
1843 : // The entries of the jump table need to be absolute addresses and thus
1844 : // must be patched after codegen is finished.
1845 0 : CodeLabel cl;
1846 0 : masm.writeCodePointer(cl.patchAt());
1847 0 : cl.target()->bind(caseoffset);
1848 0 : masm.addCodeLabel(cl);
1849 : }
1850 0 : }
1851 :
1852 : void
1853 0 : CodeGeneratorX86Shared::emitTableSwitchDispatch(MTableSwitch* mir, Register index, Register base)
1854 : {
1855 0 : Label* defaultcase = skipTrivialBlocks(mir->getDefault())->lir()->label();
1856 :
1857 :     // Rebase the index by subtracting the table's low bound.
1858 0 : if (mir->low() != 0)
1859 0 : masm.subl(Imm32(mir->low()), index);
1860 :
1861 : // Jump to default case if input is out of range
1862 0 : int32_t cases = mir->numCases();
1863 0 : masm.cmp32(index, Imm32(cases));
1864 0 : masm.j(AssemblerX86Shared::AboveOrEqual, defaultcase);
1865 :
1866 : // To fill in the CodeLabels for the case entries, we need to first
1867 : // generate the case entries (we don't yet know their offsets in the
1868 : // instruction stream).
1869 0 : OutOfLineTableSwitch* ool = new(alloc()) OutOfLineTableSwitch(mir);
1870 0 : addOutOfLineCode(ool, mir);
1871 :
1872 :     // Compute the address of the jump-table entry for the selected case.
1873 0 : masm.mov(ool->jumpLabel()->patchAt(), base);
1874 0 : Operand pointer = Operand(base, index, ScalePointer);
1875 :
1876 : // Jump to the right case
1877 0 : masm.jmp(pointer);
1878 0 : }
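
// A hedged scalar sketch of the dispatch sequence emitted above (the names
// low, numCases and table are illustrative, standing in for mir->low(),
// mir->numCases() and the out-of-line jump table):
//
//     uint32_t i = uint32_t(index) - uint32_t(low);   // rebase the index
//     if (i >= uint32_t(numCases))                    // the unsigned compare
//         goto defaultcase;                           // also catches index < low
//     // ...then jump to the code address stored at table[i], which is what
//     // masm.jmp(Operand(base, index, ScalePointer)) does.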
1879 :
1880 : void
1881 0 : CodeGeneratorX86Shared::visitMathD(LMathD* math)
1882 : {
1883 0 : FloatRegister lhs = ToFloatRegister(math->lhs());
1884 0 : Operand rhs = ToOperand(math->rhs());
1885 0 : FloatRegister output = ToFloatRegister(math->output());
1886 :
1887 0 : switch (math->jsop()) {
1888 : case JSOP_ADD:
1889 0 : masm.vaddsd(rhs, lhs, output);
1890 0 : break;
1891 : case JSOP_SUB:
1892 0 : masm.vsubsd(rhs, lhs, output);
1893 0 : break;
1894 : case JSOP_MUL:
1895 0 : masm.vmulsd(rhs, lhs, output);
1896 0 : break;
1897 : case JSOP_DIV:
1898 0 : masm.vdivsd(rhs, lhs, output);
1899 0 : break;
1900 : default:
1901 0 : MOZ_CRASH("unexpected opcode");
1902 : }
1903 0 : }
1904 :
1905 : void
1906 0 : CodeGeneratorX86Shared::visitMathF(LMathF* math)
1907 : {
1908 0 : FloatRegister lhs = ToFloatRegister(math->lhs());
1909 0 : Operand rhs = ToOperand(math->rhs());
1910 0 : FloatRegister output = ToFloatRegister(math->output());
1911 :
1912 0 : switch (math->jsop()) {
1913 : case JSOP_ADD:
1914 0 : masm.vaddss(rhs, lhs, output);
1915 0 : break;
1916 : case JSOP_SUB:
1917 0 : masm.vsubss(rhs, lhs, output);
1918 0 : break;
1919 : case JSOP_MUL:
1920 0 : masm.vmulss(rhs, lhs, output);
1921 0 : break;
1922 : case JSOP_DIV:
1923 0 : masm.vdivss(rhs, lhs, output);
1924 0 : break;
1925 : default:
1926 0 : MOZ_CRASH("unexpected opcode");
1927 : }
1928 0 : }
1929 :
1930 : void
1931 0 : CodeGeneratorX86Shared::visitFloor(LFloor* lir)
1932 : {
1933 0 : FloatRegister input = ToFloatRegister(lir->input());
1934 0 : Register output = ToRegister(lir->output());
1935 :
1936 0 : Label bailout;
1937 :
1938 0 : if (AssemblerX86Shared::HasSSE41()) {
1939 : // Bail on negative-zero.
1940 0 : masm.branchNegativeZero(input, output, &bailout);
1941 0 : bailoutFrom(&bailout, lir->snapshot());
1942 :
1943 : // Round toward -Infinity.
1944 : {
1945 0 : ScratchDoubleScope scratch(masm);
1946 0 : masm.vroundsd(X86Encoding::RoundDown, input, scratch, scratch);
1947 0 : bailoutCvttsd2si(scratch, output, lir->snapshot());
1948 : }
1949 : } else {
1950 0 : Label negative, end;
1951 :
1952 : // Branch to a slow path for negative inputs. Doesn't catch NaN or -0.
1953 : {
1954 0 : ScratchDoubleScope scratch(masm);
1955 0 : masm.zeroDouble(scratch);
1956 0 : masm.branchDouble(Assembler::DoubleLessThan, input, scratch, &negative);
1957 : }
1958 :
1959 : // Bail on negative-zero.
1960 0 : masm.branchNegativeZero(input, output, &bailout);
1961 0 : bailoutFrom(&bailout, lir->snapshot());
1962 :
1963 : // Input is non-negative, so truncation correctly rounds.
1964 0 : bailoutCvttsd2si(input, output, lir->snapshot());
1965 :
1966 0 : masm.jump(&end);
1967 :
1968 : // Input is negative, but isn't -0.
1969 : // Negative values go on a comparatively expensive path, since no
1970 : // native rounding mode matches JS semantics. Still better than callVM.
1971 0 : masm.bind(&negative);
1972 : {
1973 : // Truncate and round toward zero.
1974 : // This is off-by-one for everything but integer-valued inputs.
1975 0 : bailoutCvttsd2si(input, output, lir->snapshot());
1976 :
1977 : // Test whether the input double was integer-valued.
1978 : {
1979 0 : ScratchDoubleScope scratch(masm);
1980 0 : masm.convertInt32ToDouble(output, scratch);
1981 0 : masm.branchDouble(Assembler::DoubleEqualOrUnordered, input, scratch, &end);
1982 : }
1983 :
1984 : // Input is not integer-valued, so we rounded off-by-one in the
1985 : // wrong direction. Correct by subtraction.
1986 0 : masm.subl(Imm32(1), output);
1987 : // Cannot overflow: output was already checked against INT_MIN.
1988 : }
1989 :
1990 0 : masm.bind(&end);
1991 : }
1992 0 : }
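
// A hedged scalar equivalent of the non-SSE4.1 negative path above (the
// helper name floorNegative is illustrative only):
//
//     // Assumes x < 0, x != -0.0, and that x fits in int32 after flooring
//     // (the bailouts above handle everything else).
//     int32_t floorNegative(double x) {
//         int32_t out = int32_t(x);    // cvttsd2si truncates toward zero
//         if (double(out) != x)        // not integer-valued?
//             out -= 1;                // correct the off-by-one
//         return out;
//     }
//
// e.g. floorNegative(-2.5): truncation yields -2, -2.0 != -2.5, so the result
// is corrected to -3 == floor(-2.5); floorNegative(-3.0) returns -3 as-is.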
1993 :
1994 : void
1995 0 : CodeGeneratorX86Shared::visitFloorF(LFloorF* lir)
1996 : {
1997 0 : FloatRegister input = ToFloatRegister(lir->input());
1998 0 : Register output = ToRegister(lir->output());
1999 :
2000 0 : Label bailout;
2001 :
2002 0 : if (AssemblerX86Shared::HasSSE41()) {
2003 : // Bail on negative-zero.
2004 0 : masm.branchNegativeZeroFloat32(input, output, &bailout);
2005 0 : bailoutFrom(&bailout, lir->snapshot());
2006 :
2007 : // Round toward -Infinity.
2008 : {
2009 0 : ScratchFloat32Scope scratch(masm);
2010 0 : masm.vroundss(X86Encoding::RoundDown, input, scratch, scratch);
2011 0 : bailoutCvttss2si(scratch, output, lir->snapshot());
2012 : }
2013 : } else {
2014 0 : Label negative, end;
2015 :
2016 : // Branch to a slow path for negative inputs. Doesn't catch NaN or -0.
2017 : {
2018 0 : ScratchFloat32Scope scratch(masm);
2019 0 : masm.zeroFloat32(scratch);
2020 0 : masm.branchFloat(Assembler::DoubleLessThan, input, scratch, &negative);
2021 : }
2022 :
2023 : // Bail on negative-zero.
2024 0 : masm.branchNegativeZeroFloat32(input, output, &bailout);
2025 0 : bailoutFrom(&bailout, lir->snapshot());
2026 :
2027 : // Input is non-negative, so truncation correctly rounds.
2028 0 : bailoutCvttss2si(input, output, lir->snapshot());
2029 :
2030 0 : masm.jump(&end);
2031 :
2032 : // Input is negative, but isn't -0.
2033 : // Negative values go on a comparatively expensive path, since no
2034 : // native rounding mode matches JS semantics. Still better than callVM.
2035 0 : masm.bind(&negative);
2036 : {
2037 : // Truncate and round toward zero.
2038 : // This is off-by-one for everything but integer-valued inputs.
2039 0 : bailoutCvttss2si(input, output, lir->snapshot());
2040 :
2041 :             // Test whether the input float was integer-valued.
2042 : {
2043 0 : ScratchFloat32Scope scratch(masm);
2044 0 : masm.convertInt32ToFloat32(output, scratch);
2045 0 : masm.branchFloat(Assembler::DoubleEqualOrUnordered, input, scratch, &end);
2046 : }
2047 :
2048 : // Input is not integer-valued, so we rounded off-by-one in the
2049 : // wrong direction. Correct by subtraction.
2050 0 : masm.subl(Imm32(1), output);
2051 : // Cannot overflow: output was already checked against INT_MIN.
2052 : }
2053 :
2054 0 : masm.bind(&end);
2055 : }
2056 0 : }
2057 :
2058 : void
2059 0 : CodeGeneratorX86Shared::visitCeil(LCeil* lir)
2060 : {
2061 0 : FloatRegister input = ToFloatRegister(lir->input());
2062 0 : ScratchDoubleScope scratch(masm);
2063 0 : Register output = ToRegister(lir->output());
2064 :
2065 0 : Label bailout, lessThanMinusOne;
2066 :
2067 : // Bail on ]-1; -0] range
2068 0 : masm.loadConstantDouble(-1, scratch);
2069 0 : masm.branchDouble(Assembler::DoubleLessThanOrEqualOrUnordered, input,
2070 0 : scratch, &lessThanMinusOne);
2071 :
2072 : // Test for remaining values with the sign bit set, i.e. ]-1; -0]
2073 0 : masm.vmovmskpd(input, output);
2074 0 : masm.branchTest32(Assembler::NonZero, output, Imm32(1), &bailout);
2075 0 : bailoutFrom(&bailout, lir->snapshot());
2076 :
2077 0 : if (AssemblerX86Shared::HasSSE41()) {
2078 : // x <= -1 or x > -0
2079 0 : masm.bind(&lessThanMinusOne);
2080 : // Round toward +Infinity.
2081 0 : masm.vroundsd(X86Encoding::RoundUp, input, scratch, scratch);
2082 0 : bailoutCvttsd2si(scratch, output, lir->snapshot());
2083 0 : return;
2084 : }
2085 :
2086 : // No SSE4.1
2087 0 : Label end;
2088 :
2089 :     // x >= 0 and x is not -0.0, so we can truncate (resp. truncate and add 1) for
2090 : // integer (resp. non-integer) values.
2091 : // Will also work for values >= INT_MAX + 1, as the truncate
2092 : // operation will return INT_MIN and there'll be a bailout.
2093 0 : bailoutCvttsd2si(input, output, lir->snapshot());
2094 0 : masm.convertInt32ToDouble(output, scratch);
2095 0 : masm.branchDouble(Assembler::DoubleEqualOrUnordered, input, scratch, &end);
2096 :
2097 : // Input is not integer-valued, add 1 to obtain the ceiling value
2098 0 : masm.addl(Imm32(1), output);
2099 : // if input > INT_MAX, output == INT_MAX so adding 1 will overflow.
2100 0 : bailoutIf(Assembler::Overflow, lir->snapshot());
2101 0 : masm.jump(&end);
2102 :
2103 : // x <= -1, truncation is the way to go.
2104 0 : masm.bind(&lessThanMinusOne);
2105 0 : bailoutCvttsd2si(input, output, lir->snapshot());
2106 :
2107 0 : masm.bind(&end);
2108 : }
2109 :
2110 : void
2111 0 : CodeGeneratorX86Shared::visitCeilF(LCeilF* lir)
2112 : {
2113 0 : FloatRegister input = ToFloatRegister(lir->input());
2114 0 : ScratchFloat32Scope scratch(masm);
2115 0 : Register output = ToRegister(lir->output());
2116 :
2117 0 : Label bailout, lessThanMinusOne;
2118 :
2119 : // Bail on ]-1; -0] range
2120 0 : masm.loadConstantFloat32(-1.f, scratch);
2121 0 : masm.branchFloat(Assembler::DoubleLessThanOrEqualOrUnordered, input,
2122 0 : scratch, &lessThanMinusOne);
2123 :
2124 : // Test for remaining values with the sign bit set, i.e. ]-1; -0]
2125 0 : masm.vmovmskps(input, output);
2126 0 : masm.branchTest32(Assembler::NonZero, output, Imm32(1), &bailout);
2127 0 : bailoutFrom(&bailout, lir->snapshot());
2128 :
2129 0 : if (AssemblerX86Shared::HasSSE41()) {
2130 : // x <= -1 or x > -0
2131 0 : masm.bind(&lessThanMinusOne);
2132 : // Round toward +Infinity.
2133 0 : masm.vroundss(X86Encoding::RoundUp, input, scratch, scratch);
2134 0 : bailoutCvttss2si(scratch, output, lir->snapshot());
2135 0 : return;
2136 : }
2137 :
2138 : // No SSE4.1
2139 0 : Label end;
2140 :
2141 :     // x >= 0 and x is not -0.0, so we can truncate (resp. truncate and add 1) for
2142 : // integer (resp. non-integer) values.
2143 : // Will also work for values >= INT_MAX + 1, as the truncate
2144 : // operation will return INT_MIN and there'll be a bailout.
2145 0 : bailoutCvttss2si(input, output, lir->snapshot());
2146 0 : masm.convertInt32ToFloat32(output, scratch);
2147 0 : masm.branchFloat(Assembler::DoubleEqualOrUnordered, input, scratch, &end);
2148 :
2149 : // Input is not integer-valued, add 1 to obtain the ceiling value
2150 0 : masm.addl(Imm32(1), output);
2151 : // if input > INT_MAX, output == INT_MAX so adding 1 will overflow.
2152 0 : bailoutIf(Assembler::Overflow, lir->snapshot());
2153 0 : masm.jump(&end);
2154 :
2155 : // x <= -1, truncation is the way to go.
2156 0 : masm.bind(&lessThanMinusOne);
2157 0 : bailoutCvttss2si(input, output, lir->snapshot());
2158 :
2159 0 : masm.bind(&end);
2160 : }
2161 :
2162 : void
2163 0 : CodeGeneratorX86Shared::visitRound(LRound* lir)
2164 : {
2165 0 : FloatRegister input = ToFloatRegister(lir->input());
2166 0 : FloatRegister temp = ToFloatRegister(lir->temp());
2167 0 : ScratchDoubleScope scratch(masm);
2168 0 : Register output = ToRegister(lir->output());
2169 :
2170 0 : Label negativeOrZero, negative, end, bailout;
2171 :
2172 : // Branch to a slow path for non-positive inputs. Doesn't catch NaN.
2173 0 : masm.zeroDouble(scratch);
2174 0 : masm.loadConstantDouble(GetBiggestNumberLessThan(0.5), temp);
2175 0 : masm.branchDouble(Assembler::DoubleLessThanOrEqual, input, scratch, &negativeOrZero);
2176 :
2177 : // Input is positive. Add the biggest double less than 0.5 and
2178 : // truncate, rounding down (because if the input is the biggest double less
2179 : // than 0.5, adding 0.5 would undesirably round up to 1). Note that we have
2180 : // to add the input to the temp register because we're not allowed to
2181 : // modify the input register.
2182 0 : masm.addDouble(input, temp);
2183 0 : bailoutCvttsd2si(temp, output, lir->snapshot());
2184 :
2185 0 : masm.jump(&end);
2186 :
2187 : // Input is negative, +0 or -0.
2188 0 : masm.bind(&negativeOrZero);
2189 : // Branch on negative input.
2190 0 : masm.j(Assembler::NotEqual, &negative);
2191 :
2192 : // Bail on negative-zero.
2193 0 : masm.branchNegativeZero(input, output, &bailout, /* maybeNonZero = */ false);
2194 0 : bailoutFrom(&bailout, lir->snapshot());
2195 :
2196 : // Input is +0
2197 0 : masm.xor32(output, output);
2198 0 : masm.jump(&end);
2199 :
2200 : // Input is negative.
2201 0 : masm.bind(&negative);
2202 :
2203 :     // Inputs in ]-0.5; 0] need 0.5 added to them; other negative inputs need
2204 :     // the biggest double less than 0.5 added to them.
2205 0 : Label loadJoin;
2206 0 : masm.loadConstantDouble(-0.5, scratch);
2207 0 : masm.branchDouble(Assembler::DoubleLessThan, input, scratch, &loadJoin);
2208 0 : masm.loadConstantDouble(0.5, temp);
2209 0 : masm.bind(&loadJoin);
2210 :
2211 0 : if (AssemblerX86Shared::HasSSE41()) {
2212 : // Add 0.5 and round toward -Infinity. The result is stored in the temp
2213 : // register (currently contains 0.5).
2214 0 : masm.addDouble(input, temp);
2215 0 : masm.vroundsd(X86Encoding::RoundDown, temp, scratch, scratch);
2216 :
2217 : // Truncate.
2218 0 : bailoutCvttsd2si(scratch, output, lir->snapshot());
2219 :
2220 : // If the result is positive zero, then the actual result is -0. Bail.
2221 : // Otherwise, the truncation will have produced the correct negative integer.
2222 0 : masm.test32(output, output);
2223 0 : bailoutIf(Assembler::Zero, lir->snapshot());
2224 : } else {
2225 0 : masm.addDouble(input, temp);
2226 :
2227 : // Round toward -Infinity without the benefit of ROUNDSD.
2228 : {
2229 : // If input + 0.5 >= 0, input is a negative number >= -0.5 and the result is -0.
2230 0 : masm.compareDouble(Assembler::DoubleGreaterThanOrEqual, temp, scratch);
2231 0 : bailoutIf(Assembler::DoubleGreaterThanOrEqual, lir->snapshot());
2232 :
2233 : // Truncate and round toward zero.
2234 : // This is off-by-one for everything but integer-valued inputs.
2235 0 : bailoutCvttsd2si(temp, output, lir->snapshot());
2236 :
2237 : // Test whether the truncated double was integer-valued.
2238 0 : masm.convertInt32ToDouble(output, scratch);
2239 0 : masm.branchDouble(Assembler::DoubleEqualOrUnordered, temp, scratch, &end);
2240 :
2241 : // Input is not integer-valued, so we rounded off-by-one in the
2242 : // wrong direction. Correct by subtraction.
2243 0 : masm.subl(Imm32(1), output);
2244 : // Cannot overflow: output was already checked against INT_MIN.
2245 : }
2246 : }
2247 :
2248 0 : masm.bind(&end);
2249 0 : }
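
// Why the positive path above adds GetBiggestNumberLessThan(0.5) instead of
// 0.5 (a hedged worked example): for the largest double strictly below 0.5,
//
//     double x = 0.49999999999999994;              // 0.5 - 2^-54
//     x + 0.5                 == 1.0;              // ties-to-even rounds up
//     x + 0.49999999999999994 == 0.9999999999999999;
//
// so adding exactly 0.5 and truncating would return 1, while Math.round(x)
// is 0; adding the biggest double below 0.5 keeps the sum under 1.0, and the
// truncation then gives the correct result.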
2250 :
2251 : void
2252 0 : CodeGeneratorX86Shared::visitRoundF(LRoundF* lir)
2253 : {
2254 0 : FloatRegister input = ToFloatRegister(lir->input());
2255 0 : FloatRegister temp = ToFloatRegister(lir->temp());
2256 0 : ScratchFloat32Scope scratch(masm);
2257 0 : Register output = ToRegister(lir->output());
2258 :
2259 0 : Label negativeOrZero, negative, end, bailout;
2260 :
2261 : // Branch to a slow path for non-positive inputs. Doesn't catch NaN.
2262 0 : masm.zeroFloat32(scratch);
2263 0 : masm.loadConstantFloat32(GetBiggestNumberLessThan(0.5f), temp);
2264 0 : masm.branchFloat(Assembler::DoubleLessThanOrEqual, input, scratch, &negativeOrZero);
2265 :
2266 : // Input is non-negative. Add the biggest float less than 0.5 and truncate,
2267 : // rounding down (because if the input is the biggest float less than 0.5,
2268 : // adding 0.5 would undesirably round up to 1). Note that we have to add
2269 : // the input to the temp register because we're not allowed to modify the
2270 : // input register.
2271 0 : masm.addFloat32(input, temp);
2272 :
2273 0 : bailoutCvttss2si(temp, output, lir->snapshot());
2274 :
2275 0 : masm.jump(&end);
2276 :
2277 : // Input is negative, +0 or -0.
2278 0 : masm.bind(&negativeOrZero);
2279 : // Branch on negative input.
2280 0 : masm.j(Assembler::NotEqual, &negative);
2281 :
2282 : // Bail on negative-zero.
2283 0 : masm.branchNegativeZeroFloat32(input, output, &bailout);
2284 0 : bailoutFrom(&bailout, lir->snapshot());
2285 :
2286 : // Input is +0.
2287 0 : masm.xor32(output, output);
2288 0 : masm.jump(&end);
2289 :
2290 : // Input is negative.
2291 0 : masm.bind(&negative);
2292 :
2293 :     // Inputs in ]-0.5; 0] need 0.5 added to them; other negative inputs need
2294 :     // the biggest float less than 0.5 added to them.
2295 0 : Label loadJoin;
2296 0 : masm.loadConstantFloat32(-0.5f, scratch);
2297 0 : masm.branchFloat(Assembler::DoubleLessThan, input, scratch, &loadJoin);
2298 0 : masm.loadConstantFloat32(0.5f, temp);
2299 0 : masm.bind(&loadJoin);
2300 :
2301 0 : if (AssemblerX86Shared::HasSSE41()) {
2302 : // Add 0.5 and round toward -Infinity. The result is stored in the temp
2303 : // register (currently contains 0.5).
2304 0 : masm.addFloat32(input, temp);
2305 0 : masm.vroundss(X86Encoding::RoundDown, temp, scratch, scratch);
2306 :
2307 : // Truncate.
2308 0 : bailoutCvttss2si(scratch, output, lir->snapshot());
2309 :
2310 : // If the result is positive zero, then the actual result is -0. Bail.
2311 : // Otherwise, the truncation will have produced the correct negative integer.
2312 0 : masm.test32(output, output);
2313 0 : bailoutIf(Assembler::Zero, lir->snapshot());
2314 : } else {
2315 0 : masm.addFloat32(input, temp);
2316 : // Round toward -Infinity without the benefit of ROUNDSS.
2317 : {
2318 : // If input + 0.5 >= 0, input is a negative number >= -0.5 and the result is -0.
2319 0 : masm.compareFloat(Assembler::DoubleGreaterThanOrEqual, temp, scratch);
2320 0 : bailoutIf(Assembler::DoubleGreaterThanOrEqual, lir->snapshot());
2321 :
2322 : // Truncate and round toward zero.
2323 : // This is off-by-one for everything but integer-valued inputs.
2324 0 : bailoutCvttss2si(temp, output, lir->snapshot());
2325 :
2326 :             // Test whether the truncated float was integer-valued.
2327 0 : masm.convertInt32ToFloat32(output, scratch);
2328 0 : masm.branchFloat(Assembler::DoubleEqualOrUnordered, temp, scratch, &end);
2329 :
2330 : // Input is not integer-valued, so we rounded off-by-one in the
2331 : // wrong direction. Correct by subtraction.
2332 0 : masm.subl(Imm32(1), output);
2333 : // Cannot overflow: output was already checked against INT_MIN.
2334 : }
2335 : }
2336 :
2337 0 : masm.bind(&end);
2338 0 : }
2339 :
2340 : void
2341 0 : CodeGeneratorX86Shared::visitNearbyInt(LNearbyInt* lir)
2342 : {
2343 0 : FloatRegister input = ToFloatRegister(lir->input());
2344 0 : FloatRegister output = ToFloatRegister(lir->output());
2345 :
2346 0 : RoundingMode roundingMode = lir->mir()->roundingMode();
2347 0 : masm.vroundsd(Assembler::ToX86RoundingMode(roundingMode), input, output, output);
2348 0 : }
2349 :
2350 : void
2351 0 : CodeGeneratorX86Shared::visitNearbyIntF(LNearbyIntF* lir)
2352 : {
2353 0 : FloatRegister input = ToFloatRegister(lir->input());
2354 0 : FloatRegister output = ToFloatRegister(lir->output());
2355 :
2356 0 : RoundingMode roundingMode = lir->mir()->roundingMode();
2357 0 : masm.vroundss(Assembler::ToX86RoundingMode(roundingMode), input, output, output);
2358 0 : }
2359 :
2360 : void
2361 8 : CodeGeneratorX86Shared::visitGuardShape(LGuardShape* guard)
2362 : {
2363 8 : Register obj = ToRegister(guard->input());
2364 8 : masm.cmpPtr(Operand(obj, ShapedObject::offsetOfShape()), ImmGCPtr(guard->mir()->shape()));
2365 :
2366 8 : bailoutIf(Assembler::NotEqual, guard->snapshot());
2367 8 : }
2368 :
2369 : void
2370 0 : CodeGeneratorX86Shared::visitGuardObjectGroup(LGuardObjectGroup* guard)
2371 : {
2372 0 : Register obj = ToRegister(guard->input());
2373 :
2374 0 : masm.cmpPtr(Operand(obj, JSObject::offsetOfGroup()), ImmGCPtr(guard->mir()->group()));
2375 :
2376 : Assembler::Condition cond =
2377 0 : guard->mir()->bailOnEquality() ? Assembler::Equal : Assembler::NotEqual;
2378 0 : bailoutIf(cond, guard->snapshot());
2379 0 : }
2380 :
2381 : void
2382 0 : CodeGeneratorX86Shared::visitGuardClass(LGuardClass* guard)
2383 : {
2384 0 : Register obj = ToRegister(guard->input());
2385 0 : Register tmp = ToRegister(guard->tempInt());
2386 :
2387 0 : masm.loadPtr(Address(obj, JSObject::offsetOfGroup()), tmp);
2388 0 : masm.cmpPtr(Operand(tmp, ObjectGroup::offsetOfClasp()), ImmPtr(guard->mir()->getClass()));
2389 0 : bailoutIf(Assembler::NotEqual, guard->snapshot());
2390 0 : }
2391 :
2392 : void
2393 0 : CodeGeneratorX86Shared::visitEffectiveAddress(LEffectiveAddress* ins)
2394 : {
2395 0 : const MEffectiveAddress* mir = ins->mir();
2396 0 : Register base = ToRegister(ins->base());
2397 0 : Register index = ToRegister(ins->index());
2398 0 : Register output = ToRegister(ins->output());
2399 0 : masm.leal(Operand(base, index, mir->scale(), mir->displacement()), output);
2400 0 : }
2401 :
2402 : void
2403 8 : CodeGeneratorX86Shared::generateInvalidateEpilogue()
2404 : {
2405 : // Ensure that there is enough space in the buffer for the OsiPoint
2406 : // patching to occur. Otherwise, we could overwrite the invalidation
2407 : // epilogue.
2408 72 : for (size_t i = 0; i < sizeof(void*); i += Assembler::NopSize())
2409 64 : masm.nop();
2410 :
2411 8 : masm.bind(&invalidate_);
2412 :
2413 : // Push the Ion script onto the stack (when we determine what that pointer is).
2414 8 : invalidateEpilogueData_ = masm.pushWithPatch(ImmWord(uintptr_t(-1)));
2415 8 : JitCode* thunk = gen->jitRuntime()->getInvalidationThunk();
2416 :
2417 8 : masm.call(thunk);
2418 :
2419 : // We should never reach this point in JIT code -- the invalidation thunk should
2420 : // pop the invalidated JS frame and return directly to its caller.
2421 8 : masm.assumeUnreachable("Should have returned directly to its caller instead of here.");
2422 8 : }
2423 :
2424 : void
2425 0 : CodeGeneratorX86Shared::visitNegI(LNegI* ins)
2426 : {
2427 0 : Register input = ToRegister(ins->input());
2428 0 : MOZ_ASSERT(input == ToRegister(ins->output()));
2429 :
2430 0 : masm.neg32(input);
2431 0 : }
2432 :
2433 : void
2434 0 : CodeGeneratorX86Shared::visitNegD(LNegD* ins)
2435 : {
2436 0 : FloatRegister input = ToFloatRegister(ins->input());
2437 0 : MOZ_ASSERT(input == ToFloatRegister(ins->output()));
2438 :
2439 0 : masm.negateDouble(input);
2440 0 : }
2441 :
2442 : void
2443 0 : CodeGeneratorX86Shared::visitNegF(LNegF* ins)
2444 : {
2445 0 : FloatRegister input = ToFloatRegister(ins->input());
2446 0 : MOZ_ASSERT(input == ToFloatRegister(ins->output()));
2447 :
2448 0 : masm.negateFloat(input);
2449 0 : }
2450 :
2451 : void
2452 0 : CodeGeneratorX86Shared::visitSimd128Int(LSimd128Int* ins)
2453 : {
2454 0 : const LDefinition* out = ins->getDef(0);
2455 0 : masm.loadConstantSimd128Int(ins->getValue(), ToFloatRegister(out));
2456 0 : }
2457 :
2458 : void
2459 0 : CodeGeneratorX86Shared::visitSimd128Float(LSimd128Float* ins)
2460 : {
2461 0 : const LDefinition* out = ins->getDef(0);
2462 0 : masm.loadConstantSimd128Float(ins->getValue(), ToFloatRegister(out));
2463 0 : }
2464 :
2465 : void
2466 0 : CodeGeneratorX86Shared::visitInt32x4ToFloat32x4(LInt32x4ToFloat32x4* ins)
2467 : {
2468 0 : FloatRegister in = ToFloatRegister(ins->input());
2469 0 : FloatRegister out = ToFloatRegister(ins->output());
2470 0 : masm.convertInt32x4ToFloat32x4(in, out);
2471 0 : }
2472 :
2473 : void
2474 0 : CodeGeneratorX86Shared::visitFloat32x4ToInt32x4(LFloat32x4ToInt32x4* ins)
2475 : {
2476 0 : FloatRegister in = ToFloatRegister(ins->input());
2477 0 : FloatRegister out = ToFloatRegister(ins->output());
2478 0 : Register temp = ToRegister(ins->temp());
2479 :
2480 0 : masm.convertFloat32x4ToInt32x4(in, out);
2481 :
2482 0 : auto* ool = new(alloc()) OutOfLineSimdFloatToIntCheck(temp, in, ins,
2483 0 : ins->mir()->bytecodeOffset());
2484 0 : addOutOfLineCode(ool, ins->mir());
2485 :
2486 0 : static const SimdConstant InvalidResult = SimdConstant::SplatX4(int32_t(-2147483648));
2487 :
2488 0 : ScratchSimd128Scope scratch(masm);
2489 0 : masm.loadConstantSimd128Int(InvalidResult, scratch);
2490 0 : masm.packedEqualInt32x4(Operand(out), scratch);
2491 : // TODO (bug 1156228): If we have SSE4.1, we can use PTEST here instead of
2492 : // the two following instructions.
2493 0 : masm.vmovmskps(scratch, temp);
2494 0 : masm.cmp32(temp, Imm32(0));
2495 0 : masm.j(Assembler::NotEqual, ool->entry());
2496 :
2497 0 : masm.bind(ool->rejoin());
2498 0 : }
2499 :
2500 : void
2501 0 : CodeGeneratorX86Shared::visitOutOfLineSimdFloatToIntCheck(OutOfLineSimdFloatToIntCheck *ool)
2502 : {
2503 0 : static const SimdConstant Int32MaxX4 = SimdConstant::SplatX4(2147483647.f);
2504 0 : static const SimdConstant Int32MinX4 = SimdConstant::SplatX4(-2147483648.f);
2505 :
2506 0 : Label onConversionError;
2507 :
2508 0 : FloatRegister input = ool->input();
2509 0 : Register temp = ool->temp();
2510 :
2511 0 : ScratchSimd128Scope scratch(masm);
2512 0 : masm.loadConstantSimd128Float(Int32MinX4, scratch);
2513 0 : masm.vcmpleps(Operand(input), scratch, scratch);
2514 0 : masm.vmovmskps(scratch, temp);
2515 0 : masm.cmp32(temp, Imm32(15));
2516 0 : masm.j(Assembler::NotEqual, &onConversionError);
2517 :
2518 0 : masm.loadConstantSimd128Float(Int32MaxX4, scratch);
2519 0 : masm.vcmpleps(Operand(input), scratch, scratch);
2520 0 : masm.vmovmskps(scratch, temp);
2521 0 : masm.cmp32(temp, Imm32(0));
2522 0 : masm.j(Assembler::NotEqual, &onConversionError);
2523 :
2524 0 : masm.jump(ool->rejoin());
2525 :
2526 0 : if (gen->compilingWasm()) {
2527 0 : masm.bindLater(&onConversionError, trap(ool, wasm::Trap::ImpreciseSimdConversion));
2528 : } else {
2529 0 : masm.bind(&onConversionError);
2530 0 : bailout(ool->ins()->snapshot());
2531 : }
2532 0 : }
2533 :
2534 : // Convert Float32x4 to Uint32x4.
2535 : //
2536 : // If any input lane value is out of range or NaN, bail out.
2537 : void
2538 0 : CodeGeneratorX86Shared::visitFloat32x4ToUint32x4(LFloat32x4ToUint32x4* ins)
2539 : {
2540 0 : const MSimdConvert* mir = ins->mir();
2541 0 : FloatRegister in = ToFloatRegister(ins->input());
2542 0 : FloatRegister out = ToFloatRegister(ins->output());
2543 0 : Register temp = ToRegister(ins->tempR());
2544 0 : FloatRegister tempF = ToFloatRegister(ins->tempF());
2545 :
2546 : // Classify lane values into 4 disjoint classes:
2547 : //
2548 : // N-lanes: in <= -1.0
2549 : // A-lanes: -1.0 < in <= 0x0.ffffffp31
2550 : // B-lanes: 0x1.0p31 <= in <= 0x0.ffffffp32
2551 : // V-lanes: 0x1.0p32 <= in, or isnan(in)
2552 : //
2553 : // We need to bail out to throw a RangeError if we see any N-lanes or
2554 : // V-lanes.
2555 : //
2556 : // For A-lanes and B-lanes, we make two float -> int32 conversions:
2557 : //
2558 : // A = cvttps2dq(in)
2559 : // B = cvttps2dq(in - 0x1.0p31f)
2560 : //
2561 : // Note that the subtraction for the B computation is exact for B-lanes.
2562 : // There is no rounding, so B is the low 31 bits of the correctly converted
2563 : // result.
2564 : //
2565 : // The cvttps2dq instruction produces 0x80000000 when the input is NaN or
2566 : // out of range for a signed int32_t. This conveniently provides the missing
2567 : // high bit for B, so the desired result is A for A-lanes and A|B for
2568 : // B-lanes.
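    // A hedged per-lane sketch of the A/B combination described above (the
    // helper cvttss2si is an assumption standing in for the scalar form of
    // cvttps2dq, which yields INT32_MIN for NaN or out-of-range inputs; the
    // vector code that follows is the real implementation):
    //
    //     int32_t cvttss2si(float f);  // INT32_MIN when f is NaN/out of range
    //     uint32_t convertLane(float f) {
    //         int32_t A = cvttss2si(f);
    //         int32_t B = cvttss2si(f - 2147483648.0f);
    //         return A >= 0 ? uint32_t(A) : (uint32_t(A) | uint32_t(B));
    //     }
    //
    // e.g. f = 3000000000.0f (a B-lane): A = INT32_MIN, B = 852516352, and
    // uint32_t(A) | uint32_t(B) == 3000000000u.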
2569 :
2570 0 : ScratchSimd128Scope scratch(masm);
2571 :
2572 : // TODO: If the majority of lanes are A-lanes, it could be faster to compute
2573 : // A first, use vmovmskps to check for any non-A-lanes and handle them in
2574 :     // ool code. OTOH, if we're wrong about the lane distribution, that would be
2575 : // slower.
2576 :
2577 : // Compute B in |scratch|.
2578 : static const float Adjust = 0x80000000; // 0x1.0p31f for the benefit of MSVC.
2579 0 : static const SimdConstant Bias = SimdConstant::SplatX4(-Adjust);
2580 0 : masm.loadConstantSimd128Float(Bias, scratch);
2581 0 : masm.packedAddFloat32(Operand(in), scratch);
2582 0 : masm.convertFloat32x4ToInt32x4(scratch, scratch);
2583 :
2584 : // Compute A in |out|. This is the last time we use |in| and the first time
2585 : // we use |out|, so we can tolerate if they are the same register.
2586 0 : masm.convertFloat32x4ToInt32x4(in, out);
2587 :
2588 : // We can identify A-lanes by the sign bits in A: Any A-lanes will be
2589 : // positive in A, and N, B, and V-lanes will be 0x80000000 in A. Compute a
2590 : // mask of non-A-lanes into |tempF|.
2591 0 : masm.zeroSimd128Float(tempF);
2592 0 : masm.packedGreaterThanInt32x4(Operand(out), tempF);
2593 :
2594 : // Clear the A-lanes in B.
2595 0 : masm.bitwiseAndSimd128(Operand(tempF), scratch);
2596 :
2597 : // Compute the final result: A for A-lanes, A|B for B-lanes.
2598 0 : masm.bitwiseOrSimd128(Operand(scratch), out);
2599 :
2600 : // We still need to filter out the V-lanes. They would show up as 0x80000000
2601 : // in both A and B. Since we cleared the valid A-lanes in B, the V-lanes are
2602 : // the remaining negative lanes in B.
2603 0 : masm.vmovmskps(scratch, temp);
2604 0 : masm.cmp32(temp, Imm32(0));
2605 :
2606 0 : if (gen->compilingWasm())
2607 0 : masm.j(Assembler::NotEqual, trap(mir, wasm::Trap::ImpreciseSimdConversion));
2608 : else
2609 0 : bailoutIf(Assembler::NotEqual, ins->snapshot());
2610 0 : }
2611 :
2612 : void
2613 0 : CodeGeneratorX86Shared::visitSimdValueInt32x4(LSimdValueInt32x4* ins)
2614 : {
2615 0 : MOZ_ASSERT(ins->mir()->type() == MIRType::Int32x4 || ins->mir()->type() == MIRType::Bool32x4);
2616 :
2617 0 : FloatRegister output = ToFloatRegister(ins->output());
2618 0 : if (AssemblerX86Shared::HasSSE41()) {
2619 0 : masm.vmovd(ToRegister(ins->getOperand(0)), output);
2620 0 : for (size_t i = 1; i < 4; ++i) {
2621 0 : Register r = ToRegister(ins->getOperand(i));
2622 0 : masm.vpinsrd(i, r, output, output);
2623 : }
2624 0 : return;
2625 : }
2626 :
2627 0 : masm.reserveStack(Simd128DataSize);
2628 0 : for (size_t i = 0; i < 4; ++i) {
2629 0 : Register r = ToRegister(ins->getOperand(i));
2630 0 : masm.store32(r, Address(StackPointer, i * sizeof(int32_t)));
2631 : }
2632 0 : masm.loadAlignedSimd128Int(Address(StackPointer, 0), output);
2633 0 : masm.freeStack(Simd128DataSize);
2634 : }
2635 :
2636 : void
2637 0 : CodeGeneratorX86Shared::visitSimdValueFloat32x4(LSimdValueFloat32x4* ins)
2638 : {
2639 0 : MOZ_ASSERT(ins->mir()->type() == MIRType::Float32x4);
2640 :
2641 0 : FloatRegister r0 = ToFloatRegister(ins->getOperand(0));
2642 0 : FloatRegister r1 = ToFloatRegister(ins->getOperand(1));
2643 0 : FloatRegister r2 = ToFloatRegister(ins->getOperand(2));
2644 0 : FloatRegister r3 = ToFloatRegister(ins->getOperand(3));
2645 0 : FloatRegister tmp = ToFloatRegister(ins->getTemp(0));
2646 0 : FloatRegister output = ToFloatRegister(ins->output());
2647 :
2648 0 : FloatRegister r0Copy = masm.reusedInputFloat32x4(r0, output);
2649 0 : FloatRegister r1Copy = masm.reusedInputFloat32x4(r1, tmp);
2650 :
2651 0 : masm.vunpcklps(r3, r1Copy, tmp);
2652 0 : masm.vunpcklps(r2, r0Copy, output);
2653 0 : masm.vunpcklps(tmp, output, output);
2654 0 : }
2655 :
2656 : void
2657 0 : CodeGeneratorX86Shared::visitSimdSplatX16(LSimdSplatX16* ins)
2658 : {
2659 0 : MOZ_ASSERT(SimdTypeToLength(ins->mir()->type()) == 16);
2660 0 : Register input = ToRegister(ins->getOperand(0));
2661 0 : FloatRegister output = ToFloatRegister(ins->output());
2662 0 : masm.vmovd(input, output);
2663 0 : if (AssemblerX86Shared::HasSSSE3()) {
2664 0 : masm.zeroSimd128Int(ScratchSimd128Reg);
2665 0 : masm.vpshufb(ScratchSimd128Reg, output, output);
2666 : } else {
2667 : // Use two shifts to duplicate the low 8 bits into the low 16 bits.
2668 0 : masm.vpsllw(Imm32(8), output, output);
2669 0 : masm.vmovdqa(output, ScratchSimd128Reg);
2670 0 : masm.vpsrlw(Imm32(8), ScratchSimd128Reg, ScratchSimd128Reg);
2671 0 : masm.vpor(ScratchSimd128Reg, output, output);
2672 : // Then do an X8 splat.
2673 0 : masm.vpshuflw(0, output, output);
2674 0 : masm.vpshufd(0, output, output);
2675 : }
2676 0 : }
2677 :
2678 : void
2679 0 : CodeGeneratorX86Shared::visitSimdSplatX8(LSimdSplatX8* ins)
2680 : {
2681 0 : MOZ_ASSERT(SimdTypeToLength(ins->mir()->type()) == 8);
2682 0 : Register input = ToRegister(ins->getOperand(0));
2683 0 : FloatRegister output = ToFloatRegister(ins->output());
2684 0 : masm.vmovd(input, output);
2685 0 : masm.vpshuflw(0, output, output);
2686 0 : masm.vpshufd(0, output, output);
2687 0 : }
2688 :
2689 : void
2690 0 : CodeGeneratorX86Shared::visitSimdSplatX4(LSimdSplatX4* ins)
2691 : {
2692 0 : FloatRegister output = ToFloatRegister(ins->output());
2693 :
2694 0 : MSimdSplat* mir = ins->mir();
2695 0 : MOZ_ASSERT(IsSimdType(mir->type()));
2696 : JS_STATIC_ASSERT(sizeof(float) == sizeof(int32_t));
2697 :
2698 0 : if (mir->type() == MIRType::Float32x4) {
2699 0 : FloatRegister r = ToFloatRegister(ins->getOperand(0));
2700 0 : FloatRegister rCopy = masm.reusedInputFloat32x4(r, output);
2701 0 : masm.vshufps(0, rCopy, rCopy, output);
2702 : } else {
2703 0 : Register r = ToRegister(ins->getOperand(0));
2704 0 : masm.vmovd(r, output);
2705 0 : masm.vpshufd(0, output, output);
2706 : }
2707 0 : }
2708 :
2709 : void
2710 0 : CodeGeneratorX86Shared::visitSimdReinterpretCast(LSimdReinterpretCast* ins)
2711 : {
2712 0 : FloatRegister input = ToFloatRegister(ins->input());
2713 0 : FloatRegister output = ToFloatRegister(ins->output());
2714 :
2715 0 : if (input.aliases(output))
2716 0 : return;
2717 :
2718 0 : if (IsIntegerSimdType(ins->mir()->type()))
2719 0 : masm.vmovdqa(input, output);
2720 : else
2721 0 : masm.vmovaps(input, output);
2722 : }
2723 :
2724 : // Extract an integer lane from the 32x4 vector register |input| and place it in
2725 : // |output|.
2726 : void
2727 0 : CodeGeneratorX86Shared::emitSimdExtractLane32x4(FloatRegister input, Register output, unsigned lane)
2728 : {
2729 0 : if (lane == 0) {
2730 : // The value we want to extract is in the low double-word
2731 0 : masm.moveLowInt32(input, output);
2732 0 : } else if (AssemblerX86Shared::HasSSE41()) {
2733 0 : masm.vpextrd(lane, input, output);
2734 : } else {
2735 0 : uint32_t mask = MacroAssembler::ComputeShuffleMask(lane);
2736 0 : masm.shuffleInt32(mask, input, ScratchSimd128Reg);
2737 0 : masm.moveLowInt32(ScratchSimd128Reg, output);
2738 : }
2739 0 : }
2740 :
2741 : // Extract an integer lane from the 16x8 vector register |input|, sign- or
2742 : // zero-extend to 32 bits and place the result in |output|.
2743 : void
2744 0 : CodeGeneratorX86Shared::emitSimdExtractLane16x8(FloatRegister input, Register output,
2745 : unsigned lane, SimdSign signedness)
2746 : {
2747 : // Unlike pextrd and pextrb, this is available in SSE2.
2748 0 : masm.vpextrw(lane, input, output);
2749 :
2750 0 : if (signedness == SimdSign::Signed)
2751 0 : masm.movswl(output, output);
2752 0 : }
2753 :
2754 : // Extract an integer lane from the 8x16 vector register |input|, sign- or
2755 : // zero-extend to 32 bits and place the result in |output|.
2756 : void
2757 0 : CodeGeneratorX86Shared::emitSimdExtractLane8x16(FloatRegister input, Register output,
2758 : unsigned lane, SimdSign signedness)
2759 : {
2760 0 : if (AssemblerX86Shared::HasSSE41()) {
2761 0 : masm.vpextrb(lane, input, output);
2762 : // vpextrb clears the high bits, so no further extension required.
2763 0 : if (signedness == SimdSign::Unsigned)
2764 0 : signedness = SimdSign::NotApplicable;
2765 : } else {
2766 :         // Extract the 16-bit chunk containing our 8-bit lane, then shift the
2767 :         // desired 8 bits into place.
2768 0 : emitSimdExtractLane16x8(input, output, lane / 2, SimdSign::Unsigned);
2769 0 : if (lane % 2) {
2770 0 : masm.shrl(Imm32(8), output);
2771 : // The shrl handles the zero-extension. Don't repeat it.
2772 0 : if (signedness == SimdSign::Unsigned)
2773 0 : signedness = SimdSign::NotApplicable;
2774 : }
2775 : }
2776 :
2777 : // We have the right low 8 bits in |output|, but we may need to fix the high
2778 : // bits. Note that this requires |output| to be one of the %eax-%edx
2779 : // registers.
2780 0 : switch (signedness) {
2781 : case SimdSign::Signed:
2782 0 : masm.movsbl(output, output);
2783 0 : break;
2784 : case SimdSign::Unsigned:
2785 0 : masm.movzbl(output, output);
2786 0 : break;
2787 : case SimdSign::NotApplicable:
2788 : // No adjustment needed.
2789 0 : break;
2790 : }
2791 0 : }
2792 :
2793 : void
2794 0 : CodeGeneratorX86Shared::visitSimdExtractElementB(LSimdExtractElementB* ins)
2795 : {
2796 0 : FloatRegister input = ToFloatRegister(ins->input());
2797 0 : Register output = ToRegister(ins->output());
2798 0 : MSimdExtractElement* mir = ins->mir();
2799 0 : unsigned length = SimdTypeToLength(mir->specialization());
2800 :
2801 0 : switch (length) {
2802 : case 4:
2803 0 : emitSimdExtractLane32x4(input, output, mir->lane());
2804 0 : break;
2805 : case 8:
2806 : // Get a lane, don't bother fixing the high bits since we'll mask below.
2807 0 : emitSimdExtractLane16x8(input, output, mir->lane(), SimdSign::NotApplicable);
2808 0 : break;
2809 : case 16:
2810 0 : emitSimdExtractLane8x16(input, output, mir->lane(), SimdSign::NotApplicable);
2811 0 : break;
2812 : default:
2813 0 : MOZ_CRASH("Unhandled SIMD length");
2814 : }
2815 :
2816 : // We need to generate a 0/1 value. We have 0/-1 and possibly dirty high bits.
2817 0 : masm.and32(Imm32(1), output);
2818 0 : }
2819 :
2820 : void
2821 0 : CodeGeneratorX86Shared::visitSimdExtractElementI(LSimdExtractElementI* ins)
2822 : {
2823 0 : FloatRegister input = ToFloatRegister(ins->input());
2824 0 : Register output = ToRegister(ins->output());
2825 0 : MSimdExtractElement* mir = ins->mir();
2826 0 : unsigned length = SimdTypeToLength(mir->specialization());
2827 :
2828 0 : switch (length) {
2829 : case 4:
2830 0 : emitSimdExtractLane32x4(input, output, mir->lane());
2831 0 : break;
2832 : case 8:
2833 0 : emitSimdExtractLane16x8(input, output, mir->lane(), mir->signedness());
2834 0 : break;
2835 : case 16:
2836 0 : emitSimdExtractLane8x16(input, output, mir->lane(), mir->signedness());
2837 0 : break;
2838 : default:
2839 0 : MOZ_CRASH("Unhandled SIMD length");
2840 : }
2841 0 : }
2842 :
2843 : void
2844 0 : CodeGeneratorX86Shared::visitSimdExtractElementU2D(LSimdExtractElementU2D* ins)
2845 : {
2846 0 : FloatRegister input = ToFloatRegister(ins->input());
2847 0 : FloatRegister output = ToFloatRegister(ins->output());
2848 0 : Register temp = ToRegister(ins->temp());
2849 0 : MSimdExtractElement* mir = ins->mir();
2850 0 : MOZ_ASSERT(mir->specialization() == MIRType::Int32x4);
2851 0 : emitSimdExtractLane32x4(input, temp, mir->lane());
2852 0 : masm.convertUInt32ToDouble(temp, output);
2853 0 : }
2854 :
2855 : void
2856 0 : CodeGeneratorX86Shared::visitSimdExtractElementF(LSimdExtractElementF* ins)
2857 : {
2858 0 : FloatRegister input = ToFloatRegister(ins->input());
2859 0 : FloatRegister output = ToFloatRegister(ins->output());
2860 :
2861 0 : unsigned lane = ins->mir()->lane();
2862 0 : if (lane == 0) {
2863 : // The value we want to extract is in the low double-word
2864 0 : if (input != output)
2865 0 : masm.moveFloat32(input, output);
2866 0 : } else if (lane == 2) {
2867 0 : masm.moveHighPairToLowPairFloat32(input, output);
2868 : } else {
2869 0 : uint32_t mask = MacroAssembler::ComputeShuffleMask(lane);
2870 0 : masm.shuffleFloat32(mask, input, output);
2871 : }
2872 : // NaNs contained within SIMD values are not enforced to be canonical, so
2873 : // when we extract an element into a "regular" scalar JS value, we have to
2874 : // canonicalize. In wasm code, we can skip this, as wasm only has to
2875 : // canonicalize NaNs at FFI boundaries.
2876 0 : if (!gen->compilingWasm())
2877 0 : masm.canonicalizeFloat(output);
2878 0 : }
2879 :
2880 : void
2881 0 : CodeGeneratorX86Shared::visitSimdInsertElementI(LSimdInsertElementI* ins)
2882 : {
2883 0 : FloatRegister vector = ToFloatRegister(ins->vector());
2884 0 : Register value = ToRegister(ins->value());
2885 0 : FloatRegister output = ToFloatRegister(ins->output());
2886 0 : MOZ_ASSERT(vector == output); // defineReuseInput(0)
2887 :
2888 0 : unsigned lane = ins->lane();
2889 0 : unsigned length = ins->length();
2890 :
2891 0 : if (length == 8) {
2892 : // Available in SSE 2.
2893 0 : masm.vpinsrw(lane, value, vector, output);
2894 0 : return;
2895 : }
2896 :
2897 :     // Note that, unlike float32x4, we cannot use vmovd if the inserted
2898 : // value goes into the first component, as vmovd clears out the higher lanes
2899 : // of the output.
2900 0 : if (AssemblerX86Shared::HasSSE41()) {
2901 : // TODO: Teach Lowering that we don't need defineReuseInput if we have AVX.
2902 0 : switch (length) {
2903 : case 4:
2904 0 : masm.vpinsrd(lane, value, vector, output);
2905 0 : return;
2906 : case 16:
2907 0 : masm.vpinsrb(lane, value, vector, output);
2908 0 : return;
2909 : }
2910 : }
2911 :
2912 0 : masm.reserveStack(Simd128DataSize);
2913 0 : masm.storeAlignedSimd128Int(vector, Address(StackPointer, 0));
2914 0 : switch (length) {
2915 : case 4:
2916 0 : masm.store32(value, Address(StackPointer, lane * sizeof(int32_t)));
2917 0 : break;
2918 : case 16:
2919 :         // Note that this requires `value` to be in one of the registers where the
2920 :         // low 8 bits are addressable (%eax - %edx on x86, all of them on x86-64).
2921 0 : masm.store8(value, Address(StackPointer, lane * sizeof(int8_t)));
2922 0 : break;
2923 : default:
2924 0 : MOZ_CRASH("Unsupported SIMD length");
2925 : }
2926 0 : masm.loadAlignedSimd128Int(Address(StackPointer, 0), output);
2927 0 : masm.freeStack(Simd128DataSize);
2928 : }
2929 :
2930 : void
2931 0 : CodeGeneratorX86Shared::visitSimdInsertElementF(LSimdInsertElementF* ins)
2932 : {
2933 0 : FloatRegister vector = ToFloatRegister(ins->vector());
2934 0 : FloatRegister value = ToFloatRegister(ins->value());
2935 0 : FloatRegister output = ToFloatRegister(ins->output());
2936 0 : MOZ_ASSERT(vector == output); // defineReuseInput(0)
2937 :
2938 0 : if (ins->lane() == 0) {
2939 : // As both operands are registers, vmovss doesn't modify the upper bits
2940 : // of the destination operand.
2941 0 : if (value != output)
2942 0 : masm.vmovss(value, vector, output);
2943 0 : return;
2944 : }
2945 :
2946 0 : if (AssemblerX86Shared::HasSSE41()) {
2947 : // The input value is in the low float32 of the 'value' FloatRegister.
2948 0 : masm.vinsertps(masm.vinsertpsMask(0, ins->lane()), value, output, output);
2949 0 : return;
2950 : }
2951 :
2952 0 : unsigned component = unsigned(ins->lane());
2953 0 : masm.reserveStack(Simd128DataSize);
2954 0 : masm.storeAlignedSimd128Float(vector, Address(StackPointer, 0));
2955 0 : masm.storeFloat32(value, Address(StackPointer, component * sizeof(int32_t)));
2956 0 : masm.loadAlignedSimd128Float(Address(StackPointer, 0), output);
2957 0 : masm.freeStack(Simd128DataSize);
2958 : }
2959 :
2960 : void
2961 0 : CodeGeneratorX86Shared::visitSimdAllTrue(LSimdAllTrue* ins)
2962 : {
2963 0 : FloatRegister input = ToFloatRegister(ins->input());
2964 0 : Register output = ToRegister(ins->output());
2965 :
2966 : // We know that the input lanes are boolean, so they are either 0 or -1.
2967 : // The all-true vector has all 128 bits set, no matter the lane geometry.
2968 0 : masm.vpmovmskb(input, output);
2969 0 : masm.cmp32(output, Imm32(0xffff));
2970 0 : masm.emitSet(Assembler::Zero, output);
2971 0 : }
2972 :
2973 : void
2974 0 : CodeGeneratorX86Shared::visitSimdAnyTrue(LSimdAnyTrue* ins)
2975 : {
2976 0 : FloatRegister input = ToFloatRegister(ins->input());
2977 0 : Register output = ToRegister(ins->output());
2978 :
2979 0 : masm.vpmovmskb(input, output);
2980 0 : masm.cmp32(output, Imm32(0x0));
2981 0 : masm.emitSet(Assembler::NonZero, output);
2982 0 : }
2983 :
2984 : template <class T, class Reg> void
2985 0 : CodeGeneratorX86Shared::visitSimdGeneralShuffle(LSimdGeneralShuffleBase* ins, Reg tempRegister)
2986 : {
2987 0 : MSimdGeneralShuffle* mir = ins->mir();
2988 0 : unsigned numVectors = mir->numVectors();
2989 :
2990 0 : Register laneTemp = ToRegister(ins->temp());
2991 :
2992 : // This won't generate fast code, but it's fine because we expect users
2993 :     // to have used constant indices (and thus MSimdGeneralShuffle to be folded
2994 : // into MSimdSwizzle/MSimdShuffle, which are fast).
2995 :
2996 : // We need stack space for the numVectors inputs and for the output vector.
2997 0 : unsigned stackSpace = Simd128DataSize * (numVectors + 1);
2998 0 : masm.reserveStack(stackSpace);
2999 :
3000 0 : for (unsigned i = 0; i < numVectors; i++) {
3001 0 : masm.storeAlignedVector<T>(ToFloatRegister(ins->vector(i)),
3002 : Address(StackPointer, Simd128DataSize * (1 + i)));
3003 : }
3004 :
3005 0 : Label bail;
3006 0 : const Scale laneScale = ScaleFromElemWidth(sizeof(T));
3007 :
3008 0 : for (size_t i = 0; i < mir->numLanes(); i++) {
3009 0 : Operand lane = ToOperand(ins->lane(i));
3010 :
3011 0 : masm.cmp32(lane, Imm32(numVectors * mir->numLanes() - 1));
3012 0 : masm.j(Assembler::Above, &bail);
3013 :
3014 0 : if (lane.kind() == Operand::REG) {
3015 0 : masm.loadScalar<T>(Operand(StackPointer, ToRegister(ins->lane(i)), laneScale, Simd128DataSize),
3016 : tempRegister);
3017 : } else {
3018 0 : masm.load32(lane, laneTemp);
3019 0 : masm.loadScalar<T>(Operand(StackPointer, laneTemp, laneScale, Simd128DataSize), tempRegister);
3020 : }
3021 :
3022 0 : masm.storeScalar<T>(tempRegister, Address(StackPointer, i * sizeof(T)));
3023 : }
3024 :
3025 0 : FloatRegister output = ToFloatRegister(ins->output());
3026 0 : masm.loadAlignedVector<T>(Address(StackPointer, 0), output);
3027 :
3028 0 : Label join;
3029 0 : masm.jump(&join);
3030 :
3031 : {
3032 0 : masm.bind(&bail);
3033 0 : masm.freeStack(stackSpace);
3034 0 : bailout(ins->snapshot());
3035 : }
3036 :
3037 0 : masm.bind(&join);
3038 0 : masm.setFramePushed(masm.framePushed() + stackSpace);
3039 0 : masm.freeStack(stackSpace);
3040 0 : }
3041 :
3042 : void
3043 0 : CodeGeneratorX86Shared::visitSimdGeneralShuffleI(LSimdGeneralShuffleI* ins)
3044 : {
3045 0 : switch (ins->mir()->type()) {
3046 : case MIRType::Int8x16:
3047 0 : return visitSimdGeneralShuffle<int8_t, Register>(ins, ToRegister(ins->temp()));
3048 : case MIRType::Int16x8:
3049 0 : return visitSimdGeneralShuffle<int16_t, Register>(ins, ToRegister(ins->temp()));
3050 : case MIRType::Int32x4:
3051 0 : return visitSimdGeneralShuffle<int32_t, Register>(ins, ToRegister(ins->temp()));
3052 : default:
3053 0 : MOZ_CRASH("unsupported type for general shuffle");
3054 : }
3055 : }
3056 : void
3057 0 : CodeGeneratorX86Shared::visitSimdGeneralShuffleF(LSimdGeneralShuffleF* ins)
3058 : {
3059 0 : ScratchFloat32Scope scratch(masm);
3060 0 : visitSimdGeneralShuffle<float, FloatRegister>(ins, scratch);
3061 0 : }
3062 :
3063 : void
3064 0 : CodeGeneratorX86Shared::visitSimdSwizzleI(LSimdSwizzleI* ins)
3065 : {
3066 0 : FloatRegister input = ToFloatRegister(ins->input());
3067 0 : FloatRegister output = ToFloatRegister(ins->output());
3068 0 : const unsigned numLanes = ins->numLanes();
3069 :
3070 0 : switch (numLanes) {
3071 : case 4: {
3072 0 : uint32_t x = ins->lane(0);
3073 0 : uint32_t y = ins->lane(1);
3074 0 : uint32_t z = ins->lane(2);
3075 0 : uint32_t w = ins->lane(3);
3076 :
3077 0 : uint32_t mask = MacroAssembler::ComputeShuffleMask(x, y, z, w);
3078 0 : masm.shuffleInt32(mask, input, output);
3079 0 : return;
3080 : }
3081 : }
3082 :
3083 : // In the general case, use pshufb if it is available. Convert to a
3084 : // byte-wise swizzle.
3085 0 : const unsigned bytesPerLane = 16 / numLanes;
3086 : int8_t bLane[16];
3087 0 : for (unsigned i = 0; i < numLanes; i++) {
3088 0 : for (unsigned b = 0; b < bytesPerLane; b++) {
3089 0 : bLane[i * bytesPerLane + b] = ins->lane(i) * bytesPerLane + b;
3090 : }
3091 : }
3092 :
3093 0 : if (AssemblerX86Shared::HasSSSE3()) {
3094 0 : ScratchSimd128Scope scratch(masm);
3095 0 : masm.loadConstantSimd128Int(SimdConstant::CreateX16(bLane), scratch);
3096 0 : FloatRegister inputCopy = masm.reusedInputInt32x4(input, output);
3097 0 : masm.vpshufb(scratch, inputCopy, output);
3098 0 : return;
3099 : }
3100 :
3101 : // Worst-case fallback for pre-SSSE3 machines. Bounce through memory.
3102 0 : Register temp = ToRegister(ins->getTemp(0));
3103 0 : masm.reserveStack(2 * Simd128DataSize);
3104 0 : masm.storeAlignedSimd128Int(input, Address(StackPointer, Simd128DataSize));
3105 0 : for (unsigned i = 0; i < 16; i++) {
3106 0 : masm.load8ZeroExtend(Address(StackPointer, Simd128DataSize + bLane[i]), temp);
3107 0 : masm.store8(temp, Address(StackPointer, i));
3108 : }
3109 0 : masm.loadAlignedSimd128Int(Address(StackPointer, 0), output);
3110 0 : masm.freeStack(2 * Simd128DataSize);
3111 : }
3112 :
3113 : void
3114 0 : CodeGeneratorX86Shared::visitSimdSwizzleF(LSimdSwizzleF* ins)
3115 : {
3116 0 : FloatRegister input = ToFloatRegister(ins->input());
3117 0 : FloatRegister output = ToFloatRegister(ins->output());
3118 0 : MOZ_ASSERT(ins->numLanes() == 4);
3119 :
3120 0 : uint32_t x = ins->lane(0);
3121 0 : uint32_t y = ins->lane(1);
3122 0 : uint32_t z = ins->lane(2);
3123 0 : uint32_t w = ins->lane(3);
3124 :
3125 0 : if (AssemblerX86Shared::HasSSE3()) {
3126 0 : if (ins->lanesMatch(0, 0, 2, 2)) {
3127 0 : masm.vmovsldup(input, output);
3128 0 : return;
3129 : }
3130 0 : if (ins->lanesMatch(1, 1, 3, 3)) {
3131 0 : masm.vmovshdup(input, output);
3132 0 : return;
3133 : }
3134 : }
3135 :
3136 :     // TODO: Here and below, arch-specific lowering could identify this pattern
3137 : // and use defineReuseInput to avoid this move (bug 1084404)
3138 0 : if (ins->lanesMatch(2, 3, 2, 3)) {
3139 0 : FloatRegister inputCopy = masm.reusedInputFloat32x4(input, output);
3140 0 : masm.vmovhlps(input, inputCopy, output);
3141 0 : return;
3142 : }
3143 :
3144 0 : if (ins->lanesMatch(0, 1, 0, 1)) {
3145 0 : if (AssemblerX86Shared::HasSSE3() && !AssemblerX86Shared::HasAVX()) {
3146 0 : masm.vmovddup(input, output);
3147 0 : return;
3148 : }
3149 0 : FloatRegister inputCopy = masm.reusedInputFloat32x4(input, output);
3150 0 : masm.vmovlhps(input, inputCopy, output);
3151 0 : return;
3152 : }
3153 :
3154 0 : if (ins->lanesMatch(0, 0, 1, 1)) {
3155 0 : FloatRegister inputCopy = masm.reusedInputFloat32x4(input, output);
3156 0 : masm.vunpcklps(input, inputCopy, output);
3157 0 : return;
3158 : }
3159 :
3160 0 : if (ins->lanesMatch(2, 2, 3, 3)) {
3161 0 : FloatRegister inputCopy = masm.reusedInputFloat32x4(input, output);
3162 0 : masm.vunpckhps(input, inputCopy, output);
3163 0 : return;
3164 : }
3165 :
3166 0 : uint32_t mask = MacroAssembler::ComputeShuffleMask(x, y, z, w);
3167 0 : masm.shuffleFloat32(mask, input, output);
3168 : }
3169 :
3170 : void
3171 0 : CodeGeneratorX86Shared::visitSimdShuffle(LSimdShuffle* ins)
3172 : {
3173 0 : FloatRegister lhs = ToFloatRegister(ins->lhs());
3174 0 : FloatRegister rhs = ToFloatRegister(ins->rhs());
3175 0 : FloatRegister output = ToFloatRegister(ins->output());
3176 0 : const unsigned numLanes = ins->numLanes();
3177 0 : const unsigned bytesPerLane = 16 / numLanes;
3178 :
3179 : // Convert the shuffle to a byte-wise shuffle.
3180 : uint8_t bLane[16];
3181 0 : for (unsigned i = 0; i < numLanes; i++) {
3182 0 : for (unsigned b = 0; b < bytesPerLane; b++) {
3183 0 : bLane[i * bytesPerLane + b] = ins->lane(i) * bytesPerLane + b;
3184 : }
3185 : }
3186 :
3187 : // Use pshufb if it is available.
3188 0 : if (AssemblerX86Shared::HasSSSE3()) {
3189 0 : FloatRegister scratch1 = ToFloatRegister(ins->temp());
3190 0 : ScratchSimd128Scope scratch2(masm);
3191 :
3192 : // Use pshufb instructions to gather the lanes from each source vector.
3193 : // A negative index creates a zero lane, so the two vectors can be combined.
3194 :
3195 : // Set scratch2 = lanes from lhs.
3196 : int8_t idx[16];
3197 0 : for (unsigned i = 0; i < 16; i++)
3198 0 : idx[i] = bLane[i] < 16 ? bLane[i] : -1;
3199 0 : masm.loadConstantSimd128Int(SimdConstant::CreateX16(idx), scratch1);
3200 0 : FloatRegister lhsCopy = masm.reusedInputInt32x4(lhs, scratch2);
3201 0 : masm.vpshufb(scratch1, lhsCopy, scratch2);
3202 :
3203 : // Set output = lanes from rhs.
3204 0 : for (unsigned i = 0; i < 16; i++)
3205 0 : idx[i] = bLane[i] >= 16 ? bLane[i] - 16 : -1;
3206 0 : masm.loadConstantSimd128Int(SimdConstant::CreateX16(idx), scratch1);
3207 0 : FloatRegister rhsCopy = masm.reusedInputInt32x4(rhs, output);
3208 0 : masm.vpshufb(scratch1, rhsCopy, output);
3209 :
3210 : // Combine.
3211 0 : masm.vpor(scratch2, output, output);
3212 0 : return;
3213 : }
3214 :
3215 : // Worst-case fallback for pre-SSSE3 machines. Bounce through memory.
3216 0 : Register temp = ToRegister(ins->getTemp(0));
3217 0 : masm.reserveStack(3 * Simd128DataSize);
3218 0 : masm.storeAlignedSimd128Int(lhs, Address(StackPointer, Simd128DataSize));
3219 0 : masm.storeAlignedSimd128Int(rhs, Address(StackPointer, 2 * Simd128DataSize));
3220 0 : for (unsigned i = 0; i < 16; i++) {
3221 0 : masm.load8ZeroExtend(Address(StackPointer, Simd128DataSize + bLane[i]), temp);
3222 0 : masm.store8(temp, Address(StackPointer, i));
3223 : }
3224 0 : masm.loadAlignedSimd128Int(Address(StackPointer, 0), output);
3225 0 : masm.freeStack(3 * Simd128DataSize);
3226 : }
3227 :
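// Hedged sketch of the two-input shuffle above: lane indices are first widened
// to byte indices (bLane), and each pshufb-style pass turns a negative index
// into a zero byte, so the lhs and rhs selections can simply be OR'd together.
// selectBytes and byteShuffle are illustrative names only.
#include <cstdint>

// One pshufb-like pass: idx[i] < 0 gives 0, otherwise src[idx[i]].
static void selectBytes(const uint8_t src[16], const int8_t idx[16], uint8_t out[16])
{
    for (unsigned i = 0; i < 16; i++)
        out[i] = idx[i] < 0 ? 0 : src[idx[i] & 15];
}

// bLane[i] in [0,16) selects from lhs, in [16,32) selects from rhs.
static void byteShuffle(const uint8_t lhs[16], const uint8_t rhs[16],
                        const uint8_t bLane[16], uint8_t out[16])
{
    int8_t fromLhs[16], fromRhs[16];
    for (unsigned i = 0; i < 16; i++) {
        fromLhs[i] = bLane[i] < 16 ? int8_t(bLane[i]) : int8_t(-1);
        fromRhs[i] = bLane[i] >= 16 ? int8_t(bLane[i] - 16) : int8_t(-1);
    }
    uint8_t a[16];
    uint8_t b[16];
    selectBytes(lhs, fromLhs, a);
    selectBytes(rhs, fromRhs, b);
    for (unsigned i = 0; i < 16; i++)
        out[i] = a[i] | b[i];
}
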
3228 : void
3229 0 : CodeGeneratorX86Shared::visitSimdShuffleX4(LSimdShuffleX4* ins)
3230 : {
3231 0 : FloatRegister lhs = ToFloatRegister(ins->lhs());
3232 0 : Operand rhs = ToOperand(ins->rhs());
3233 0 : FloatRegister out = ToFloatRegister(ins->output());
3234 :
3235 0 : uint32_t x = ins->lane(0);
3236 0 : uint32_t y = ins->lane(1);
3237 0 : uint32_t z = ins->lane(2);
3238 0 : uint32_t w = ins->lane(3);
3239 :
3240 : // Check that the majority of lanes come from LHS:
3241 0 : unsigned numLanesFromLHS = (x < 4) + (y < 4) + (z < 4) + (w < 4);
3242 0 : MOZ_ASSERT(numLanesFromLHS >= 2);
3243 :
3244 : // When reading this method, remember that vshufps takes the two first
3245 : // inputs of the destination operand (right operand) and the two last
3246 : // inputs of the source operand (left operand).
3247 : //
3248 : // Legend for explanations:
3249 : // - L: LHS
3250 : // - R: RHS
3251 : // - T: temporary
3252 :
3253 : uint32_t mask;
3254 :
3255 : // If all lanes came from a single vector, we should have constructed a
3256 : // MSimdSwizzle instead.
3257 0 : MOZ_ASSERT(numLanesFromLHS < 4);
3258 :
3259 : // If all values stay in their lane, this is a blend.
3260 0 : if (AssemblerX86Shared::HasSSE41()) {
3261 0 : if (x % 4 == 0 && y % 4 == 1 && z % 4 == 2 && w % 4 == 3) {
3262 0 : masm.vblendps(masm.blendpsMask(x >= 4, y >= 4, z >= 4, w >= 4), rhs, lhs, out);
3263 0 : return;
3264 : }
3265 : }
3266 :
3267 : // One element of the second, all other elements of the first
3268 0 : if (numLanesFromLHS == 3) {
3269 0 : unsigned firstMask = -1, secondMask = -1;
3270 :
3271 : // register-register vmovss preserves the high lanes.
3272 0 : if (ins->lanesMatch(4, 1, 2, 3) && rhs.kind() == Operand::FPREG) {
3273 0 : masm.vmovss(FloatRegister::FromCode(rhs.fpu()), lhs, out);
3274 0 : return;
3275 : }
3276 :
3277 : // SSE4.1 vinsertps can handle any single element.
3278 0 : unsigned numLanesUnchanged = (x == 0) + (y == 1) + (z == 2) + (w == 3);
3279 0 : if (AssemblerX86Shared::HasSSE41() && numLanesUnchanged == 3) {
3280 : unsigned srcLane;
3281 : unsigned dstLane;
3282 0 : if (x >= 4) {
3283 0 : srcLane = x - 4;
3284 0 : dstLane = 0;
3285 0 : } else if (y >= 4) {
3286 0 : srcLane = y - 4;
3287 0 : dstLane = 1;
3288 0 : } else if (z >= 4) {
3289 0 : srcLane = z - 4;
3290 0 : dstLane = 2;
3291 : } else {
3292 0 : MOZ_ASSERT(w >= 4);
3293 0 : srcLane = w - 4;
3294 0 : dstLane = 3;
3295 : }
3296 0 : masm.vinsertps(masm.vinsertpsMask(srcLane, dstLane), rhs, lhs, out);
3297 0 : return;
3298 : }
3299 :
3300 0 : FloatRegister rhsCopy = ToFloatRegister(ins->temp());
3301 :
3302 0 : if (x < 4 && y < 4) {
3303 0 : if (w >= 4) {
3304 0 : w %= 4;
3305 : // T = (Rw Rw Lz Lz) = vshufps(firstMask, lhs, rhs, rhsCopy)
3306 0 : firstMask = MacroAssembler::ComputeShuffleMask(w, w, z, z);
3307 : // (Lx Ly Lz Rw) = (Lx Ly Tz Tx) = vshufps(secondMask, T, lhs, out)
3308 0 : secondMask = MacroAssembler::ComputeShuffleMask(x, y, 2, 0);
3309 : } else {
3310 0 : MOZ_ASSERT(z >= 4);
3311 0 : z %= 4;
3312 : // T = (Rz Rz Lw Lw) = vshufps(firstMask, lhs, rhs, rhsCopy)
3313 0 : firstMask = MacroAssembler::ComputeShuffleMask(z, z, w, w);
3314 : // (Lx Ly Rz Lw) = (Lx Ly Tx Tz) = vshufps(secondMask, T, lhs, out)
3315 0 : secondMask = MacroAssembler::ComputeShuffleMask(x, y, 0, 2);
3316 : }
3317 :
3318 0 : masm.vshufps(firstMask, lhs, rhsCopy, rhsCopy);
3319 0 : masm.vshufps(secondMask, rhsCopy, lhs, out);
3320 0 : return;
3321 : }
3322 :
3323 0 : MOZ_ASSERT(z < 4 && w < 4);
3324 :
3325 0 : if (y >= 4) {
3326 0 : y %= 4;
3327 : // T = (Ry Ry Lx Lx) = vshufps(firstMask, lhs, rhs, rhsCopy)
3328 0 : firstMask = MacroAssembler::ComputeShuffleMask(y, y, x, x);
3329 : // (Lx Ry Lz Lw) = (Tz Tx Lz Lw) = vshufps(secondMask, lhs, T, out)
3330 0 : secondMask = MacroAssembler::ComputeShuffleMask(2, 0, z, w);
3331 : } else {
3332 0 : MOZ_ASSERT(x >= 4);
3333 0 : x %= 4;
3334 : // T = (Rx Rx Ly Ly) = vshufps(firstMask, lhs, rhs, rhsCopy)
3335 0 : firstMask = MacroAssembler::ComputeShuffleMask(x, x, y, y);
3336 : // (Rx Ly Lz Lw) = (Tx Tz Lz Lw) = vshufps(secondMask, lhs, T, out)
3337 0 : secondMask = MacroAssembler::ComputeShuffleMask(0, 2, z, w);
3338 : }
3339 :
3340 0 : masm.vshufps(firstMask, lhs, rhsCopy, rhsCopy);
3341 0 : if (AssemblerX86Shared::HasAVX()) {
3342 0 : masm.vshufps(secondMask, lhs, rhsCopy, out);
3343 : } else {
3344 0 : masm.vshufps(secondMask, lhs, rhsCopy, rhsCopy);
3345 0 : masm.moveSimd128Float(rhsCopy, out);
3346 : }
3347 0 : return;
3348 : }
3349 :
3350 : // Two elements from one vector, two other elements from the other
3351 0 : MOZ_ASSERT(numLanesFromLHS == 2);
3352 :
3353 : // TODO Here and below, the symmetric case would be handier (it avoids a move),
3354 : // but it can't be reached because the operands would get swapped (bug 1084404).
3355 0 : if (ins->lanesMatch(2, 3, 6, 7)) {
3356 0 : ScratchSimd128Scope scratch(masm);
3357 0 : if (AssemblerX86Shared::HasAVX()) {
3358 0 : FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, scratch);
3359 0 : masm.vmovhlps(lhs, rhsCopy, out);
3360 : } else {
3361 0 : masm.loadAlignedSimd128Float(rhs, scratch);
3362 0 : masm.vmovhlps(lhs, scratch, scratch);
3363 0 : masm.moveSimd128Float(scratch, out);
3364 : }
3365 0 : return;
3366 : }
3367 :
3368 0 : if (ins->lanesMatch(0, 1, 4, 5)) {
3369 0 : FloatRegister rhsCopy;
3370 0 : ScratchSimd128Scope scratch(masm);
3371 0 : if (rhs.kind() == Operand::FPREG) {
3372 : // No need to make an actual copy, since the operand is already
3373 : // in a register, and it won't be clobbered by the vmovlhps.
3374 0 : rhsCopy = FloatRegister::FromCode(rhs.fpu());
3375 : } else {
3376 0 : masm.loadAlignedSimd128Float(rhs, scratch);
3377 0 : rhsCopy = scratch;
3378 : }
3379 0 : masm.vmovlhps(rhsCopy, lhs, out);
3380 0 : return;
3381 : }
3382 :
3383 0 : if (ins->lanesMatch(0, 4, 1, 5)) {
3384 0 : masm.vunpcklps(rhs, lhs, out);
3385 0 : return;
3386 : }
3387 :
3388 : // TODO swapped case would be better (bug 1084404)
3389 0 : if (ins->lanesMatch(4, 0, 5, 1)) {
3390 0 : ScratchSimd128Scope scratch(masm);
3391 0 : if (AssemblerX86Shared::HasAVX()) {
3392 0 : FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, scratch);
3393 0 : masm.vunpcklps(lhs, rhsCopy, out);
3394 : } else {
3395 0 : masm.loadAlignedSimd128Float(rhs, scratch);
3396 0 : masm.vunpcklps(lhs, scratch, scratch);
3397 0 : masm.moveSimd128Float(scratch, out);
3398 : }
3399 0 : return;
3400 : }
3401 :
3402 0 : if (ins->lanesMatch(2, 6, 3, 7)) {
3403 0 : masm.vunpckhps(rhs, lhs, out);
3404 0 : return;
3405 : }
3406 :
3407 : // TODO swapped case would be better (bug 1084404)
3408 0 : if (ins->lanesMatch(6, 2, 7, 3)) {
3409 0 : ScratchSimd128Scope scratch(masm);
3410 0 : if (AssemblerX86Shared::HasAVX()) {
3411 0 : FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, scratch);
3412 0 : masm.vunpckhps(lhs, rhsCopy, out);
3413 : } else {
3414 0 : masm.loadAlignedSimd128Float(rhs, scratch);
3415 0 : masm.vunpckhps(lhs, scratch, scratch);
3416 0 : masm.moveSimd128Float(scratch, out);
3417 : }
3418 0 : return;
3419 : }
3420 :
3421 : // In one vshufps
3422 0 : if (x < 4 && y < 4) {
3423 0 : mask = MacroAssembler::ComputeShuffleMask(x, y, z % 4, w % 4);
3424 0 : masm.vshufps(mask, rhs, lhs, out);
3425 0 : return;
3426 : }
3427 :
3428 : // At creation, we should have explicitly swapped in this case.
3429 0 : MOZ_ASSERT(!(z >= 4 && w >= 4));
3430 :
3431 : // In two vshufps, for the most generic case:
3432 : uint32_t firstMask[4], secondMask[4];
3433 0 : unsigned i = 0, j = 2, k = 0;
3434 :
3435 : #define COMPUTE_MASK(lane) \
3436 : if (lane >= 4) { \
3437 : firstMask[j] = lane % 4; \
3438 : secondMask[k++] = j++; \
3439 : } else { \
3440 : firstMask[i] = lane; \
3441 : secondMask[k++] = i++; \
3442 : }
3443 :
3444 0 : COMPUTE_MASK(x)
3445 0 : COMPUTE_MASK(y)
3446 0 : COMPUTE_MASK(z)
3447 0 : COMPUTE_MASK(w)
3448 : #undef COMPUTE_MASK
3449 :
3450 0 : MOZ_ASSERT(i == 2 && j == 4 && k == 4);
3451 :
3452 0 : mask = MacroAssembler::ComputeShuffleMask(firstMask[0], firstMask[1],
3453 0 : firstMask[2], firstMask[3]);
3454 0 : masm.vshufps(mask, rhs, lhs, lhs);
3455 :
3456 0 : mask = MacroAssembler::ComputeShuffleMask(secondMask[0], secondMask[1],
3457 0 : secondMask[2], secondMask[3]);
3458 0 : masm.vshufps(mask, lhs, lhs, lhs);
3459 : }
3460 :
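// A scalar model, for illustration only, of the generic two-vshufps path that
// ends the function above (it assumes exactly two lanes come from each input,
// as asserted there). Pass 1 gathers the two lhs lanes into positions 0-1 and
// the two rhs lanes into positions 2-3; pass 2 permutes that intermediate into
// the requested order. shuffleX4Generic is an invented name for this sketch.
#include <array>
#include <cstdint>

static std::array<float, 4> shuffleX4Generic(const std::array<float, 4>& lhs,
                                             const std::array<float, 4>& rhs,
                                             const std::array<uint32_t, 4>& lane)
{
    uint32_t firstMask[4], secondMask[4];
    unsigned i = 0, j = 2, k = 0;
    for (uint32_t l : lane) {
        if (l >= 4) {                  // lane taken from rhs
            firstMask[j] = l % 4;
            secondMask[k++] = j++;
        } else {                       // lane taken from lhs
            firstMask[i] = l;
            secondMask[k++] = i++;
        }
    }
    // First vshufps: low half from lhs, high half from rhs.
    std::array<float, 4> tmp = { lhs[firstMask[0]], lhs[firstMask[1]],
                                 rhs[firstMask[2]], rhs[firstMask[3]] };
    // Second vshufps: permute the intermediate into the requested lane order.
    return { tmp[secondMask[0]], tmp[secondMask[1]],
             tmp[secondMask[2]], tmp[secondMask[3]] };
}
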
3461 : void
3462 0 : CodeGeneratorX86Shared::visitSimdBinaryCompIx16(LSimdBinaryCompIx16* ins)
3463 : {
3464 0 : static const SimdConstant allOnes = SimdConstant::SplatX16(-1);
3465 :
3466 0 : FloatRegister lhs = ToFloatRegister(ins->lhs());
3467 0 : Operand rhs = ToOperand(ins->rhs());
3468 0 : FloatRegister output = ToFloatRegister(ins->output());
3469 0 : MOZ_ASSERT_IF(!Assembler::HasAVX(), output == lhs);
3470 :
3471 0 : ScratchSimd128Scope scratch(masm);
3472 :
3473 0 : MSimdBinaryComp::Operation op = ins->operation();
3474 0 : switch (op) {
3475 : case MSimdBinaryComp::greaterThan:
3476 0 : masm.vpcmpgtb(rhs, lhs, output);
3477 0 : return;
3478 : case MSimdBinaryComp::equal:
3479 0 : masm.vpcmpeqb(rhs, lhs, output);
3480 0 : return;
3481 : case MSimdBinaryComp::lessThan:
3482 : // src := rhs
3483 0 : if (rhs.kind() == Operand::FPREG)
3484 0 : masm.moveSimd128Int(ToFloatRegister(ins->rhs()), scratch);
3485 : else
3486 0 : masm.loadAlignedSimd128Int(rhs, scratch);
3487 :
3488 : // src := src > lhs (i.e. lhs < rhs)
3489 : // Improve by doing custom lowering (rhs is tied to the output register)
3490 0 : masm.vpcmpgtb(ToOperand(ins->lhs()), scratch, scratch);
3491 0 : masm.moveSimd128Int(scratch, output);
3492 0 : return;
3493 : case MSimdBinaryComp::notEqual:
3494 : // Ideally for notEqual, greaterThanOrEqual, and lessThanOrEqual, we
3495 : // should invert the comparison by, e.g. swapping the arms of a select
3496 : // if that's what it's used in.
3497 0 : masm.loadConstantSimd128Int(allOnes, scratch);
3498 0 : masm.vpcmpeqb(rhs, lhs, output);
3499 0 : masm.bitwiseXorSimd128(Operand(scratch), output);
3500 0 : return;
3501 : case MSimdBinaryComp::greaterThanOrEqual:
3502 : // src := rhs
3503 0 : if (rhs.kind() == Operand::FPREG)
3504 0 : masm.moveSimd128Int(ToFloatRegister(ins->rhs()), scratch);
3505 : else
3506 0 : masm.loadAlignedSimd128Int(rhs, scratch);
3507 0 : masm.vpcmpgtb(ToOperand(ins->lhs()), scratch, scratch);
3508 0 : masm.loadConstantSimd128Int(allOnes, output);
3509 0 : masm.bitwiseXorSimd128(Operand(scratch), output);
3510 0 : return;
3511 : case MSimdBinaryComp::lessThanOrEqual:
3512 : // lhs <= rhs is equivalent to !(rhs < lhs), which we compute here.
3513 0 : masm.loadConstantSimd128Int(allOnes, scratch);
3514 0 : masm.vpcmpgtb(rhs, lhs, output);
3515 0 : masm.bitwiseXorSimd128(Operand(scratch), output);
3516 0 : return;
3517 : }
3518 0 : MOZ_CRASH("unexpected SIMD op");
3519 : }
3520 :
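// Hedged scalar model of how the comparisons without a native SSE instruction
// are synthesized above: lessThan swaps the operands of greater-than, and the
// *OrEqual / notEqual forms run the native predicate and then flip every bit
// (xor with all-ones). Helper names are invented for this sketch.
#include <cstdint>

static int8_t laneMask(bool b) { return b ? int8_t(-1) : int8_t(0); } // all-ones or all-zeros

static void lessThanI8x16(const int8_t lhs[16], const int8_t rhs[16], int8_t out[16])
{
    for (unsigned i = 0; i < 16; i++)
        out[i] = laneMask(rhs[i] > lhs[i]);                 // lhs < rhs == rhs > lhs
}

static void lessThanOrEqualI8x16(const int8_t lhs[16], const int8_t rhs[16], int8_t out[16])
{
    for (unsigned i = 0; i < 16; i++)
        out[i] = int8_t(~laneMask(lhs[i] > rhs[i]));        // !(lhs > rhs): pcmpgtb, then xor all-ones
}

static void notEqualI8x16(const int8_t lhs[16], const int8_t rhs[16], int8_t out[16])
{
    for (unsigned i = 0; i < 16; i++)
        out[i] = int8_t(~laneMask(lhs[i] == rhs[i]));       // pcmpeqb, then xor all-ones
}
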
3521 : void
3522 0 : CodeGeneratorX86Shared::visitSimdBinaryCompIx8(LSimdBinaryCompIx8* ins)
3523 : {
3524 0 : static const SimdConstant allOnes = SimdConstant::SplatX8(-1);
3525 :
3526 0 : FloatRegister lhs = ToFloatRegister(ins->lhs());
3527 0 : Operand rhs = ToOperand(ins->rhs());
3528 0 : FloatRegister output = ToFloatRegister(ins->output());
3529 0 : MOZ_ASSERT_IF(!Assembler::HasAVX(), output == lhs);
3530 :
3531 0 : ScratchSimd128Scope scratch(masm);
3532 :
3533 0 : MSimdBinaryComp::Operation op = ins->operation();
3534 0 : switch (op) {
3535 : case MSimdBinaryComp::greaterThan:
3536 0 : masm.vpcmpgtw(rhs, lhs, output);
3537 0 : return;
3538 : case MSimdBinaryComp::equal:
3539 0 : masm.vpcmpeqw(rhs, lhs, output);
3540 0 : return;
3541 : case MSimdBinaryComp::lessThan:
3542 : // src := rhs
3543 0 : if (rhs.kind() == Operand::FPREG)
3544 0 : masm.moveSimd128Int(ToFloatRegister(ins->rhs()), scratch);
3545 : else
3546 0 : masm.loadAlignedSimd128Int(rhs, scratch);
3547 :
3548 : // src := src > lhs (i.e. lhs < rhs)
3549 : // Improve by doing custom lowering (rhs is tied to the output register)
3550 0 : masm.vpcmpgtw(ToOperand(ins->lhs()), scratch, scratch);
3551 0 : masm.moveSimd128Int(scratch, output);
3552 0 : return;
3553 : case MSimdBinaryComp::notEqual:
3554 : // Ideally for notEqual, greaterThanOrEqual, and lessThanOrEqual, we
3555 : // should invert the comparison by, e.g. swapping the arms of a select
3556 : // if that's what it's used in.
3557 0 : masm.loadConstantSimd128Int(allOnes, scratch);
3558 0 : masm.vpcmpeqw(rhs, lhs, output);
3559 0 : masm.bitwiseXorSimd128(Operand(scratch), output);
3560 0 : return;
3561 : case MSimdBinaryComp::greaterThanOrEqual:
3562 : // src := rhs
3563 0 : if (rhs.kind() == Operand::FPREG)
3564 0 : masm.moveSimd128Int(ToFloatRegister(ins->rhs()), scratch);
3565 : else
3566 0 : masm.loadAlignedSimd128Int(rhs, scratch);
3567 0 : masm.vpcmpgtw(ToOperand(ins->lhs()), scratch, scratch);
3568 0 : masm.loadConstantSimd128Int(allOnes, output);
3569 0 : masm.bitwiseXorSimd128(Operand(scratch), output);
3570 0 : return;
3571 : case MSimdBinaryComp::lessThanOrEqual:
3572 : // lhs <= rhs is equivalent to !(rhs < lhs), which we compute here.
3573 0 : masm.loadConstantSimd128Int(allOnes, scratch);
3574 0 : masm.vpcmpgtw(rhs, lhs, output);
3575 0 : masm.bitwiseXorSimd128(Operand(scratch), output);
3576 0 : return;
3577 : }
3578 0 : MOZ_CRASH("unexpected SIMD op");
3579 : }
3580 :
3581 : void
3582 0 : CodeGeneratorX86Shared::visitSimdBinaryCompIx4(LSimdBinaryCompIx4* ins)
3583 : {
3584 0 : static const SimdConstant allOnes = SimdConstant::SplatX4(-1);
3585 :
3586 0 : FloatRegister lhs = ToFloatRegister(ins->lhs());
3587 0 : Operand rhs = ToOperand(ins->rhs());
3588 0 : MOZ_ASSERT(ToFloatRegister(ins->output()) == lhs);
3589 :
3590 0 : ScratchSimd128Scope scratch(masm);
3591 :
3592 0 : MSimdBinaryComp::Operation op = ins->operation();
3593 0 : switch (op) {
3594 : case MSimdBinaryComp::greaterThan:
3595 0 : masm.packedGreaterThanInt32x4(rhs, lhs);
3596 0 : return;
3597 : case MSimdBinaryComp::equal:
3598 0 : masm.packedEqualInt32x4(rhs, lhs);
3599 0 : return;
3600 : case MSimdBinaryComp::lessThan:
3601 : // src := rhs
3602 0 : if (rhs.kind() == Operand::FPREG)
3603 0 : masm.moveSimd128Int(ToFloatRegister(ins->rhs()), scratch);
3604 : else
3605 0 : masm.loadAlignedSimd128Int(rhs, scratch);
3606 :
3607 : // src := src > lhs (i.e. lhs < rhs)
3608 : // Improve by doing custom lowering (rhs is tied to the output register)
3609 0 : masm.packedGreaterThanInt32x4(ToOperand(ins->lhs()), scratch);
3610 0 : masm.moveSimd128Int(scratch, lhs);
3611 0 : return;
3612 : case MSimdBinaryComp::notEqual:
3613 : // Ideally for notEqual, greaterThanOrEqual, and lessThanOrEqual, we
3614 : // should invert the comparison by, e.g. swapping the arms of a select
3615 : // if that's what it's used in.
3616 0 : masm.loadConstantSimd128Int(allOnes, scratch);
3617 0 : masm.packedEqualInt32x4(rhs, lhs);
3618 0 : masm.bitwiseXorSimd128(Operand(scratch), lhs);
3619 0 : return;
3620 : case MSimdBinaryComp::greaterThanOrEqual:
3621 : // src := rhs
3622 0 : if (rhs.kind() == Operand::FPREG)
3623 0 : masm.moveSimd128Int(ToFloatRegister(ins->rhs()), scratch);
3624 : else
3625 0 : masm.loadAlignedSimd128Int(rhs, scratch);
3626 0 : masm.packedGreaterThanInt32x4(ToOperand(ins->lhs()), scratch);
3627 0 : masm.loadConstantSimd128Int(allOnes, lhs);
3628 0 : masm.bitwiseXorSimd128(Operand(scratch), lhs);
3629 0 : return;
3630 : case MSimdBinaryComp::lessThanOrEqual:
3631 : // lhs <= rhs is equivalent to !(rhs < lhs), which we compute here.
3632 0 : masm.loadConstantSimd128Int(allOnes, scratch);
3633 0 : masm.packedGreaterThanInt32x4(rhs, lhs);
3634 0 : masm.bitwiseXorSimd128(Operand(scratch), lhs);
3635 0 : return;
3636 : }
3637 0 : MOZ_CRASH("unexpected SIMD op");
3638 : }
3639 :
3640 : void
3641 0 : CodeGeneratorX86Shared::visitSimdBinaryCompFx4(LSimdBinaryCompFx4* ins)
3642 : {
3643 0 : FloatRegister lhs = ToFloatRegister(ins->lhs());
3644 0 : Operand rhs = ToOperand(ins->rhs());
3645 0 : FloatRegister output = ToFloatRegister(ins->output());
3646 :
3647 0 : MSimdBinaryComp::Operation op = ins->operation();
3648 0 : switch (op) {
3649 : case MSimdBinaryComp::equal:
3650 0 : masm.vcmpeqps(rhs, lhs, output);
3651 0 : return;
3652 : case MSimdBinaryComp::lessThan:
3653 0 : masm.vcmpltps(rhs, lhs, output);
3654 0 : return;
3655 : case MSimdBinaryComp::lessThanOrEqual:
3656 0 : masm.vcmpleps(rhs, lhs, output);
3657 0 : return;
3658 : case MSimdBinaryComp::notEqual:
3659 0 : masm.vcmpneqps(rhs, lhs, output);
3660 0 : return;
3661 : case MSimdBinaryComp::greaterThanOrEqual:
3662 : case MSimdBinaryComp::greaterThan:
3663 : // We reverse these before register allocation so that we don't have to
3664 : // copy into and out of temporaries after codegen.
3665 0 : MOZ_CRASH("lowering should have reversed this");
3666 : }
3667 0 : MOZ_CRASH("unexpected SIMD op");
3668 : }
3669 :
3670 : void
3671 0 : CodeGeneratorX86Shared::visitSimdBinaryArithIx16(LSimdBinaryArithIx16* ins)
3672 : {
3673 0 : FloatRegister lhs = ToFloatRegister(ins->lhs());
3674 0 : Operand rhs = ToOperand(ins->rhs());
3675 0 : FloatRegister output = ToFloatRegister(ins->output());
3676 :
3677 0 : MSimdBinaryArith::Operation op = ins->operation();
3678 0 : switch (op) {
3679 : case MSimdBinaryArith::Op_add:
3680 0 : masm.vpaddb(rhs, lhs, output);
3681 0 : return;
3682 : case MSimdBinaryArith::Op_sub:
3683 0 : masm.vpsubb(rhs, lhs, output);
3684 0 : return;
3685 : case MSimdBinaryArith::Op_mul:
3686 : // 8x16 mul is a valid operation, but not supported in SSE or AVX.
3687 : // The operation is synthesized from 16x8 multiplies by
3688 : // MSimdBinaryArith::AddLegalized().
3689 0 : break;
3690 : case MSimdBinaryArith::Op_div:
3691 : case MSimdBinaryArith::Op_max:
3692 : case MSimdBinaryArith::Op_min:
3693 : case MSimdBinaryArith::Op_minNum:
3694 : case MSimdBinaryArith::Op_maxNum:
3695 0 : break;
3696 : }
3697 0 : MOZ_CRASH("unexpected SIMD op");
3698 : }
3699 :
3700 : void
3701 0 : CodeGeneratorX86Shared::visitSimdBinaryArithIx8(LSimdBinaryArithIx8* ins)
3702 : {
3703 0 : FloatRegister lhs = ToFloatRegister(ins->lhs());
3704 0 : Operand rhs = ToOperand(ins->rhs());
3705 0 : FloatRegister output = ToFloatRegister(ins->output());
3706 :
3707 0 : MSimdBinaryArith::Operation op = ins->operation();
3708 0 : switch (op) {
3709 : case MSimdBinaryArith::Op_add:
3710 0 : masm.vpaddw(rhs, lhs, output);
3711 0 : return;
3712 : case MSimdBinaryArith::Op_sub:
3713 0 : masm.vpsubw(rhs, lhs, output);
3714 0 : return;
3715 : case MSimdBinaryArith::Op_mul:
3716 0 : masm.vpmullw(rhs, lhs, output);
3717 0 : return;
3718 : case MSimdBinaryArith::Op_div:
3719 : case MSimdBinaryArith::Op_max:
3720 : case MSimdBinaryArith::Op_min:
3721 : case MSimdBinaryArith::Op_minNum:
3722 : case MSimdBinaryArith::Op_maxNum:
3723 0 : break;
3724 : }
3725 0 : MOZ_CRASH("unexpected SIMD op");
3726 : }
3727 :
3728 : void
3729 0 : CodeGeneratorX86Shared::visitSimdBinaryArithIx4(LSimdBinaryArithIx4* ins)
3730 : {
3731 0 : FloatRegister lhs = ToFloatRegister(ins->lhs());
3732 0 : Operand rhs = ToOperand(ins->rhs());
3733 0 : FloatRegister output = ToFloatRegister(ins->output());
3734 :
3735 0 : ScratchSimd128Scope scratch(masm);
3736 :
3737 0 : MSimdBinaryArith::Operation op = ins->operation();
3738 0 : switch (op) {
3739 : case MSimdBinaryArith::Op_add:
3740 0 : masm.vpaddd(rhs, lhs, output);
3741 0 : return;
3742 : case MSimdBinaryArith::Op_sub:
3743 0 : masm.vpsubd(rhs, lhs, output);
3744 0 : return;
3745 : case MSimdBinaryArith::Op_mul: {
3746 0 : if (AssemblerX86Shared::HasSSE41()) {
3747 0 : masm.vpmulld(rhs, lhs, output);
3748 0 : return;
3749 : }
3750 :
3751 0 : masm.loadAlignedSimd128Int(rhs, scratch);
3752 0 : masm.vpmuludq(lhs, scratch, scratch);
3753 : // scratch contains (Rx, _, Rz, _) where R is the resulting vector.
3754 :
3755 0 : FloatRegister temp = ToFloatRegister(ins->temp());
3756 0 : masm.vpshufd(MacroAssembler::ComputeShuffleMask(1, 1, 3, 3), lhs, lhs);
3757 0 : masm.vpshufd(MacroAssembler::ComputeShuffleMask(1, 1, 3, 3), rhs, temp);
3758 0 : masm.vpmuludq(temp, lhs, lhs);
3759 : // lhs contains (Ry, _, Rw, _) where R is the resulting vector.
3760 :
3761 0 : masm.vshufps(MacroAssembler::ComputeShuffleMask(0, 2, 0, 2), scratch, lhs, lhs);
3762 : // lhs contains (Ry, Rw, Rx, Rz)
3763 0 : masm.vshufps(MacroAssembler::ComputeShuffleMask(2, 0, 3, 1), lhs, lhs, lhs);
3764 0 : return;
3765 : }
3766 : case MSimdBinaryArith::Op_div:
3767 : // x86 doesn't have SIMD i32 div.
3768 0 : break;
3769 : case MSimdBinaryArith::Op_max:
3770 : // we can do max with a single instruction only if we have SSE4.1
3771 : // using the PMAXSD instruction.
3772 0 : break;
3773 : case MSimdBinaryArith::Op_min:
3774 : // we can do min with a single instruction only if we have SSE4.1
3775 : // using the PMINSD instruction.
3776 0 : break;
3777 : case MSimdBinaryArith::Op_minNum:
3778 : case MSimdBinaryArith::Op_maxNum:
3779 0 : break;
3780 : }
3781 0 : MOZ_CRASH("unexpected SIMD op");
3782 : }
3783 :
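// For illustration only: a scalar model of the pre-SSE4.1 Op_mul path above.
// vpmuludq multiplies only the even 32-bit lanes (giving 64-bit products), so
// the odd lanes are shuffled into even positions, multiplied in a second pass,
// and the low 32 bits of all four products are interleaved back. mulI32x4 is
// an invented name for this sketch.
#include <cstdint>

static void mulI32x4(const uint32_t lhs[4], const uint32_t rhs[4], uint32_t out[4])
{
    // Pass 1 ("vpmuludq" on the original vectors): lanes 0 and 2.
    uint64_t even0 = uint64_t(lhs[0]) * rhs[0];
    uint64_t even2 = uint64_t(lhs[2]) * rhs[2];
    // Pass 2 ("vpshufd" moves lanes 1/3 down, then "vpmuludq"): lanes 1 and 3.
    uint64_t odd1 = uint64_t(lhs[1]) * rhs[1];
    uint64_t odd3 = uint64_t(lhs[3]) * rhs[3];
    // The final two vshufps gather the low 32 bits back into lane order.
    out[0] = uint32_t(even0);
    out[1] = uint32_t(odd1);
    out[2] = uint32_t(even2);
    out[3] = uint32_t(odd3);
}
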
3784 : void
3785 0 : CodeGeneratorX86Shared::visitSimdBinaryArithFx4(LSimdBinaryArithFx4* ins)
3786 : {
3787 0 : FloatRegister lhs = ToFloatRegister(ins->lhs());
3788 0 : Operand rhs = ToOperand(ins->rhs());
3789 0 : FloatRegister output = ToFloatRegister(ins->output());
3790 :
3791 0 : ScratchSimd128Scope scratch(masm);
3792 :
3793 0 : MSimdBinaryArith::Operation op = ins->operation();
3794 0 : switch (op) {
3795 : case MSimdBinaryArith::Op_add:
3796 0 : masm.vaddps(rhs, lhs, output);
3797 0 : return;
3798 : case MSimdBinaryArith::Op_sub:
3799 0 : masm.vsubps(rhs, lhs, output);
3800 0 : return;
3801 : case MSimdBinaryArith::Op_mul:
3802 0 : masm.vmulps(rhs, lhs, output);
3803 0 : return;
3804 : case MSimdBinaryArith::Op_div:
3805 0 : masm.vdivps(rhs, lhs, output);
3806 0 : return;
3807 : case MSimdBinaryArith::Op_max: {
3808 0 : FloatRegister lhsCopy = masm.reusedInputFloat32x4(lhs, scratch);
3809 0 : masm.vcmpunordps(rhs, lhsCopy, scratch);
3810 :
3811 0 : FloatRegister tmp = ToFloatRegister(ins->temp());
3812 0 : FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, tmp);
3813 0 : masm.vmaxps(Operand(lhs), rhsCopy, tmp);
3814 0 : masm.vmaxps(rhs, lhs, output);
3815 :
3816 0 : masm.vandps(tmp, output, output);
3817 0 : masm.vorps(scratch, output, output); // or in the all-ones NaNs
3818 0 : return;
3819 : }
3820 : case MSimdBinaryArith::Op_min: {
3821 0 : FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, scratch);
3822 0 : masm.vminps(Operand(lhs), rhsCopy, scratch);
3823 0 : masm.vminps(rhs, lhs, output);
3824 0 : masm.vorps(scratch, output, output); // NaN or'd with arbitrary bits is NaN
3825 0 : return;
3826 : }
3827 : case MSimdBinaryArith::Op_minNum: {
3828 0 : FloatRegister tmp = ToFloatRegister(ins->temp());
3829 0 : masm.loadConstantSimd128Int(SimdConstant::SplatX4(int32_t(0x80000000)), tmp);
3830 :
3831 0 : FloatRegister mask = scratch;
3832 0 : FloatRegister tmpCopy = masm.reusedInputFloat32x4(tmp, scratch);
3833 0 : masm.vpcmpeqd(Operand(lhs), tmpCopy, mask);
3834 0 : masm.vandps(tmp, mask, mask);
3835 :
3836 0 : FloatRegister lhsCopy = masm.reusedInputFloat32x4(lhs, tmp);
3837 0 : masm.vminps(rhs, lhsCopy, tmp);
3838 0 : masm.vorps(mask, tmp, tmp);
3839 :
3840 0 : FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, mask);
3841 0 : masm.vcmpneqps(rhs, rhsCopy, mask);
3842 :
3843 0 : if (AssemblerX86Shared::HasAVX()) {
3844 0 : masm.vblendvps(mask, lhs, tmp, output);
3845 : } else {
3846 : // Emulate vblendvps.
3847 : // With SSE4.1 we could use blendvps, but it's awkward since
3848 : // it requires the mask to be in xmm0.
3849 0 : if (lhs != output)
3850 0 : masm.moveSimd128Float(lhs, output);
3851 0 : masm.vandps(Operand(mask), output, output);
3852 0 : masm.vandnps(Operand(tmp), mask, mask);
3853 0 : masm.vorps(Operand(mask), output, output);
3854 : }
3855 0 : return;
3856 : }
3857 : case MSimdBinaryArith::Op_maxNum: {
3858 0 : FloatRegister mask = scratch;
3859 0 : masm.loadConstantSimd128Int(SimdConstant::SplatX4(0), mask);
3860 0 : masm.vpcmpeqd(Operand(lhs), mask, mask);
3861 :
3862 0 : FloatRegister tmp = ToFloatRegister(ins->temp());
3863 0 : masm.loadConstantSimd128Int(SimdConstant::SplatX4(int32_t(0x80000000)), tmp);
3864 0 : masm.vandps(tmp, mask, mask);
3865 :
3866 0 : FloatRegister lhsCopy = masm.reusedInputFloat32x4(lhs, tmp);
3867 0 : masm.vmaxps(rhs, lhsCopy, tmp);
3868 0 : masm.vandnps(Operand(tmp), mask, mask);
3869 :
3870 : // Ensure tmp always contains the temporary result
3871 0 : mask = tmp;
3872 0 : tmp = scratch;
3873 :
3874 0 : FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, mask);
3875 0 : masm.vcmpneqps(rhs, rhsCopy, mask);
3876 :
3877 0 : if (AssemblerX86Shared::HasAVX()) {
3878 0 : masm.vblendvps(mask, lhs, tmp, output);
3879 : } else {
3880 : // Emulate vblendvps.
3881 : // With SSE4.1 we could use blendvps, but it's awkward since
3882 : // it requires the mask to be in xmm0.
3883 0 : if (lhs != output)
3884 0 : masm.moveSimd128Float(lhs, output);
3885 0 : masm.vandps(Operand(mask), output, output);
3886 0 : masm.vandnps(Operand(tmp), mask, mask);
3887 0 : masm.vorps(Operand(mask), output, output);
3888 : }
3889 0 : return;
3890 : }
3891 : }
3892 0 : MOZ_CRASH("unexpected SIMD op");
3893 : }
3894 :
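// Hedged scalar model of the vblendvps emulation used in the minNum/maxNum
// cases above when AVX is unavailable: out = (a & mask) | (b & ~mask), built
// from vandps, vandnps and vorps. Where a mask lane is all-ones the result
// lane comes from a, otherwise from b. blendBits is an illustrative name.
#include <cstdint>

static void blendBits(const uint32_t a[4], const uint32_t b[4],
                      const uint32_t mask[4], uint32_t out[4])
{
    for (unsigned i = 0; i < 4; i++)
        out[i] = (a[i] & mask[i]) | (b[i] & ~mask[i]);   // vandps / vandnps / vorps
}
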
3895 : void
3896 0 : CodeGeneratorX86Shared::visitSimdBinarySaturating(LSimdBinarySaturating* ins)
3897 : {
3898 0 : FloatRegister lhs = ToFloatRegister(ins->lhs());
3899 0 : Operand rhs = ToOperand(ins->rhs());
3900 0 : FloatRegister output = ToFloatRegister(ins->output());
3901 :
3902 0 : SimdSign sign = ins->signedness();
3903 0 : MOZ_ASSERT(sign != SimdSign::NotApplicable);
3904 :
3905 0 : switch (ins->type()) {
3906 : case MIRType::Int8x16:
3907 0 : switch (ins->operation()) {
3908 : case MSimdBinarySaturating::add:
3909 0 : if (sign == SimdSign::Signed)
3910 0 : masm.vpaddsb(rhs, lhs, output);
3911 : else
3912 0 : masm.vpaddusb(rhs, lhs, output);
3913 0 : return;
3914 : case MSimdBinarySaturating::sub:
3915 0 : if (sign == SimdSign::Signed)
3916 0 : masm.vpsubsb(rhs, lhs, output);
3917 : else
3918 0 : masm.vpsubusb(rhs, lhs, output);
3919 0 : return;
3920 : }
3921 0 : break;
3922 :
3923 : case MIRType::Int16x8:
3924 0 : switch (ins->operation()) {
3925 : case MSimdBinarySaturating::add:
3926 0 : if (sign == SimdSign::Signed)
3927 0 : masm.vpaddsw(rhs, lhs, output);
3928 : else
3929 0 : masm.vpaddusw(rhs, lhs, output);
3930 0 : return;
3931 : case MSimdBinarySaturating::sub:
3932 0 : if (sign == SimdSign::Signed)
3933 0 : masm.vpsubsw(rhs, lhs, output);
3934 : else
3935 0 : masm.vpsubusw(rhs, lhs, output);
3936 0 : return;
3937 : }
3938 0 : break;
3939 :
3940 : default:
3941 0 : break;
3942 : }
3943 0 : MOZ_CRASH("unsupported type for SIMD saturating arithmetic");
3944 : }
3945 :
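// Illustrative scalar model of the saturating lane operations selected above
// (vpaddsb/vpaddusb and friends): results are clamped to the lane's range
// instead of wrapping. Helper names are invented for this sketch.
#include <algorithm>
#include <cstdint>

static int8_t addSatI8(int8_t a, int8_t b)
{
    int sum = int(a) + int(b);
    return int8_t(std::min(127, std::max(-128, sum)));
}

static uint8_t subSatU8(uint8_t a, uint8_t b)
{
    int diff = int(a) - int(b);
    return uint8_t(std::max(0, diff));
}
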
3946 : void
3947 0 : CodeGeneratorX86Shared::visitSimdUnaryArithIx16(LSimdUnaryArithIx16* ins)
3948 : {
3949 0 : Operand in = ToOperand(ins->input());
3950 0 : FloatRegister out = ToFloatRegister(ins->output());
3951 :
3952 0 : static const SimdConstant allOnes = SimdConstant::SplatX16(-1);
3953 :
3954 0 : switch (ins->operation()) {
3955 : case MSimdUnaryArith::neg:
3956 0 : masm.zeroSimd128Int(out);
3957 0 : masm.packedSubInt8(in, out);
3958 0 : return;
3959 : case MSimdUnaryArith::not_:
3960 0 : masm.loadConstantSimd128Int(allOnes, out);
3961 0 : masm.bitwiseXorSimd128(in, out);
3962 0 : return;
3963 : case MSimdUnaryArith::abs:
3964 : case MSimdUnaryArith::reciprocalApproximation:
3965 : case MSimdUnaryArith::reciprocalSqrtApproximation:
3966 : case MSimdUnaryArith::sqrt:
3967 0 : break;
3968 : }
3969 0 : MOZ_CRASH("unexpected SIMD op");
3970 : }
3971 :
3972 : void
3973 0 : CodeGeneratorX86Shared::visitSimdUnaryArithIx8(LSimdUnaryArithIx8* ins)
3974 : {
3975 0 : Operand in = ToOperand(ins->input());
3976 0 : FloatRegister out = ToFloatRegister(ins->output());
3977 :
3978 0 : static const SimdConstant allOnes = SimdConstant::SplatX8(-1);
3979 :
3980 0 : switch (ins->operation()) {
3981 : case MSimdUnaryArith::neg:
3982 0 : masm.zeroSimd128Int(out);
3983 0 : masm.packedSubInt16(in, out);
3984 0 : return;
3985 : case MSimdUnaryArith::not_:
3986 0 : masm.loadConstantSimd128Int(allOnes, out);
3987 0 : masm.bitwiseXorSimd128(in, out);
3988 0 : return;
3989 : case MSimdUnaryArith::abs:
3990 : case MSimdUnaryArith::reciprocalApproximation:
3991 : case MSimdUnaryArith::reciprocalSqrtApproximation:
3992 : case MSimdUnaryArith::sqrt:
3993 0 : break;
3994 : }
3995 0 : MOZ_CRASH("unexpected SIMD op");
3996 : }
3997 :
3998 : void
3999 0 : CodeGeneratorX86Shared::visitSimdUnaryArithIx4(LSimdUnaryArithIx4* ins)
4000 : {
4001 0 : Operand in = ToOperand(ins->input());
4002 0 : FloatRegister out = ToFloatRegister(ins->output());
4003 :
4004 0 : static const SimdConstant allOnes = SimdConstant::SplatX4(-1);
4005 :
4006 0 : switch (ins->operation()) {
4007 : case MSimdUnaryArith::neg:
4008 0 : masm.zeroSimd128Int(out);
4009 0 : masm.packedSubInt32(in, out);
4010 0 : return;
4011 : case MSimdUnaryArith::not_:
4012 0 : masm.loadConstantSimd128Int(allOnes, out);
4013 0 : masm.bitwiseXorSimd128(in, out);
4014 0 : return;
4015 : case MSimdUnaryArith::abs:
4016 : case MSimdUnaryArith::reciprocalApproximation:
4017 : case MSimdUnaryArith::reciprocalSqrtApproximation:
4018 : case MSimdUnaryArith::sqrt:
4019 0 : break;
4020 : }
4021 0 : MOZ_CRASH("unexpected SIMD op");
4022 : }
4023 :
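// A small illustrative model of the integer unary ops above: neg is computed
// as 0 - x (zero the output register, then a packed subtract), and not_ as
// x xor all-ones. Names are invented for this sketch.
#include <cstdint>

static int32_t negViaZeroMinus(int32_t x)  { return int32_t(0u - uint32_t(x)); }
static int32_t notViaXorAllOnes(int32_t x) { return x ^ -1; }
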
4024 : void
4025 0 : CodeGeneratorX86Shared::visitSimdUnaryArithFx4(LSimdUnaryArithFx4* ins)
4026 : {
4027 0 : Operand in = ToOperand(ins->input());
4028 0 : FloatRegister out = ToFloatRegister(ins->output());
4029 :
4030 : // All ones but the sign bit
4031 0 : float signMask = SpecificNaN<float>(0, FloatingPoint<float>::kSignificandBits);
4032 0 : static const SimdConstant signMasks = SimdConstant::SplatX4(signMask);
4033 :
4034 : // All ones including the sign bit
4035 0 : float ones = SpecificNaN<float>(1, FloatingPoint<float>::kSignificandBits);
4036 0 : static const SimdConstant allOnes = SimdConstant::SplatX4(ones);
4037 :
4038 : // All zeros but the sign bit
4039 0 : static const SimdConstant minusZero = SimdConstant::SplatX4(-0.f);
4040 :
4041 0 : switch (ins->operation()) {
4042 : case MSimdUnaryArith::abs:
4043 0 : masm.loadConstantSimd128Float(signMasks, out);
4044 0 : masm.bitwiseAndSimd128(in, out);
4045 0 : return;
4046 : case MSimdUnaryArith::neg:
4047 0 : masm.loadConstantSimd128Float(minusZero, out);
4048 0 : masm.bitwiseXorSimd128(in, out);
4049 0 : return;
4050 : case MSimdUnaryArith::not_:
4051 0 : masm.loadConstantSimd128Float(allOnes, out);
4052 0 : masm.bitwiseXorSimd128(in, out);
4053 0 : return;
4054 : case MSimdUnaryArith::reciprocalApproximation:
4055 0 : masm.packedRcpApproximationFloat32x4(in, out);
4056 0 : return;
4057 : case MSimdUnaryArith::reciprocalSqrtApproximation:
4058 0 : masm.packedRcpSqrtApproximationFloat32x4(in, out);
4059 0 : return;
4060 : case MSimdUnaryArith::sqrt:
4061 0 : masm.packedSqrtFloat32x4(in, out);
4062 0 : return;
4063 : }
4064 0 : MOZ_CRASH("unexpected SIMD op");
4065 : }
4066 :
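// Hedged scalar model of the float bit-mask tricks above: abs clears the sign
// bit (AND with the all-ones-but-sign constant), neg flips it (XOR with the
// -0.0f pattern), and not_ flips every bit. memcpy stands in for the raw
// bitwise register operations; helper names are illustrative only.
#include <cstdint>
#include <cstring>

static uint32_t bitsOf(float f)     { uint32_t u; std::memcpy(&u, &f, sizeof u); return u; }
static float    floatOf(uint32_t u) { float f; std::memcpy(&f, &u, sizeof f); return f; }

static float absViaMask(float x) { return floatOf(bitsOf(x) & 0x7fffffffu); }
static float negViaMask(float x) { return floatOf(bitsOf(x) ^ 0x80000000u); }
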
4067 : void
4068 0 : CodeGeneratorX86Shared::visitSimdBinaryBitwise(LSimdBinaryBitwise* ins)
4069 : {
4070 0 : FloatRegister lhs = ToFloatRegister(ins->lhs());
4071 0 : Operand rhs = ToOperand(ins->rhs());
4072 0 : FloatRegister output = ToFloatRegister(ins->output());
4073 :
4074 0 : MSimdBinaryBitwise::Operation op = ins->operation();
4075 0 : switch (op) {
4076 : case MSimdBinaryBitwise::and_:
4077 0 : if (ins->type() == MIRType::Float32x4)
4078 0 : masm.vandps(rhs, lhs, output);
4079 : else
4080 0 : masm.vpand(rhs, lhs, output);
4081 0 : return;
4082 : case MSimdBinaryBitwise::or_:
4083 0 : if (ins->type() == MIRType::Float32x4)
4084 0 : masm.vorps(rhs, lhs, output);
4085 : else
4086 0 : masm.vpor(rhs, lhs, output);
4087 0 : return;
4088 : case MSimdBinaryBitwise::xor_:
4089 0 : if (ins->type() == MIRType::Float32x4)
4090 0 : masm.vxorps(rhs, lhs, output);
4091 : else
4092 0 : masm.vpxor(rhs, lhs, output);
4093 0 : return;
4094 : }
4095 0 : MOZ_CRASH("unexpected SIMD bitwise op");
4096 : }
4097 :
4098 : void
4099 0 : CodeGeneratorX86Shared::visitSimdShift(LSimdShift* ins)
4100 : {
4101 0 : FloatRegister out = ToFloatRegister(ins->output());
4102 0 : MOZ_ASSERT(ToFloatRegister(ins->vector()) == out); // defineReuseInput(0);
4103 :
4104 : // The shift amount is masked to the number of bits in a lane.
4105 0 : uint32_t shiftmask = (128u / SimdTypeToLength(ins->type())) - 1;
4106 :
4107 : // Note that SSE doesn't have instructions for shifting 8x16 vectors.
4108 : // These shifts are synthesized by the MSimdShift::AddLegalized() function.
4109 0 : const LAllocation* val = ins->value();
4110 0 : if (val->isConstant()) {
4111 0 : MOZ_ASSERT(ins->temp()->isBogusTemp());
4112 0 : Imm32 count(uint32_t(ToInt32(val)) & shiftmask);
4113 0 : switch (ins->type()) {
4114 : case MIRType::Int16x8:
4115 0 : switch (ins->operation()) {
4116 : case MSimdShift::lsh:
4117 0 : masm.packedLeftShiftByScalarInt16x8(count, out);
4118 0 : return;
4119 : case MSimdShift::rsh:
4120 0 : masm.packedRightShiftByScalarInt16x8(count, out);
4121 0 : return;
4122 : case MSimdShift::ursh:
4123 0 : masm.packedUnsignedRightShiftByScalarInt16x8(count, out);
4124 0 : return;
4125 : }
4126 0 : break;
4127 : case MIRType::Int32x4:
4128 0 : switch (ins->operation()) {
4129 : case MSimdShift::lsh:
4130 0 : masm.packedLeftShiftByScalarInt32x4(count, out);
4131 0 : return;
4132 : case MSimdShift::rsh:
4133 0 : masm.packedRightShiftByScalarInt32x4(count, out);
4134 0 : return;
4135 : case MSimdShift::ursh:
4136 0 : masm.packedUnsignedRightShiftByScalarInt32x4(count, out);
4137 0 : return;
4138 : }
4139 0 : break;
4140 : default:
4141 0 : MOZ_CRASH("unsupported type for SIMD shifts");
4142 : }
4143 0 : MOZ_CRASH("unexpected SIMD bitwise op");
4144 : }
4145 :
4146 : // Truncate val to the lane width in bits (the shiftmask above); a temp register is used for that.
4147 0 : MOZ_ASSERT(val->isRegister());
4148 0 : Register count = ToRegister(ins->temp());
4149 0 : masm.mov(ToRegister(val), count);
4150 0 : masm.andl(Imm32(shiftmask), count);
4151 0 : ScratchFloat32Scope scratch(masm);
4152 0 : masm.vmovd(count, scratch);
4153 :
4154 0 : switch (ins->type()) {
4155 : case MIRType::Int16x8:
4156 0 : switch (ins->operation()) {
4157 : case MSimdShift::lsh:
4158 0 : masm.packedLeftShiftByScalarInt16x8(scratch, out);
4159 0 : return;
4160 : case MSimdShift::rsh:
4161 0 : masm.packedRightShiftByScalarInt16x8(scratch, out);
4162 0 : return;
4163 : case MSimdShift::ursh:
4164 0 : masm.packedUnsignedRightShiftByScalarInt16x8(scratch, out);
4165 0 : return;
4166 : }
4167 0 : break;
4168 : case MIRType::Int32x4:
4169 0 : switch (ins->operation()) {
4170 : case MSimdShift::lsh:
4171 0 : masm.packedLeftShiftByScalarInt32x4(scratch, out);
4172 0 : return;
4173 : case MSimdShift::rsh:
4174 0 : masm.packedRightShiftByScalarInt32x4(scratch, out);
4175 0 : return;
4176 : case MSimdShift::ursh:
4177 0 : masm.packedUnsignedRightShiftByScalarInt32x4(scratch, out);
4178 0 : return;
4179 : }
4180 0 : break;
4181 : default:
4182 0 : MOZ_CRASH("unsupported type for SIMD shifts");
4183 : }
4184 0 : MOZ_CRASH("unexpected SIMD bitwise op");
4185 : }
4186 :
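// Hedged scalar model of the shift handling above: the count is reduced modulo
// the lane width (the & shiftmask), broadcast, and applied to every lane; ursh
// shifts in zeros while rsh shifts in copies of the sign bit. Helper names are
// invented for this sketch.
#include <cstdint>

static void lshI32x4(const int32_t in[4], uint32_t count, int32_t out[4])
{
    count &= 31;                                     // shiftmask for Int32x4
    for (unsigned i = 0; i < 4; i++)
        out[i] = int32_t(uint32_t(in[i]) << count);  // packedLeftShiftByScalarInt32x4
}

static void urshI32x4(const int32_t in[4], uint32_t count, int32_t out[4])
{
    count &= 31;
    for (unsigned i = 0; i < 4; i++)
        out[i] = int32_t(uint32_t(in[i]) >> count);  // unsigned: zero-fill
}
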
4187 : void
4188 0 : CodeGeneratorX86Shared::visitSimdSelect(LSimdSelect* ins)
4189 : {
4190 0 : FloatRegister mask = ToFloatRegister(ins->mask());
4191 0 : FloatRegister onTrue = ToFloatRegister(ins->lhs());
4192 0 : FloatRegister onFalse = ToFloatRegister(ins->rhs());
4193 0 : FloatRegister output = ToFloatRegister(ins->output());
4194 0 : FloatRegister temp = ToFloatRegister(ins->temp());
4195 :
4196 0 : if (onTrue != output)
4197 0 : masm.vmovaps(onTrue, output);
4198 0 : if (mask != temp)
4199 0 : masm.vmovaps(mask, temp);
4200 :
4201 0 : MSimdSelect* mir = ins->mir();
4202 0 : unsigned lanes = SimdTypeToLength(mir->type());
4203 :
4204 0 : if (AssemblerX86Shared::HasAVX() && lanes == 4) {
4205 : // TBD: with AVX, use vpblendvb for the lanes > 4 cases as well.
4206 0 : masm.vblendvps(mask, onTrue, onFalse, output);
4207 0 : return;
4208 : }
4209 :
4210 : // SSE4.1 has plain blendvps which can do this, but it is awkward
4211 : // to use because it requires the mask to be in xmm0.
4212 :
4213 0 : masm.bitwiseAndSimd128(Operand(temp), output);
4214 0 : masm.bitwiseAndNotSimd128(Operand(onFalse), temp);
4215 0 : masm.bitwiseOrSimd128(Operand(temp), output);
4216 : }
4217 :
4218 : void
4219 0 : CodeGeneratorX86Shared::visitCompareExchangeTypedArrayElement(LCompareExchangeTypedArrayElement* lir)
4220 : {
4221 0 : Register elements = ToRegister(lir->elements());
4222 0 : AnyRegister output = ToAnyRegister(lir->output());
4223 0 : Register temp = lir->temp()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp());
4224 :
4225 0 : Register oldval = ToRegister(lir->oldval());
4226 0 : Register newval = ToRegister(lir->newval());
4227 :
4228 0 : Scalar::Type arrayType = lir->mir()->arrayType();
4229 0 : int width = Scalar::byteSize(arrayType);
4230 :
4231 0 : if (lir->index()->isConstant()) {
4232 0 : Address dest(elements, ToInt32(lir->index()) * width);
4233 0 : masm.compareExchangeToTypedIntArray(arrayType, dest, oldval, newval, temp, output);
4234 : } else {
4235 0 : BaseIndex dest(elements, ToRegister(lir->index()), ScaleFromElemWidth(width));
4236 0 : masm.compareExchangeToTypedIntArray(arrayType, dest, oldval, newval, temp, output);
4237 : }
4238 0 : }
4239 :
4240 : void
4241 0 : CodeGeneratorX86Shared::visitAtomicExchangeTypedArrayElement(LAtomicExchangeTypedArrayElement* lir)
4242 : {
4243 0 : Register elements = ToRegister(lir->elements());
4244 0 : AnyRegister output = ToAnyRegister(lir->output());
4245 0 : Register temp = lir->temp()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp());
4246 :
4247 0 : Register value = ToRegister(lir->value());
4248 :
4249 0 : Scalar::Type arrayType = lir->mir()->arrayType();
4250 0 : int width = Scalar::byteSize(arrayType);
4251 :
4252 0 : if (lir->index()->isConstant()) {
4253 0 : Address dest(elements, ToInt32(lir->index()) * width);
4254 0 : masm.atomicExchangeToTypedIntArray(arrayType, dest, value, temp, output);
4255 : } else {
4256 0 : BaseIndex dest(elements, ToRegister(lir->index()), ScaleFromElemWidth(width));
4257 0 : masm.atomicExchangeToTypedIntArray(arrayType, dest, value, temp, output);
4258 : }
4259 0 : }
4260 :
4261 : template<typename S, typename T>
4262 : void
4263 0 : CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, const S& value,
4264 : const T& mem, Register temp1, Register temp2, AnyRegister output)
4265 : {
4266 0 : switch (arrayType) {
4267 : case Scalar::Int8:
4268 0 : switch (op) {
4269 : case AtomicFetchAddOp:
4270 0 : masm.atomicFetchAdd8SignExtend(value, mem, temp1, output.gpr());
4271 0 : break;
4272 : case AtomicFetchSubOp:
4273 0 : masm.atomicFetchSub8SignExtend(value, mem, temp1, output.gpr());
4274 0 : break;
4275 : case AtomicFetchAndOp:
4276 0 : masm.atomicFetchAnd8SignExtend(value, mem, temp1, output.gpr());
4277 0 : break;
4278 : case AtomicFetchOrOp:
4279 0 : masm.atomicFetchOr8SignExtend(value, mem, temp1, output.gpr());
4280 0 : break;
4281 : case AtomicFetchXorOp:
4282 0 : masm.atomicFetchXor8SignExtend(value, mem, temp1, output.gpr());
4283 0 : break;
4284 : default:
4285 0 : MOZ_CRASH("Invalid typed array atomic operation");
4286 : }
4287 0 : break;
4288 : case Scalar::Uint8:
4289 0 : switch (op) {
4290 : case AtomicFetchAddOp:
4291 0 : masm.atomicFetchAdd8ZeroExtend(value, mem, temp1, output.gpr());
4292 0 : break;
4293 : case AtomicFetchSubOp:
4294 0 : masm.atomicFetchSub8ZeroExtend(value, mem, temp1, output.gpr());
4295 0 : break;
4296 : case AtomicFetchAndOp:
4297 0 : masm.atomicFetchAnd8ZeroExtend(value, mem, temp1, output.gpr());
4298 0 : break;
4299 : case AtomicFetchOrOp:
4300 0 : masm.atomicFetchOr8ZeroExtend(value, mem, temp1, output.gpr());
4301 0 : break;
4302 : case AtomicFetchXorOp:
4303 0 : masm.atomicFetchXor8ZeroExtend(value, mem, temp1, output.gpr());
4304 0 : break;
4305 : default:
4306 0 : MOZ_CRASH("Invalid typed array atomic operation");
4307 : }
4308 0 : break;
4309 : case Scalar::Int16:
4310 0 : switch (op) {
4311 : case AtomicFetchAddOp:
4312 0 : masm.atomicFetchAdd16SignExtend(value, mem, temp1, output.gpr());
4313 0 : break;
4314 : case AtomicFetchSubOp:
4315 0 : masm.atomicFetchSub16SignExtend(value, mem, temp1, output.gpr());
4316 0 : break;
4317 : case AtomicFetchAndOp:
4318 0 : masm.atomicFetchAnd16SignExtend(value, mem, temp1, output.gpr());
4319 0 : break;
4320 : case AtomicFetchOrOp:
4321 0 : masm.atomicFetchOr16SignExtend(value, mem, temp1, output.gpr());
4322 0 : break;
4323 : case AtomicFetchXorOp:
4324 0 : masm.atomicFetchXor16SignExtend(value, mem, temp1, output.gpr());
4325 0 : break;
4326 : default:
4327 0 : MOZ_CRASH("Invalid typed array atomic operation");
4328 : }
4329 0 : break;
4330 : case Scalar::Uint16:
4331 0 : switch (op) {
4332 : case AtomicFetchAddOp:
4333 0 : masm.atomicFetchAdd16ZeroExtend(value, mem, temp1, output.gpr());
4334 0 : break;
4335 : case AtomicFetchSubOp:
4336 0 : masm.atomicFetchSub16ZeroExtend(value, mem, temp1, output.gpr());
4337 0 : break;
4338 : case AtomicFetchAndOp:
4339 0 : masm.atomicFetchAnd16ZeroExtend(value, mem, temp1, output.gpr());
4340 0 : break;
4341 : case AtomicFetchOrOp:
4342 0 : masm.atomicFetchOr16ZeroExtend(value, mem, temp1, output.gpr());
4343 0 : break;
4344 : case AtomicFetchXorOp:
4345 0 : masm.atomicFetchXor16ZeroExtend(value, mem, temp1, output.gpr());
4346 0 : break;
4347 : default:
4348 0 : MOZ_CRASH("Invalid typed array atomic operation");
4349 : }
4350 0 : break;
4351 : case Scalar::Int32:
4352 0 : switch (op) {
4353 : case AtomicFetchAddOp:
4354 0 : masm.atomicFetchAdd32(value, mem, temp1, output.gpr());
4355 0 : break;
4356 : case AtomicFetchSubOp:
4357 0 : masm.atomicFetchSub32(value, mem, temp1, output.gpr());
4358 0 : break;
4359 : case AtomicFetchAndOp:
4360 0 : masm.atomicFetchAnd32(value, mem, temp1, output.gpr());
4361 0 : break;
4362 : case AtomicFetchOrOp:
4363 0 : masm.atomicFetchOr32(value, mem, temp1, output.gpr());
4364 0 : break;
4365 : case AtomicFetchXorOp:
4366 0 : masm.atomicFetchXor32(value, mem, temp1, output.gpr());
4367 0 : break;
4368 : default:
4369 0 : MOZ_CRASH("Invalid typed array atomic operation");
4370 : }
4371 0 : break;
4372 : case Scalar::Uint32:
4373 : // At the moment, the code in MCallOptimize.cpp requires the output
4374 : // type to be double for uint32 arrays. See bug 1077305.
4375 0 : MOZ_ASSERT(output.isFloat());
4376 0 : switch (op) {
4377 : case AtomicFetchAddOp:
4378 0 : masm.atomicFetchAdd32(value, mem, InvalidReg, temp1);
4379 0 : break;
4380 : case AtomicFetchSubOp:
4381 0 : masm.atomicFetchSub32(value, mem, InvalidReg, temp1);
4382 0 : break;
4383 : case AtomicFetchAndOp:
4384 0 : masm.atomicFetchAnd32(value, mem, temp2, temp1);
4385 0 : break;
4386 : case AtomicFetchOrOp:
4387 0 : masm.atomicFetchOr32(value, mem, temp2, temp1);
4388 0 : break;
4389 : case AtomicFetchXorOp:
4390 0 : masm.atomicFetchXor32(value, mem, temp2, temp1);
4391 0 : break;
4392 : default:
4393 0 : MOZ_CRASH("Invalid typed array atomic operation");
4394 : }
4395 0 : masm.convertUInt32ToDouble(temp1, output.fpu());
4396 0 : break;
4397 : default:
4398 0 : MOZ_CRASH("Invalid typed array type");
4399 : }
4400 0 : }
4401 :
4402 : template void
4403 : CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType,
4404 : const Imm32& value, const Address& mem,
4405 : Register temp1, Register temp2, AnyRegister output);
4406 : template void
4407 : CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType,
4408 : const Imm32& value, const BaseIndex& mem,
4409 : Register temp1, Register temp2, AnyRegister output);
4410 : template void
4411 : CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType,
4412 : const Register& value, const Address& mem,
4413 : Register temp1, Register temp2, AnyRegister output);
4414 : template void
4415 : CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType,
4416 : const Register& value, const BaseIndex& mem,
4417 : Register temp1, Register temp2, AnyRegister output);
4418 :
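// Hedged scalar model of the fetch-style atomics dispatched above: the old
// value is returned and memory is updated in a single atomic step; for Uint32
// the old value is additionally converted to double (see the Uint32 case and
// the bug reference above). std::atomic stands in for the generated
// lock-prefixed code; helper names are illustrative only.
#include <atomic>
#include <cstdint>

static int32_t fetchAdd32(std::atomic<int32_t>& mem, int32_t value)
{
    return mem.fetch_add(value);            // returns the previous value
}

static double fetchAdd32AsUint(std::atomic<uint32_t>& mem, uint32_t value)
{
    return double(mem.fetch_add(value));    // Uint32 results are handed back as doubles
}
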
4419 : // Binary operation for effect, result discarded.
4420 : template<typename S, typename T>
4421 : void
4422 0 : CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, const S& value,
4423 : const T& mem)
4424 : {
4425 0 : switch (arrayType) {
4426 : case Scalar::Int8:
4427 : case Scalar::Uint8:
4428 0 : switch (op) {
4429 : case AtomicFetchAddOp:
4430 0 : masm.atomicAdd8(value, mem);
4431 0 : break;
4432 : case AtomicFetchSubOp:
4433 0 : masm.atomicSub8(value, mem);
4434 0 : break;
4435 : case AtomicFetchAndOp:
4436 0 : masm.atomicAnd8(value, mem);
4437 0 : break;
4438 : case AtomicFetchOrOp:
4439 0 : masm.atomicOr8(value, mem);
4440 0 : break;
4441 : case AtomicFetchXorOp:
4442 0 : masm.atomicXor8(value, mem);
4443 0 : break;
4444 : default:
4445 0 : MOZ_CRASH("Invalid typed array atomic operation");
4446 : }
4447 0 : break;
4448 : case Scalar::Int16:
4449 : case Scalar::Uint16:
4450 0 : switch (op) {
4451 : case AtomicFetchAddOp:
4452 0 : masm.atomicAdd16(value, mem);
4453 0 : break;
4454 : case AtomicFetchSubOp:
4455 0 : masm.atomicSub16(value, mem);
4456 0 : break;
4457 : case AtomicFetchAndOp:
4458 0 : masm.atomicAnd16(value, mem);
4459 0 : break;
4460 : case AtomicFetchOrOp:
4461 0 : masm.atomicOr16(value, mem);
4462 0 : break;
4463 : case AtomicFetchXorOp:
4464 0 : masm.atomicXor16(value, mem);
4465 0 : break;
4466 : default:
4467 0 : MOZ_CRASH("Invalid typed array atomic operation");
4468 : }
4469 0 : break;
4470 : case Scalar::Int32:
4471 : case Scalar::Uint32:
4472 0 : switch (op) {
4473 : case AtomicFetchAddOp:
4474 0 : masm.atomicAdd32(value, mem);
4475 0 : break;
4476 : case AtomicFetchSubOp:
4477 0 : masm.atomicSub32(value, mem);
4478 0 : break;
4479 : case AtomicFetchAndOp:
4480 0 : masm.atomicAnd32(value, mem);
4481 0 : break;
4482 : case AtomicFetchOrOp:
4483 0 : masm.atomicOr32(value, mem);
4484 0 : break;
4485 : case AtomicFetchXorOp:
4486 0 : masm.atomicXor32(value, mem);
4487 0 : break;
4488 : default:
4489 0 : MOZ_CRASH("Invalid typed array atomic operation");
4490 : }
4491 0 : break;
4492 : default:
4493 0 : MOZ_CRASH("Invalid typed array type");
4494 : }
4495 0 : }
4496 :
4497 : template void
4498 : CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType,
4499 : const Imm32& value, const Address& mem);
4500 : template void
4501 : CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType,
4502 : const Imm32& value, const BaseIndex& mem);
4503 : template void
4504 : CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType,
4505 : const Register& value, const Address& mem);
4506 : template void
4507 : CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType,
4508 : const Register& value, const BaseIndex& mem);
4509 :
4510 :
4511 : template <typename T>
4512 : static inline void
4513 0 : AtomicBinopToTypedArray(CodeGeneratorX86Shared* cg, AtomicOp op,
4514 : Scalar::Type arrayType, const LAllocation* value, const T& mem,
4515 : Register temp1, Register temp2, AnyRegister output)
4516 : {
4517 0 : if (value->isConstant())
4518 0 : cg->atomicBinopToTypedIntArray(op, arrayType, Imm32(ToInt32(value)), mem, temp1, temp2, output);
4519 : else
4520 0 : cg->atomicBinopToTypedIntArray(op, arrayType, ToRegister(value), mem, temp1, temp2, output);
4521 0 : }
4522 :
4523 : void
4524 0 : CodeGeneratorX86Shared::visitAtomicTypedArrayElementBinop(LAtomicTypedArrayElementBinop* lir)
4525 : {
4526 0 : MOZ_ASSERT(lir->mir()->hasUses());
4527 :
4528 0 : AnyRegister output = ToAnyRegister(lir->output());
4529 0 : Register elements = ToRegister(lir->elements());
4530 0 : Register temp1 = lir->temp1()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp1());
4531 0 : Register temp2 = lir->temp2()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp2());
4532 0 : const LAllocation* value = lir->value();
4533 :
4534 0 : Scalar::Type arrayType = lir->mir()->arrayType();
4535 0 : int width = Scalar::byteSize(arrayType);
4536 :
4537 0 : if (lir->index()->isConstant()) {
4538 0 : Address mem(elements, ToInt32(lir->index()) * width);
4539 0 : AtomicBinopToTypedArray(this, lir->mir()->operation(), arrayType, value, mem, temp1, temp2, output);
4540 : } else {
4541 0 : BaseIndex mem(elements, ToRegister(lir->index()), ScaleFromElemWidth(width));
4542 0 : AtomicBinopToTypedArray(this, lir->mir()->operation(), arrayType, value, mem, temp1, temp2, output);
4543 : }
4544 0 : }
4545 :
4546 : template <typename T>
4547 : static inline void
4548 0 : AtomicBinopToTypedArray(CodeGeneratorX86Shared* cg, AtomicOp op,
4549 : Scalar::Type arrayType, const LAllocation* value, const T& mem)
4550 : {
4551 0 : if (value->isConstant())
4552 0 : cg->atomicBinopToTypedIntArray(op, arrayType, Imm32(ToInt32(value)), mem);
4553 : else
4554 0 : cg->atomicBinopToTypedIntArray(op, arrayType, ToRegister(value), mem);
4555 0 : }
4556 :
4557 : void
4558 0 : CodeGeneratorX86Shared::visitAtomicTypedArrayElementBinopForEffect(LAtomicTypedArrayElementBinopForEffect* lir)
4559 : {
4560 0 : MOZ_ASSERT(!lir->mir()->hasUses());
4561 :
4562 0 : Register elements = ToRegister(lir->elements());
4563 0 : const LAllocation* value = lir->value();
4564 0 : Scalar::Type arrayType = lir->mir()->arrayType();
4565 0 : int width = Scalar::byteSize(arrayType);
4566 :
4567 0 : if (lir->index()->isConstant()) {
4568 0 : Address mem(elements, ToInt32(lir->index()) * width);
4569 0 : AtomicBinopToTypedArray(this, lir->mir()->operation(), arrayType, value, mem);
4570 : } else {
4571 0 : BaseIndex mem(elements, ToRegister(lir->index()), ScaleFromElemWidth(width));
4572 0 : AtomicBinopToTypedArray(this, lir->mir()->operation(), arrayType, value, mem);
4573 : }
4574 0 : }
4575 :
4576 : void
4577 0 : CodeGeneratorX86Shared::visitMemoryBarrier(LMemoryBarrier* ins)
4578 : {
4579 0 : if (ins->type() & MembarStoreLoad)
4580 0 : masm.storeLoadFence();
4581 0 : }
4582 :
4583 : void
4584 0 : CodeGeneratorX86Shared::setReturnDoubleRegs(LiveRegisterSet* regs)
4585 : {
4586 0 : MOZ_ASSERT(ReturnFloat32Reg.encoding() == X86Encoding::xmm0);
4587 0 : MOZ_ASSERT(ReturnDoubleReg.encoding() == X86Encoding::xmm0);
4588 0 : MOZ_ASSERT(ReturnSimd128Reg.encoding() == X86Encoding::xmm0);
4589 0 : regs->add(ReturnFloat32Reg);
4590 0 : regs->add(ReturnDoubleReg);
4591 0 : regs->add(ReturnSimd128Reg);
4592 0 : }
4593 :
4594 : void
4595 0 : CodeGeneratorX86Shared::visitOutOfLineWasmTruncateCheck(OutOfLineWasmTruncateCheck* ool)
4596 : {
4597 0 : FloatRegister input = ool->input();
4598 0 : MIRType fromType = ool->fromType();
4599 0 : MIRType toType = ool->toType();
4600 0 : Label* oolRejoin = ool->rejoin();
4601 0 : bool isUnsigned = ool->isUnsigned();
4602 0 : wasm::BytecodeOffset off = ool->bytecodeOffset();
4603 :
4604 0 : if (fromType == MIRType::Float32) {
4605 0 : if (toType == MIRType::Int32)
4606 0 : masm.outOfLineWasmTruncateFloat32ToInt32(input, isUnsigned, off, oolRejoin);
4607 0 : else if (toType == MIRType::Int64)
4608 0 : masm.outOfLineWasmTruncateFloat32ToInt64(input, isUnsigned, off, oolRejoin);
4609 : else
4610 0 : MOZ_CRASH("unexpected type");
4611 0 : } else if (fromType == MIRType::Double) {
4612 0 : if (toType == MIRType::Int32)
4613 0 : masm.outOfLineWasmTruncateDoubleToInt32(input, isUnsigned, off, oolRejoin);
4614 0 : else if (toType == MIRType::Int64)
4615 0 : masm.outOfLineWasmTruncateDoubleToInt64(input, isUnsigned, off, oolRejoin);
4616 : else
4617 0 : MOZ_CRASH("unexpected type");
4618 : } else {
4619 0 : MOZ_CRASH("unexpected type");
4620 : }
4621 0 : }
4622 :
4623 : void
4624 0 : CodeGeneratorX86Shared::canonicalizeIfDeterministic(Scalar::Type type, const LAllocation* value)
4625 : {
4626 : #ifdef JS_MORE_DETERMINISTIC
4627 : switch (type) {
4628 : case Scalar::Float32: {
4629 : FloatRegister in = ToFloatRegister(value);
4630 : masm.canonicalizeFloatIfDeterministic(in);
4631 : break;
4632 : }
4633 : case Scalar::Float64: {
4634 : FloatRegister in = ToFloatRegister(value);
4635 : masm.canonicalizeDoubleIfDeterministic(in);
4636 : break;
4637 : }
4638 : case Scalar::Float32x4: {
4639 : FloatRegister in = ToFloatRegister(value);
4640 : MOZ_ASSERT(in.isSimd128());
4641 : FloatRegister scratch = in != xmm0.asSimd128() ? xmm0 : xmm1;
4642 : masm.push(scratch);
4643 : masm.canonicalizeFloat32x4(in, scratch);
4644 : masm.pop(scratch);
4645 : break;
4646 : }
4647 : default: {
4648 : // Other types don't need canonicalization.
4649 : break;
4650 : }
4651 : }
4652 : #endif // JS_MORE_DETERMINISTIC
4653 0 : }
4654 :
4655 : void
4656 0 : CodeGeneratorX86Shared::visitCopySignF(LCopySignF* lir)
4657 : {
4658 0 : FloatRegister lhs = ToFloatRegister(lir->getOperand(0));
4659 0 : FloatRegister rhs = ToFloatRegister(lir->getOperand(1));
4660 :
4661 0 : FloatRegister out = ToFloatRegister(lir->output());
4662 :
4663 0 : if (lhs == rhs) {
4664 0 : if (lhs != out)
4665 0 : masm.moveFloat32(lhs, out);
4666 0 : return;
4667 : }
4668 :
4669 0 : ScratchFloat32Scope scratch(masm);
4670 :
4671 0 : float clearSignMask = BitwiseCast<float>(INT32_MAX);
4672 0 : masm.loadConstantFloat32(clearSignMask, scratch);
4673 0 : masm.vandps(scratch, lhs, out);
4674 :
4675 0 : float keepSignMask = BitwiseCast<float>(INT32_MIN);
4676 0 : masm.loadConstantFloat32(keepSignMask, scratch);
4677 0 : masm.vandps(rhs, scratch, scratch);
4678 :
4679 0 : masm.vorps(scratch, out, out);
4680 : }
4681 :
4682 : void
4683 0 : CodeGeneratorX86Shared::visitCopySignD(LCopySignD* lir)
4684 : {
4685 0 : FloatRegister lhs = ToFloatRegister(lir->getOperand(0));
4686 0 : FloatRegister rhs = ToFloatRegister(lir->getOperand(1));
4687 :
4688 0 : FloatRegister out = ToFloatRegister(lir->output());
4689 :
4690 0 : if (lhs == rhs) {
4691 0 : if (lhs != out)
4692 0 : masm.moveDouble(lhs, out);
4693 0 : return;
4694 : }
4695 :
4696 0 : ScratchDoubleScope scratch(masm);
4697 :
4698 0 : double clearSignMask = BitwiseCast<double>(INT64_MAX);
4699 0 : masm.loadConstantDouble(clearSignMask, scratch);
4700 0 : masm.vandpd(scratch, lhs, out);
4701 :
4702 0 : double keepSignMask = BitwiseCast<double>(INT64_MIN);
4703 0 : masm.loadConstantDouble(keepSignMask, scratch);
4704 0 : masm.vandpd(rhs, scratch, scratch);
4705 :
4706 0 : masm.vorpd(scratch, out, out);
4707 : }
4708 :
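// Illustrative scalar model of the copysign lowerings above: the magnitude
// bits are taken from the first operand (AND with INT64_MAX / INT32_MAX) and
// the sign bit from the second (AND with INT64_MIN / INT32_MIN), then OR'd
// together. copySignViaMasks is an invented name for this sketch.
#include <cstdint>
#include <cstring>

static double copySignViaMasks(double lhs, double rhs)
{
    uint64_t l, r;
    std::memcpy(&l, &lhs, sizeof l);
    std::memcpy(&r, &rhs, sizeof r);
    uint64_t bits = (l & uint64_t(INT64_MAX))    // clearSignMask: keep the magnitude of lhs
                  | (r & uint64_t(INT64_MIN));   // keepSignMask: keep the sign of rhs
    double out;
    std::memcpy(&out, &bits, sizeof out);
    return out;
}
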
4709 : void
4710 0 : CodeGeneratorX86Shared::visitRotateI64(LRotateI64* lir)
4711 : {
4712 0 : MRotate* mir = lir->mir();
4713 0 : LAllocation* count = lir->count();
4714 :
4715 0 : Register64 input = ToRegister64(lir->input());
4716 0 : Register64 output = ToOutRegister64(lir);
4717 0 : Register temp = ToTempRegisterOrInvalid(lir->temp());
4718 :
4719 0 : MOZ_ASSERT(input == output);
4720 :
4721 0 : if (count->isConstant()) {
4722 0 : int32_t c = int32_t(count->toConstant()->toInt64() & 0x3F);
4723 0 : if (!c)
4724 0 : return;
4725 0 : if (mir->isLeftRotate())
4726 0 : masm.rotateLeft64(Imm32(c), input, output, temp);
4727 : else
4728 0 : masm.rotateRight64(Imm32(c), input, output, temp);
4729 : } else {
4730 0 : if (mir->isLeftRotate())
4731 0 : masm.rotateLeft64(ToRegister(count), input, output, temp);
4732 : else
4733 0 : masm.rotateRight64(ToRegister(count), input, output, temp);
4734 : }
4735 : }
4736 :
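// Illustrative scalar model of the 64-bit rotate above: the count is taken
// modulo 64 (the & 0x3F on the constant path), and a rotate by zero is a
// no-op, which is why that case returns early. rotateLeft64Model is an
// invented name for this sketch.
#include <cstdint>

static uint64_t rotateLeft64Model(uint64_t v, uint32_t count)
{
    count &= 63;
    if (count == 0)
        return v;                                  // matches the early return above
    return (v << count) | (v >> (64 - count));
}
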
4737 : void
4738 0 : CodeGeneratorX86Shared::visitPopcntI64(LPopcntI64* lir)
4739 : {
4740 0 : Register64 input = ToRegister64(lir->getInt64Operand(0));
4741 0 : Register64 output = ToOutRegister64(lir);
4742 0 : Register temp = InvalidReg;
4743 0 : if (!AssemblerX86Shared::HasPOPCNT())
4744 0 : temp = ToRegister(lir->getTemp(0));
4745 :
4746 0 : masm.popcnt64(input, output, temp);
4747 0 : }
4748 :
4749 : } // namespace jit
4750 : } // namespace js