Line data Source code
1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 : * vim: set ts=8 sts=4 et sw=4 tw=99:
3 : * This Source Code Form is subject to the terms of the Mozilla Public
4 : * License, v. 2.0. If a copy of the MPL was not distributed with this
5 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 :
7 : #include "jit/x86-shared/CodeGenerator-x86-shared.h"
8 :
9 : #include "mozilla/DebugOnly.h"
10 : #include "mozilla/MathAlgorithms.h"
11 :
12 : #include "jsmath.h"
13 :
14 : #include "jit/JitCompartment.h"
15 : #include "jit/JitFrames.h"
16 : #include "jit/Linker.h"
17 : #include "jit/RangeAnalysis.h"
18 : #include "vm/TraceLogging.h"
19 :
20 : #include "jit/MacroAssembler-inl.h"
21 : #include "jit/shared/CodeGenerator-shared-inl.h"
22 :
23 : using namespace js;
24 : using namespace js::jit;
25 :
26 : using mozilla::Abs;
27 : using mozilla::BitwiseCast;
28 : using mozilla::DebugOnly;
29 : using mozilla::FloatingPoint;
30 : using mozilla::FloorLog2;
31 : using mozilla::NegativeInfinity;
32 : using mozilla::SpecificNaN;
33 :
34 : using JS::GenericNaN;
35 :
36 : namespace js {
37 : namespace jit {
38 :
39 8 : CodeGeneratorX86Shared::CodeGeneratorX86Shared(MIRGenerator* gen, LIRGraph* graph, MacroAssembler* masm)
40 8 : : CodeGeneratorShared(gen, graph, masm)
41 : {
42 8 : }
43 :
44 : #ifdef JS_PUNBOX64
45 : Operand
46 0 : CodeGeneratorX86Shared::ToOperandOrRegister64(const LInt64Allocation input)
47 : {
48 0 : return ToOperand(input.value());
49 : }
50 : #else
51 : Register64
52 : CodeGeneratorX86Shared::ToOperandOrRegister64(const LInt64Allocation input)
53 : {
54 : return ToRegister64(input);
55 : }
56 : #endif
57 :
58 : void
59 370 : OutOfLineBailout::accept(CodeGeneratorX86Shared* codegen)
60 : {
61 370 : codegen->visitOutOfLineBailout(this);
62 370 : }
63 :
64 : void
65 78 : CodeGeneratorX86Shared::emitBranch(Assembler::Condition cond, MBasicBlock* mirTrue,
66 : MBasicBlock* mirFalse, Assembler::NaNCond ifNaN)
67 : {
68 78 : if (ifNaN == Assembler::NaN_IsFalse)
69 0 : jumpToBlock(mirFalse, Assembler::Parity);
70 78 : else if (ifNaN == Assembler::NaN_IsTrue)
71 0 : jumpToBlock(mirTrue, Assembler::Parity);
72 :
73 78 : if (isNextBlock(mirFalse->lir())) {
74 31 : jumpToBlock(mirTrue, cond);
75 : } else {
76 47 : jumpToBlock(mirFalse, Assembler::InvertCondition(cond));
77 47 : jumpToBlock(mirTrue);
78 : }
79 78 : }
80 :
81 : void
82 0 : CodeGeneratorX86Shared::visitDouble(LDouble* ins)
83 : {
84 0 : const LDefinition* out = ins->getDef(0);
85 0 : masm.loadConstantDouble(ins->getDouble(), ToFloatRegister(out));
86 0 : }
87 :
88 : void
89 0 : CodeGeneratorX86Shared::visitFloat32(LFloat32* ins)
90 : {
91 0 : const LDefinition* out = ins->getDef(0);
92 0 : masm.loadConstantFloat32(ins->getFloat(), ToFloatRegister(out));
93 0 : }
94 :
95 : void
96 55 : CodeGeneratorX86Shared::visitTestIAndBranch(LTestIAndBranch* test)
97 : {
98 55 : Register input = ToRegister(test->input());
99 55 : masm.test32(input, input);
100 55 : emitBranch(Assembler::NonZero, test->ifTrue(), test->ifFalse());
101 55 : }
102 :
103 : void
104 0 : CodeGeneratorX86Shared::visitTestDAndBranch(LTestDAndBranch* test)
105 : {
106 0 : const LAllocation* opd = test->input();
107 :
108 : // vucomisd flags:
109 : // Z P C
110 : // ---------
111 : // NaN 1 1 1
112 : // > 0 0 0
113 : // < 0 0 1
114 : // = 1 0 0
115 : //
116 : // NaN is falsey, so comparing against 0 and then using the Z flag is
117 : // enough to determine which branch to take.
118 0 : ScratchDoubleScope scratch(masm);
119 0 : masm.zeroDouble(scratch);
120 0 : masm.vucomisd(scratch, ToFloatRegister(opd));
121 0 : emitBranch(Assembler::NotEqual, test->ifTrue(), test->ifFalse());
122 0 : }
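// A minimal host-side sketch (not part of the generated code) of the
// truthiness the sequence above implements: comparing against zero and
// branching on the Z flag makes NaN and +/-0 take the false branch. The
// helper name doubleIsTruthy is hypothetical.
static bool doubleIsTruthy(double d) {
    return d == d && d != 0.0;   // false for NaN (d != d) and for +/-0
}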
123 :
124 : void
125 0 : CodeGeneratorX86Shared::visitTestFAndBranch(LTestFAndBranch* test)
126 : {
127 0 : const LAllocation* opd = test->input();
128 : // vucomiss flags are the same as doubles; see comment above
129 : {
130 0 : ScratchFloat32Scope scratch(masm);
131 0 : masm.zeroFloat32(scratch);
132 0 : masm.vucomiss(scratch, ToFloatRegister(opd));
133 : }
134 0 : emitBranch(Assembler::NotEqual, test->ifTrue(), test->ifFalse());
135 0 : }
136 :
137 : void
138 0 : CodeGeneratorX86Shared::visitBitAndAndBranch(LBitAndAndBranch* baab)
139 : {
140 0 : if (baab->right()->isConstant())
141 0 : masm.test32(ToRegister(baab->left()), Imm32(ToInt32(baab->right())));
142 : else
143 0 : masm.test32(ToRegister(baab->left()), ToRegister(baab->right()));
144 0 : emitBranch(baab->cond(), baab->ifTrue(), baab->ifFalse());
145 0 : }
146 :
147 : void
148 45 : CodeGeneratorX86Shared::emitCompare(MCompare::CompareType type, const LAllocation* left, const LAllocation* right)
149 : {
150 : #ifdef JS_CODEGEN_X64
151 45 : if (type == MCompare::Compare_Object || type == MCompare::Compare_Symbol) {
152 0 : masm.cmpPtr(ToRegister(left), ToOperand(right));
153 0 : return;
154 : }
155 : #endif
156 :
157 45 : if (right->isConstant())
158 37 : masm.cmp32(ToRegister(left), Imm32(ToInt32(right)));
159 : else
160 8 : masm.cmp32(ToRegister(left), ToOperand(right));
161 : }
162 :
163 : void
164 22 : CodeGeneratorX86Shared::visitCompare(LCompare* comp)
165 : {
166 22 : MCompare* mir = comp->mir();
167 22 : emitCompare(mir->compareType(), comp->left(), comp->right());
168 22 : masm.emitSet(JSOpToCondition(mir->compareType(), comp->jsop()), ToRegister(comp->output()));
169 22 : }
170 :
171 : void
172 23 : CodeGeneratorX86Shared::visitCompareAndBranch(LCompareAndBranch* comp)
173 : {
174 23 : MCompare* mir = comp->cmpMir();
175 23 : emitCompare(mir->compareType(), comp->left(), comp->right());
176 23 : Assembler::Condition cond = JSOpToCondition(mir->compareType(), comp->jsop());
177 23 : emitBranch(cond, comp->ifTrue(), comp->ifFalse());
178 23 : }
179 :
180 : void
181 0 : CodeGeneratorX86Shared::visitCompareD(LCompareD* comp)
182 : {
183 0 : FloatRegister lhs = ToFloatRegister(comp->left());
184 0 : FloatRegister rhs = ToFloatRegister(comp->right());
185 :
186 0 : Assembler::DoubleCondition cond = JSOpToDoubleCondition(comp->mir()->jsop());
187 :
188 0 : Assembler::NaNCond nanCond = Assembler::NaNCondFromDoubleCondition(cond);
189 0 : if (comp->mir()->operandsAreNeverNaN())
190 0 : nanCond = Assembler::NaN_HandledByCond;
191 :
192 0 : masm.compareDouble(cond, lhs, rhs);
193 0 : masm.emitSet(Assembler::ConditionFromDoubleCondition(cond), ToRegister(comp->output()), nanCond);
194 0 : }
195 :
196 : void
197 0 : CodeGeneratorX86Shared::visitCompareF(LCompareF* comp)
198 : {
199 0 : FloatRegister lhs = ToFloatRegister(comp->left());
200 0 : FloatRegister rhs = ToFloatRegister(comp->right());
201 :
202 0 : Assembler::DoubleCondition cond = JSOpToDoubleCondition(comp->mir()->jsop());
203 :
204 0 : Assembler::NaNCond nanCond = Assembler::NaNCondFromDoubleCondition(cond);
205 0 : if (comp->mir()->operandsAreNeverNaN())
206 0 : nanCond = Assembler::NaN_HandledByCond;
207 :
208 0 : masm.compareFloat(cond, lhs, rhs);
209 0 : masm.emitSet(Assembler::ConditionFromDoubleCondition(cond), ToRegister(comp->output()), nanCond);
210 0 : }
211 :
212 : void
213 0 : CodeGeneratorX86Shared::visitNotI(LNotI* ins)
214 : {
215 0 : masm.cmp32(ToRegister(ins->input()), Imm32(0));
216 0 : masm.emitSet(Assembler::Equal, ToRegister(ins->output()));
217 0 : }
218 :
219 : void
220 0 : CodeGeneratorX86Shared::visitNotD(LNotD* ins)
221 : {
222 0 : FloatRegister opd = ToFloatRegister(ins->input());
223 :
224 : // Not returns true if the input is a NaN. We don't have to worry about
225 : // it if we know the input is never NaN though.
226 0 : Assembler::NaNCond nanCond = Assembler::NaN_IsTrue;
227 0 : if (ins->mir()->operandIsNeverNaN())
228 0 : nanCond = Assembler::NaN_HandledByCond;
229 :
230 0 : ScratchDoubleScope scratch(masm);
231 0 : masm.zeroDouble(scratch);
232 0 : masm.compareDouble(Assembler::DoubleEqualOrUnordered, opd, scratch);
233 0 : masm.emitSet(Assembler::Equal, ToRegister(ins->output()), nanCond);
234 0 : }
235 :
236 : void
237 0 : CodeGeneratorX86Shared::visitNotF(LNotF* ins)
238 : {
239 0 : FloatRegister opd = ToFloatRegister(ins->input());
240 :
241 : // Not returns true if the input is a NaN. We don't have to worry about
242 : // it if we know the input is never NaN though.
243 0 : Assembler::NaNCond nanCond = Assembler::NaN_IsTrue;
244 0 : if (ins->mir()->operandIsNeverNaN())
245 0 : nanCond = Assembler::NaN_HandledByCond;
246 :
247 0 : ScratchFloat32Scope scratch(masm);
248 0 : masm.zeroFloat32(scratch);
249 0 : masm.compareFloat(Assembler::DoubleEqualOrUnordered, opd, scratch);
250 0 : masm.emitSet(Assembler::Equal, ToRegister(ins->output()), nanCond);
251 0 : }
252 :
253 : void
254 0 : CodeGeneratorX86Shared::visitCompareDAndBranch(LCompareDAndBranch* comp)
255 : {
256 0 : FloatRegister lhs = ToFloatRegister(comp->left());
257 0 : FloatRegister rhs = ToFloatRegister(comp->right());
258 :
259 0 : Assembler::DoubleCondition cond = JSOpToDoubleCondition(comp->cmpMir()->jsop());
260 :
261 0 : Assembler::NaNCond nanCond = Assembler::NaNCondFromDoubleCondition(cond);
262 0 : if (comp->cmpMir()->operandsAreNeverNaN())
263 0 : nanCond = Assembler::NaN_HandledByCond;
264 :
265 0 : masm.compareDouble(cond, lhs, rhs);
266 0 : emitBranch(Assembler::ConditionFromDoubleCondition(cond), comp->ifTrue(), comp->ifFalse(), nanCond);
267 0 : }
268 :
269 : void
270 0 : CodeGeneratorX86Shared::visitCompareFAndBranch(LCompareFAndBranch* comp)
271 : {
272 0 : FloatRegister lhs = ToFloatRegister(comp->left());
273 0 : FloatRegister rhs = ToFloatRegister(comp->right());
274 :
275 0 : Assembler::DoubleCondition cond = JSOpToDoubleCondition(comp->cmpMir()->jsop());
276 :
277 0 : Assembler::NaNCond nanCond = Assembler::NaNCondFromDoubleCondition(cond);
278 0 : if (comp->cmpMir()->operandsAreNeverNaN())
279 0 : nanCond = Assembler::NaN_HandledByCond;
280 :
281 0 : masm.compareFloat(cond, lhs, rhs);
282 0 : emitBranch(Assembler::ConditionFromDoubleCondition(cond), comp->ifTrue(), comp->ifFalse(), nanCond);
283 0 : }
284 :
285 : void
286 0 : CodeGeneratorX86Shared::visitWasmStackArg(LWasmStackArg* ins)
287 : {
288 0 : const MWasmStackArg* mir = ins->mir();
289 0 : Address dst(StackPointer, mir->spOffset());
290 0 : if (ins->arg()->isConstant()) {
291 0 : masm.storePtr(ImmWord(ToInt32(ins->arg())), dst);
292 0 : } else if (ins->arg()->isGeneralReg()) {
293 0 : masm.storePtr(ToRegister(ins->arg()), dst);
294 : } else {
295 0 : switch (mir->input()->type()) {
296 : case MIRType::Double:
297 0 : masm.storeDouble(ToFloatRegister(ins->arg()), dst);
298 0 : return;
299 : case MIRType::Float32:
300 0 : masm.storeFloat32(ToFloatRegister(ins->arg()), dst);
301 0 : return;
302 : // StackPointer is SIMD-aligned and ABIArgGenerator guarantees
303 : // stack offsets are SIMD-aligned.
304 : case MIRType::Int32x4:
305 : case MIRType::Bool32x4:
306 0 : masm.storeAlignedSimd128Int(ToFloatRegister(ins->arg()), dst);
307 0 : return;
308 : case MIRType::Float32x4:
309 0 : masm.storeAlignedSimd128Float(ToFloatRegister(ins->arg()), dst);
310 0 : return;
311 0 : default: break;
312 : }
313 0 : MOZ_MAKE_COMPILER_ASSUME_IS_UNREACHABLE("unexpected mir type in WasmStackArg");
314 : }
315 : }
316 :
317 : void
318 0 : CodeGeneratorX86Shared::visitWasmStackArgI64(LWasmStackArgI64* ins)
319 : {
320 0 : const MWasmStackArg* mir = ins->mir();
321 0 : Address dst(StackPointer, mir->spOffset());
322 0 : if (IsConstant(ins->arg()))
323 0 : masm.store64(Imm64(ToInt64(ins->arg())), dst);
324 : else
325 0 : masm.store64(ToRegister64(ins->arg()), dst);
326 0 : }
327 :
328 : void
329 0 : CodeGeneratorX86Shared::visitWasmSelect(LWasmSelect* ins)
330 : {
331 0 : MIRType mirType = ins->mir()->type();
332 :
333 0 : Register cond = ToRegister(ins->condExpr());
334 0 : Operand falseExpr = ToOperand(ins->falseExpr());
335 :
336 0 : masm.test32(cond, cond);
337 :
338 0 : if (mirType == MIRType::Int32) {
339 0 : Register out = ToRegister(ins->output());
340 0 : MOZ_ASSERT(ToRegister(ins->trueExpr()) == out, "true expr input is reused for output");
341 0 : masm.cmovz(falseExpr, out);
342 0 : return;
343 : }
344 :
345 0 : FloatRegister out = ToFloatRegister(ins->output());
346 0 : MOZ_ASSERT(ToFloatRegister(ins->trueExpr()) == out, "true expr input is reused for output");
347 :
348 0 : Label done;
349 0 : masm.j(Assembler::NonZero, &done);
350 :
351 0 : if (mirType == MIRType::Float32) {
352 0 : if (falseExpr.kind() == Operand::FPREG)
353 0 : masm.moveFloat32(ToFloatRegister(ins->falseExpr()), out);
354 : else
355 0 : masm.loadFloat32(falseExpr, out);
356 0 : } else if (mirType == MIRType::Double) {
357 0 : if (falseExpr.kind() == Operand::FPREG)
358 0 : masm.moveDouble(ToFloatRegister(ins->falseExpr()), out);
359 : else
360 0 : masm.loadDouble(falseExpr, out);
361 : } else {
362 0 : MOZ_CRASH("unhandled type in visitWasmSelect!");
363 : }
364 :
365 0 : masm.bind(&done);
366 0 : return;
367 : }
368 :
369 : void
370 0 : CodeGeneratorX86Shared::visitWasmReinterpret(LWasmReinterpret* lir)
371 : {
372 0 : MOZ_ASSERT(gen->compilingWasm());
373 0 : MWasmReinterpret* ins = lir->mir();
374 :
375 0 : MIRType to = ins->type();
376 : #ifdef DEBUG
377 0 : MIRType from = ins->input()->type();
378 : #endif
379 :
380 0 : switch (to) {
381 : case MIRType::Int32:
382 0 : MOZ_ASSERT(from == MIRType::Float32);
383 0 : masm.vmovd(ToFloatRegister(lir->input()), ToRegister(lir->output()));
384 0 : break;
385 : case MIRType::Float32:
386 0 : MOZ_ASSERT(from == MIRType::Int32);
387 0 : masm.vmovd(ToRegister(lir->input()), ToFloatRegister(lir->output()));
388 0 : break;
389 : case MIRType::Double:
390 : case MIRType::Int64:
391 0 : MOZ_CRASH("not handled by this LIR opcode");
392 : default:
393 0 : MOZ_CRASH("unexpected WasmReinterpret");
394 : }
395 0 : }
396 :
397 : void
398 0 : CodeGeneratorX86Shared::visitOutOfLineLoadTypedArrayOutOfBounds(OutOfLineLoadTypedArrayOutOfBounds* ool)
399 : {
400 0 : switch (ool->viewType()) {
401 : case Scalar::Int64:
402 : case Scalar::Float32x4:
403 : case Scalar::Int8x16:
404 : case Scalar::Int16x8:
405 : case Scalar::Int32x4:
406 : case Scalar::MaxTypedArrayViewType:
407 0 : MOZ_CRASH("unexpected array type");
408 : case Scalar::Float32:
409 0 : masm.loadConstantFloat32(float(GenericNaN()), ool->dest().fpu());
410 0 : break;
411 : case Scalar::Float64:
412 0 : masm.loadConstantDouble(GenericNaN(), ool->dest().fpu());
413 0 : break;
414 : case Scalar::Int8:
415 : case Scalar::Uint8:
416 : case Scalar::Int16:
417 : case Scalar::Uint16:
418 : case Scalar::Int32:
419 : case Scalar::Uint32:
420 : case Scalar::Uint8Clamped:
421 0 : Register destReg = ool->dest().gpr();
422 0 : masm.mov(ImmWord(0), destReg);
423 0 : break;
424 : }
425 0 : masm.jmp(ool->rejoin());
426 0 : }
427 :
428 : void
429 0 : CodeGeneratorX86Shared::visitWasmAddOffset(LWasmAddOffset* lir)
430 : {
431 0 : MWasmAddOffset* mir = lir->mir();
432 0 : Register base = ToRegister(lir->base());
433 0 : Register out = ToRegister(lir->output());
434 :
435 0 : if (base != out)
436 0 : masm.move32(base, out);
437 0 : masm.add32(Imm32(mir->offset()), out);
438 :
439 0 : masm.j(Assembler::CarrySet, trap(mir, wasm::Trap::OutOfBounds));
440 0 : }
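// Host-side sketch (not JIT output) of the bounds check above: the 32-bit add
// sets the carry flag exactly when base + offset wraps modulo 2^32, and that
// wrap must trap as wasm::Trap::OutOfBounds. The helper name
// wasmAddOffsetWouldTrap is hypothetical.
#include <cstdint>

static bool wasmAddOffsetWouldTrap(uint32_t base, uint32_t offset) {
    uint32_t sum = base + offset;   // wraps modulo 2^32
    return sum < base;              // true exactly when the add carried out
}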
441 :
442 : void
443 0 : CodeGeneratorX86Shared::visitWasmTruncateToInt32(LWasmTruncateToInt32* lir)
444 : {
445 0 : FloatRegister input = ToFloatRegister(lir->input());
446 0 : Register output = ToRegister(lir->output());
447 :
448 0 : MWasmTruncateToInt32* mir = lir->mir();
449 0 : MIRType inputType = mir->input()->type();
450 :
451 0 : MOZ_ASSERT(inputType == MIRType::Double || inputType == MIRType::Float32);
452 :
453 0 : auto* ool = new (alloc()) OutOfLineWasmTruncateCheck(mir, input);
454 0 : addOutOfLineCode(ool, mir);
455 :
456 0 : Label* oolEntry = ool->entry();
457 0 : if (mir->isUnsigned()) {
458 0 : if (inputType == MIRType::Double)
459 0 : masm.wasmTruncateDoubleToUInt32(input, output, oolEntry);
460 0 : else if (inputType == MIRType::Float32)
461 0 : masm.wasmTruncateFloat32ToUInt32(input, output, oolEntry);
462 : else
463 0 : MOZ_CRASH("unexpected type");
464 0 : return;
465 : }
466 :
467 0 : if (inputType == MIRType::Double)
468 0 : masm.wasmTruncateDoubleToInt32(input, output, oolEntry);
469 0 : else if (inputType == MIRType::Float32)
470 0 : masm.wasmTruncateFloat32ToInt32(input, output, oolEntry);
471 : else
472 0 : MOZ_CRASH("unexpected type");
473 :
474 0 : masm.bind(ool->rejoin());
475 : }
476 :
477 : bool
478 8 : CodeGeneratorX86Shared::generateOutOfLineCode()
479 : {
480 8 : if (!CodeGeneratorShared::generateOutOfLineCode())
481 0 : return false;
482 :
483 8 : if (deoptLabel_.used()) {
484 : // All non-table-based bailouts will go here.
485 8 : masm.bind(&deoptLabel_);
486 :
487 : // Push the frame size, so the handler can recover the IonScript.
488 8 : masm.push(Imm32(frameSize()));
489 :
490 8 : JitCode* handler = gen->jitRuntime()->getGenericBailoutHandler();
491 8 : masm.jmp(ImmPtr(handler->raw()), Relocation::JITCODE);
492 : }
493 :
494 8 : return !masm.oom();
495 : }
496 :
497 : class BailoutJump {
498 : Assembler::Condition cond_;
499 :
500 : public:
501 91 : explicit BailoutJump(Assembler::Condition cond) : cond_(cond)
502 91 : { }
503 : #ifdef JS_CODEGEN_X86
504 : void operator()(MacroAssembler& masm, uint8_t* code) const {
505 : masm.j(cond_, ImmPtr(code), Relocation::HARDCODED);
506 : }
507 : #endif
508 91 : void operator()(MacroAssembler& masm, Label* label) const {
509 91 : masm.j(cond_, label);
510 91 : }
511 : };
512 :
513 : class BailoutLabel {
514 : Label* label_;
515 :
516 : public:
517 279 : explicit BailoutLabel(Label* label) : label_(label)
518 279 : { }
519 : #ifdef JS_CODEGEN_X86
520 : void operator()(MacroAssembler& masm, uint8_t* code) const {
521 : masm.retarget(label_, ImmPtr(code), Relocation::HARDCODED);
522 : }
523 : #endif
524 279 : void operator()(MacroAssembler& masm, Label* label) const {
525 279 : masm.retarget(label_, label);
526 279 : }
527 : };
528 :
529 : template <typename T> void
530 370 : CodeGeneratorX86Shared::bailout(const T& binder, LSnapshot* snapshot)
531 : {
532 370 : encode(snapshot);
533 :
534 : // Though the assembler doesn't track all frame pushes, at least make sure
535 : // the known value makes sense. We can't use bailout tables if the stack
536 : // isn't properly aligned to the static frame size.
537 370 : MOZ_ASSERT_IF(frameClass_ != FrameSizeClass::None() && deoptTable_,
538 : frameClass_.frameSize() == masm.framePushed());
539 :
540 : #ifdef JS_CODEGEN_X86
541 : // On x64, bailout tables are pointless, because 16 extra bytes are
542 : // reserved per external jump, whereas it takes only 10 bytes to encode a
543 : // non-table based bailout.

544 : if (assignBailoutId(snapshot)) {
545 : binder(masm, deoptTable_->raw() + snapshot->bailoutId() * BAILOUT_TABLE_ENTRY_SIZE);
546 : return;
547 : }
548 : #endif
549 :
550 : // We could not use a jump table, either because all bailout IDs were
551 : // reserved, or a jump table is not optimal for this frame size or
552 : // platform. Whatever, we will generate a lazy bailout.
553 : //
554 : // All bailout code is associated with the bytecodeSite of the block we are
555 : // bailing out from.
556 370 : InlineScriptTree* tree = snapshot->mir()->block()->trackedTree();
557 370 : OutOfLineBailout* ool = new(alloc()) OutOfLineBailout(snapshot);
558 370 : addOutOfLineCode(ool, new(alloc()) BytecodeSite(tree, tree->script()->code()));
559 :
560 370 : binder(masm, ool->entry());
561 370 : }
562 :
563 : void
564 91 : CodeGeneratorX86Shared::bailoutIf(Assembler::Condition condition, LSnapshot* snapshot)
565 : {
566 91 : bailout(BailoutJump(condition), snapshot);
567 91 : }
568 :
569 : void
570 0 : CodeGeneratorX86Shared::bailoutIf(Assembler::DoubleCondition condition, LSnapshot* snapshot)
571 : {
572 0 : MOZ_ASSERT(Assembler::NaNCondFromDoubleCondition(condition) == Assembler::NaN_HandledByCond);
573 0 : bailoutIf(Assembler::ConditionFromDoubleCondition(condition), snapshot);
574 0 : }
575 :
576 : void
577 279 : CodeGeneratorX86Shared::bailoutFrom(Label* label, LSnapshot* snapshot)
578 : {
579 279 : MOZ_ASSERT(label->used() && !label->bound());
580 279 : bailout(BailoutLabel(label), snapshot);
581 279 : }
582 :
583 : void
584 19 : CodeGeneratorX86Shared::bailout(LSnapshot* snapshot)
585 : {
586 38 : Label label;
587 19 : masm.jump(&label);
588 19 : bailoutFrom(&label, snapshot);
589 19 : }
590 :
591 : void
592 370 : CodeGeneratorX86Shared::visitOutOfLineBailout(OutOfLineBailout* ool)
593 : {
594 370 : masm.push(Imm32(ool->snapshot()->snapshotOffset()));
595 370 : masm.jmp(&deoptLabel_);
596 370 : }
597 :
598 : void
599 0 : CodeGeneratorX86Shared::visitMinMaxD(LMinMaxD* ins)
600 : {
601 0 : FloatRegister first = ToFloatRegister(ins->first());
602 0 : FloatRegister second = ToFloatRegister(ins->second());
603 : #ifdef DEBUG
604 0 : FloatRegister output = ToFloatRegister(ins->output());
605 0 : MOZ_ASSERT(first == output);
606 : #endif
607 :
608 0 : bool handleNaN = !ins->mir()->range() || ins->mir()->range()->canBeNaN();
609 :
610 0 : if (ins->mir()->isMax())
611 0 : masm.maxDouble(second, first, handleNaN);
612 : else
613 0 : masm.minDouble(second, first, handleNaN);
614 0 : }
615 :
616 : void
617 0 : CodeGeneratorX86Shared::visitMinMaxF(LMinMaxF* ins)
618 : {
619 0 : FloatRegister first = ToFloatRegister(ins->first());
620 0 : FloatRegister second = ToFloatRegister(ins->second());
621 : #ifdef DEBUG
622 0 : FloatRegister output = ToFloatRegister(ins->output());
623 0 : MOZ_ASSERT(first == output);
624 : #endif
625 :
626 0 : bool handleNaN = !ins->mir()->range() || ins->mir()->range()->canBeNaN();
627 :
628 0 : if (ins->mir()->isMax())
629 0 : masm.maxFloat32(second, first, handleNaN);
630 : else
631 0 : masm.minFloat32(second, first, handleNaN);
632 0 : }
633 :
634 : void
635 0 : CodeGeneratorX86Shared::visitAbsD(LAbsD* ins)
636 : {
637 0 : FloatRegister input = ToFloatRegister(ins->input());
638 0 : MOZ_ASSERT(input == ToFloatRegister(ins->output()));
639 : // Load a value which is all ones except for the sign bit.
640 0 : ScratchDoubleScope scratch(masm);
641 0 : masm.loadConstantDouble(SpecificNaN<double>(0, FloatingPoint<double>::kSignificandBits), scratch);
642 0 : masm.vandpd(scratch, input, input);
643 0 : }
644 :
645 : void
646 0 : CodeGeneratorX86Shared::visitAbsF(LAbsF* ins)
647 : {
648 0 : FloatRegister input = ToFloatRegister(ins->input());
649 0 : MOZ_ASSERT(input == ToFloatRegister(ins->output()));
650 : // Same trick as visitAbsD above.
651 0 : ScratchFloat32Scope scratch(masm);
652 0 : masm.loadConstantFloat32(SpecificNaN<float>(0, FloatingPoint<float>::kSignificandBits), scratch);
653 0 : masm.vandps(scratch, input, input);
654 0 : }
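// Host-side sketch of the mask used by visitAbsD/visitAbsF:
// SpecificNaN<double>(0, kSignificandBits) is the bit pattern
// 0x7FFFFFFFFFFFFFFF, so the vandpd/vandps simply clears the IEEE sign bit.
// The helper name absViaSignMask is hypothetical.
#include <cstdint>
#include <cstring>

static double absViaSignMask(double x) {
    uint64_t bits;
    std::memcpy(&bits, &x, sizeof bits);
    bits &= 0x7FFFFFFFFFFFFFFFULL;   // all ones except the sign bit
    std::memcpy(&x, &bits, sizeof x);
    return x;
}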
655 :
656 : void
657 0 : CodeGeneratorX86Shared::visitClzI(LClzI* ins)
658 : {
659 0 : Register input = ToRegister(ins->input());
660 0 : Register output = ToRegister(ins->output());
661 0 : bool knownNotZero = ins->mir()->operandIsNeverZero();
662 :
663 0 : masm.clz32(input, output, knownNotZero);
664 0 : }
665 :
666 : void
667 0 : CodeGeneratorX86Shared::visitCtzI(LCtzI* ins)
668 : {
669 0 : Register input = ToRegister(ins->input());
670 0 : Register output = ToRegister(ins->output());
671 0 : bool knownNotZero = ins->mir()->operandIsNeverZero();
672 :
673 0 : masm.ctz32(input, output, knownNotZero);
674 0 : }
675 :
676 : void
677 0 : CodeGeneratorX86Shared::visitPopcntI(LPopcntI* ins)
678 : {
679 0 : Register input = ToRegister(ins->input());
680 0 : Register output = ToRegister(ins->output());
681 0 : Register temp = ins->temp()->isBogusTemp() ? InvalidReg : ToRegister(ins->temp());
682 :
683 0 : masm.popcnt32(input, output, temp);
684 0 : }
685 :
686 : void
687 0 : CodeGeneratorX86Shared::visitSqrtD(LSqrtD* ins)
688 : {
689 0 : FloatRegister input = ToFloatRegister(ins->input());
690 0 : FloatRegister output = ToFloatRegister(ins->output());
691 0 : masm.vsqrtsd(input, output, output);
692 0 : }
693 :
694 : void
695 0 : CodeGeneratorX86Shared::visitSqrtF(LSqrtF* ins)
696 : {
697 0 : FloatRegister input = ToFloatRegister(ins->input());
698 0 : FloatRegister output = ToFloatRegister(ins->output());
699 0 : masm.vsqrtss(input, output, output);
700 0 : }
701 :
702 : void
703 0 : CodeGeneratorX86Shared::visitPowHalfD(LPowHalfD* ins)
704 : {
705 0 : FloatRegister input = ToFloatRegister(ins->input());
706 0 : FloatRegister output = ToFloatRegister(ins->output());
707 :
708 0 : ScratchDoubleScope scratch(masm);
709 :
710 0 : Label done, sqrt;
711 :
712 0 : if (!ins->mir()->operandIsNeverNegativeInfinity()) {
713 : // Branch if not -Infinity.
714 0 : masm.loadConstantDouble(NegativeInfinity<double>(), scratch);
715 :
716 0 : Assembler::DoubleCondition cond = Assembler::DoubleNotEqualOrUnordered;
717 0 : if (ins->mir()->operandIsNeverNaN())
718 0 : cond = Assembler::DoubleNotEqual;
719 0 : masm.branchDouble(cond, input, scratch, &sqrt);
720 :
721 : // Math.pow(-Infinity, 0.5) == Infinity.
722 0 : masm.zeroDouble(output);
723 0 : masm.subDouble(scratch, output);
724 0 : masm.jump(&done);
725 :
726 0 : masm.bind(&sqrt);
727 : }
728 :
729 0 : if (!ins->mir()->operandIsNeverNegativeZero()) {
730 : // Math.pow(-0, 0.5) == 0 == Math.pow(0, 0.5). Adding 0 converts any -0 to 0.
731 0 : masm.zeroDouble(scratch);
732 0 : masm.addDouble(input, scratch);
733 0 : masm.vsqrtsd(scratch, output, output);
734 : } else {
735 0 : masm.vsqrtsd(input, output, output);
736 : }
737 :
738 0 : masm.bind(&done);
739 0 : }
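// Host-side sketch (not JIT output) of the special cases handled above,
// assuming the usual JS semantics for Math.pow(x, 0.5). The helper name
// powHalfRef is hypothetical.
#include <cmath>
#include <limits>

static double powHalfRef(double x) {
    // Math.pow(-Infinity, 0.5) == +Infinity, whereas sqrt(-Infinity) is NaN,
    // hence the explicit branch before the square root.
    if (x == -std::numeric_limits<double>::infinity())
        return std::numeric_limits<double>::infinity();
    // Math.pow(-0, 0.5) == +0, but IEEE sqrt(-0.0) returns -0.0. Adding +0.0
    // first turns -0.0 into +0.0, which is what the addDouble above does.
    return std::sqrt(x + 0.0);
}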
740 :
741 : class OutOfLineUndoALUOperation : public OutOfLineCodeBase<CodeGeneratorX86Shared>
742 : {
743 : LInstruction* ins_;
744 :
745 : public:
746 1 : explicit OutOfLineUndoALUOperation(LInstruction* ins)
747 1 : : ins_(ins)
748 1 : { }
749 :
750 1 : virtual void accept(CodeGeneratorX86Shared* codegen) {
751 1 : codegen->visitOutOfLineUndoALUOperation(this);
752 1 : }
753 2 : LInstruction* ins() const {
754 2 : return ins_;
755 : }
756 : };
757 :
758 : void
759 8 : CodeGeneratorX86Shared::visitAddI(LAddI* ins)
760 : {
761 8 : if (ins->rhs()->isConstant())
762 8 : masm.addl(Imm32(ToInt32(ins->rhs())), ToOperand(ins->lhs()));
763 : else
764 0 : masm.addl(ToOperand(ins->rhs()), ToRegister(ins->lhs()));
765 :
766 8 : if (ins->snapshot()) {
767 1 : if (ins->recoversInput()) {
768 1 : OutOfLineUndoALUOperation* ool = new(alloc()) OutOfLineUndoALUOperation(ins);
769 1 : addOutOfLineCode(ool, ins->mir());
770 1 : masm.j(Assembler::Overflow, ool->entry());
771 : } else {
772 0 : bailoutIf(Assembler::Overflow, ins->snapshot());
773 : }
774 : }
775 8 : }
776 :
777 : void
778 0 : CodeGeneratorX86Shared::visitAddI64(LAddI64* lir)
779 : {
780 0 : const LInt64Allocation lhs = lir->getInt64Operand(LAddI64::Lhs);
781 0 : const LInt64Allocation rhs = lir->getInt64Operand(LAddI64::Rhs);
782 :
783 0 : MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs));
784 :
785 0 : if (IsConstant(rhs)) {
786 0 : masm.add64(Imm64(ToInt64(rhs)), ToRegister64(lhs));
787 0 : return;
788 : }
789 :
790 0 : masm.add64(ToOperandOrRegister64(rhs), ToRegister64(lhs));
791 : }
792 :
793 : void
794 0 : CodeGeneratorX86Shared::visitSubI(LSubI* ins)
795 : {
796 0 : if (ins->rhs()->isConstant())
797 0 : masm.subl(Imm32(ToInt32(ins->rhs())), ToOperand(ins->lhs()));
798 : else
799 0 : masm.subl(ToOperand(ins->rhs()), ToRegister(ins->lhs()));
800 :
801 0 : if (ins->snapshot()) {
802 0 : if (ins->recoversInput()) {
803 0 : OutOfLineUndoALUOperation* ool = new(alloc()) OutOfLineUndoALUOperation(ins);
804 0 : addOutOfLineCode(ool, ins->mir());
805 0 : masm.j(Assembler::Overflow, ool->entry());
806 : } else {
807 0 : bailoutIf(Assembler::Overflow, ins->snapshot());
808 : }
809 : }
810 0 : }
811 :
812 : void
813 0 : CodeGeneratorX86Shared::visitSubI64(LSubI64* lir)
814 : {
815 0 : const LInt64Allocation lhs = lir->getInt64Operand(LSubI64::Lhs);
816 0 : const LInt64Allocation rhs = lir->getInt64Operand(LSubI64::Rhs);
817 :
818 0 : MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs));
819 :
820 0 : if (IsConstant(rhs)) {
821 0 : masm.sub64(Imm64(ToInt64(rhs)), ToRegister64(lhs));
822 0 : return;
823 : }
824 :
825 0 : masm.sub64(ToOperandOrRegister64(rhs), ToRegister64(lhs));
826 : }
827 :
828 : void
829 1 : CodeGeneratorX86Shared::visitOutOfLineUndoALUOperation(OutOfLineUndoALUOperation* ool)
830 : {
831 1 : LInstruction* ins = ool->ins();
832 1 : Register reg = ToRegister(ins->getDef(0));
833 :
834 2 : DebugOnly<LAllocation*> lhs = ins->getOperand(0);
835 1 : LAllocation* rhs = ins->getOperand(1);
836 :
837 1 : MOZ_ASSERT(reg == ToRegister(lhs));
838 1 : MOZ_ASSERT_IF(rhs->isGeneralReg(), reg != ToRegister(rhs));
839 :
840 : // Undo the effect of the ALU operation, which was performed on the output
841 : // register and overflowed. Writing to the output register clobbered an
842 : // input reg, and the original value of the input needs to be recovered
843 : // to satisfy the constraint imposed by any RECOVERED_INPUT operands to
844 : // the bailout snapshot.
845 :
846 1 : if (rhs->isConstant()) {
847 1 : Imm32 constant(ToInt32(rhs));
848 1 : if (ins->isAddI())
849 1 : masm.subl(constant, reg);
850 : else
851 0 : masm.addl(constant, reg);
852 : } else {
853 0 : if (ins->isAddI())
854 0 : masm.subl(ToOperand(rhs), reg);
855 : else
856 0 : masm.addl(ToOperand(rhs), reg);
857 : }
858 :
859 1 : bailout(ool->ins()->snapshot());
860 1 : }
861 :
862 : class MulNegativeZeroCheck : public OutOfLineCodeBase<CodeGeneratorX86Shared>
863 : {
864 : LMulI* ins_;
865 :
866 : public:
867 0 : explicit MulNegativeZeroCheck(LMulI* ins)
868 0 : : ins_(ins)
869 0 : { }
870 :
871 0 : virtual void accept(CodeGeneratorX86Shared* codegen) {
872 0 : codegen->visitMulNegativeZeroCheck(this);
873 0 : }
874 0 : LMulI* ins() const {
875 0 : return ins_;
876 : }
877 : };
878 :
879 : void
880 0 : CodeGeneratorX86Shared::visitMulI(LMulI* ins)
881 : {
882 0 : const LAllocation* lhs = ins->lhs();
883 0 : const LAllocation* rhs = ins->rhs();
884 0 : MMul* mul = ins->mir();
885 0 : MOZ_ASSERT_IF(mul->mode() == MMul::Integer, !mul->canBeNegativeZero() && !mul->canOverflow());
886 :
887 0 : if (rhs->isConstant()) {
888 : // Bailout on -0.0
889 0 : int32_t constant = ToInt32(rhs);
890 0 : if (mul->canBeNegativeZero() && constant <= 0) {
891 0 : Assembler::Condition bailoutCond = (constant == 0) ? Assembler::Signed : Assembler::Equal;
892 0 : masm.test32(ToRegister(lhs), ToRegister(lhs));
893 0 : bailoutIf(bailoutCond, ins->snapshot());
894 : }
895 :
896 0 : switch (constant) {
897 : case -1:
898 0 : masm.negl(ToOperand(lhs));
899 0 : break;
900 : case 0:
901 0 : masm.xorl(ToOperand(lhs), ToRegister(lhs));
902 0 : return; // escape overflow check;
903 : case 1:
904 : // nop
905 0 : return; // escape overflow check;
906 : case 2:
907 0 : masm.addl(ToOperand(lhs), ToRegister(lhs));
908 0 : break;
909 : default:
910 0 : if (!mul->canOverflow() && constant > 0) {
911 : // Use shift if cannot overflow and constant is power of 2
912 0 : int32_t shift = FloorLog2(constant);
913 0 : if ((1 << shift) == constant) {
914 0 : masm.shll(Imm32(shift), ToRegister(lhs));
915 0 : return;
916 : }
917 : }
918 0 : masm.imull(Imm32(ToInt32(rhs)), ToRegister(lhs));
919 : }
920 :
921 : // Bailout on overflow
922 0 : if (mul->canOverflow())
923 0 : bailoutIf(Assembler::Overflow, ins->snapshot());
924 : } else {
925 0 : masm.imull(ToOperand(rhs), ToRegister(lhs));
926 :
927 : // Bailout on overflow
928 0 : if (mul->canOverflow())
929 0 : bailoutIf(Assembler::Overflow, ins->snapshot());
930 :
931 0 : if (mul->canBeNegativeZero()) {
932 : // Jump to an OOL path if the result is 0.
933 0 : MulNegativeZeroCheck* ool = new(alloc()) MulNegativeZeroCheck(ins);
934 0 : addOutOfLineCode(ool, mul);
935 :
936 0 : masm.test32(ToRegister(lhs), ToRegister(lhs));
937 0 : masm.j(Assembler::Zero, ool->entry());
938 0 : masm.bind(ool->rejoin());
939 : }
940 : }
941 : }
942 :
943 : void
944 0 : CodeGeneratorX86Shared::visitMulI64(LMulI64* lir)
945 : {
946 0 : const LInt64Allocation lhs = lir->getInt64Operand(LMulI64::Lhs);
947 0 : const LInt64Allocation rhs = lir->getInt64Operand(LMulI64::Rhs);
948 :
949 0 : MOZ_ASSERT(ToRegister64(lhs) == ToOutRegister64(lir));
950 :
951 0 : if (IsConstant(rhs)) {
952 0 : int64_t constant = ToInt64(rhs);
953 0 : switch (constant) {
954 : case -1:
955 0 : masm.neg64(ToRegister64(lhs));
956 0 : return;
957 : case 0:
958 0 : masm.xor64(ToRegister64(lhs), ToRegister64(lhs));
959 0 : return;
960 : case 1:
961 : // nop
962 0 : return;
963 : case 2:
964 0 : masm.add64(ToRegister64(lhs), ToRegister64(lhs));
965 0 : return;
966 : default:
967 0 : if (constant > 0) {
968 : // Use shift if constant is power of 2.
969 0 : int32_t shift = mozilla::FloorLog2(constant);
970 0 : if (int64_t(1) << shift == constant) {
971 0 : masm.lshift64(Imm32(shift), ToRegister64(lhs));
972 0 : return;
973 : }
974 : }
975 0 : Register temp = ToTempRegisterOrInvalid(lir->temp());
976 0 : masm.mul64(Imm64(constant), ToRegister64(lhs), temp);
977 : }
978 : } else {
979 0 : Register temp = ToTempRegisterOrInvalid(lir->temp());
980 0 : masm.mul64(ToOperandOrRegister64(rhs), ToRegister64(lhs), temp);
981 : }
982 : }
983 :
984 : class ReturnZero : public OutOfLineCodeBase<CodeGeneratorX86Shared>
985 : {
986 : Register reg_;
987 :
988 : public:
989 0 : explicit ReturnZero(Register reg)
990 0 : : reg_(reg)
991 0 : { }
992 :
993 0 : virtual void accept(CodeGeneratorX86Shared* codegen) {
994 0 : codegen->visitReturnZero(this);
995 0 : }
996 0 : Register reg() const {
997 0 : return reg_;
998 : }
999 : };
1000 :
1001 : void
1002 0 : CodeGeneratorX86Shared::visitReturnZero(ReturnZero* ool)
1003 : {
1004 0 : masm.mov(ImmWord(0), ool->reg());
1005 0 : masm.jmp(ool->rejoin());
1006 0 : }
1007 :
1008 : void
1009 0 : CodeGeneratorX86Shared::visitUDivOrMod(LUDivOrMod* ins)
1010 : {
1011 0 : Register lhs = ToRegister(ins->lhs());
1012 0 : Register rhs = ToRegister(ins->rhs());
1013 0 : Register output = ToRegister(ins->output());
1014 :
1015 0 : MOZ_ASSERT_IF(lhs != rhs, rhs != eax);
1016 0 : MOZ_ASSERT(rhs != edx);
1017 0 : MOZ_ASSERT_IF(output == eax, ToRegister(ins->remainder()) == edx);
1018 :
1019 0 : ReturnZero* ool = nullptr;
1020 :
1021 : // Put the lhs in eax.
1022 0 : if (lhs != eax)
1023 0 : masm.mov(lhs, eax);
1024 :
1025 : // Prevent divide by zero.
1026 0 : if (ins->canBeDivideByZero()) {
1027 0 : masm.test32(rhs, rhs);
1028 0 : if (ins->mir()->isTruncated()) {
1029 0 : if (ins->trapOnError()) {
1030 0 : masm.j(Assembler::Zero, trap(ins, wasm::Trap::IntegerDivideByZero));
1031 : } else {
1032 0 : ool = new(alloc()) ReturnZero(output);
1033 0 : masm.j(Assembler::Zero, ool->entry());
1034 : }
1035 : } else {
1036 0 : bailoutIf(Assembler::Zero, ins->snapshot());
1037 : }
1038 : }
1039 :
1040 : // Zero extend the lhs into edx to make (edx:eax), since udiv is 64-bit.
1041 0 : masm.mov(ImmWord(0), edx);
1042 0 : masm.udiv(rhs);
1043 :
1044 : // If the remainder is > 0, bailout since this must be a double.
1045 0 : if (ins->mir()->isDiv() && !ins->mir()->toDiv()->canTruncateRemainder()) {
1046 0 : Register remainder = ToRegister(ins->remainder());
1047 0 : masm.test32(remainder, remainder);
1048 0 : bailoutIf(Assembler::NonZero, ins->snapshot());
1049 : }
1050 :
1051 : // Unsigned div or mod can return a value that's not a signed int32.
1052 : // If our users aren't expecting that, bail.
1053 0 : if (!ins->mir()->isTruncated()) {
1054 0 : masm.test32(output, output);
1055 0 : bailoutIf(Assembler::Signed, ins->snapshot());
1056 : }
1057 :
1058 0 : if (ool) {
1059 0 : addOutOfLineCode(ool, ins->mir());
1060 0 : masm.bind(ool->rejoin());
1061 : }
1062 0 : }
1063 :
1064 : void
1065 0 : CodeGeneratorX86Shared::visitUDivOrModConstant(LUDivOrModConstant *ins) {
1066 0 : Register lhs = ToRegister(ins->numerator());
1067 0 : Register output = ToRegister(ins->output());
1068 0 : uint32_t d = ins->denominator();
1069 :
1070 : // This emits the division answer into edx or the modulus answer into eax.
1071 0 : MOZ_ASSERT(output == eax || output == edx);
1072 0 : MOZ_ASSERT(lhs != eax && lhs != edx);
1073 0 : bool isDiv = (output == edx);
1074 :
1075 0 : if (d == 0) {
1076 0 : if (ins->mir()->isTruncated()) {
1077 0 : if (ins->trapOnError())
1078 0 : masm.jump(trap(ins, wasm::Trap::IntegerDivideByZero));
1079 : else
1080 0 : masm.xorl(output, output);
1081 : } else {
1082 0 : bailout(ins->snapshot());
1083 : }
1084 0 : return;
1085 : }
1086 :
1087 : // The denominator isn't a power of 2 (see LDivPowTwoI and LModPowTwoI).
1088 0 : MOZ_ASSERT((d & (d - 1)) != 0);
1089 :
1090 0 : ReciprocalMulConstants rmc = computeDivisionConstants(d, /* maxLog = */ 32);
1091 :
1092 : // We first compute (M * n) >> 32, where M = rmc.multiplier.
1093 0 : masm.movl(Imm32(rmc.multiplier), eax);
1094 0 : masm.umull(lhs);
1095 0 : if (rmc.multiplier > UINT32_MAX) {
1096 : // M >= 2^32 and shift == 0 is impossible, as d >= 2 implies that
1097 : // ((M * n) >> (32 + shift)) >= n > floor(n/d) whenever n >= d, contradicting
1098 : // the proof of correctness in computeDivisionConstants.
1099 0 : MOZ_ASSERT(rmc.shiftAmount > 0);
1100 0 : MOZ_ASSERT(rmc.multiplier < (int64_t(1) << 33));
1101 :
1102 : // We actually computed edx = ((uint32_t(M) * n) >> 32) instead. Since
1103 : // (M * n) >> (32 + shift) is the same as (edx + n) >> shift, we can
1104 : // correct for the overflow. This case is a bit trickier than the signed
1105 : // case, though, as the (edx + n) addition itself can overflow; however,
1106 : // note that (edx + n) >> shift == (((n - edx) >> 1) + edx) >> (shift - 1),
1107 : // which is overflow-free. See Hacker's Delight, section 10-8 for details.
1108 :
1109 : // Compute (n - edx) >> 1 into eax.
1110 0 : masm.movl(lhs, eax);
1111 0 : masm.subl(edx, eax);
1112 0 : masm.shrl(Imm32(1), eax);
1113 :
1114 : // Finish the computation.
1115 0 : masm.addl(eax, edx);
1116 0 : masm.shrl(Imm32(rmc.shiftAmount - 1), edx);
1117 : } else {
1118 0 : masm.shrl(Imm32(rmc.shiftAmount), edx);
1119 : }
1120 :
1121 : // We now have the truncated division value in edx. If we're
1122 : // computing a modulus or checking whether the division resulted
1123 : // in an integer, we need to multiply the obtained value by d and
1124 : // finish the computation/check.
1125 0 : if (!isDiv) {
1126 0 : masm.imull(Imm32(d), edx, edx);
1127 0 : masm.movl(lhs, eax);
1128 0 : masm.subl(edx, eax);
1129 :
1130 : // The final result of the modulus op, just computed above by the
1131 : // sub instruction, can be a number in the range [2^31, 2^32). If
1132 : // this is the case and the modulus is not truncated, we must bail
1133 : // out.
1134 0 : if (!ins->mir()->isTruncated())
1135 0 : bailoutIf(Assembler::Signed, ins->snapshot());
1136 0 : } else if (!ins->mir()->isTruncated()) {
1137 0 : masm.imull(Imm32(d), edx, eax);
1138 0 : masm.cmpl(lhs, eax);
1139 0 : bailoutIf(Assembler::NotEqual, ins->snapshot());
1140 : }
1141 : }
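// Host-side sketch (not JIT output) of the unsigned reciprocal multiplication
// above, assuming (M, shift) come from computeDivisionConstants(d, 32) with
// M < 2^33 and shift >= 1 whenever M >= 2^32. The helper name udivByConstRef
// and its parameters are hypothetical.
#include <cstdint>

static uint32_t udivByConstRef(uint32_t n, uint64_t M, unsigned shift) {
    // edx after umull: the high 32 bits of uint32_t(M) * n.
    uint32_t hi = uint32_t((uint64_t(uint32_t(M)) * n) >> 32);
    if (M > UINT32_MAX) {
        // Want (hi + n) >> shift, rewritten as (((n - hi) >> 1) + hi) >> (shift - 1)
        // so the intermediate sum cannot overflow 32 bits (Hacker's Delight 10-8).
        uint32_t half = (n - hi) >> 1;
        return (half + hi) >> (shift - 1);
    }
    return hi >> shift;   // floor(n / d)
}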
1142 :
1143 : void
1144 0 : CodeGeneratorX86Shared::visitMulNegativeZeroCheck(MulNegativeZeroCheck* ool)
1145 : {
1146 0 : LMulI* ins = ool->ins();
1147 0 : Register result = ToRegister(ins->output());
1148 0 : Operand lhsCopy = ToOperand(ins->lhsCopy());
1149 0 : Operand rhs = ToOperand(ins->rhs());
1150 0 : MOZ_ASSERT_IF(lhsCopy.kind() == Operand::REG, lhsCopy.reg() != result.code());
1151 :
1152 : // Result is -0 if lhs or rhs is negative.
1153 0 : masm.movl(lhsCopy, result);
1154 0 : masm.orl(rhs, result);
1155 0 : bailoutIf(Assembler::Signed, ins->snapshot());
1156 :
1157 0 : masm.mov(ImmWord(0), result);
1158 0 : masm.jmp(ool->rejoin());
1159 0 : }
1160 :
1161 : void
1162 0 : CodeGeneratorX86Shared::visitDivPowTwoI(LDivPowTwoI* ins)
1163 : {
1164 0 : Register lhs = ToRegister(ins->numerator());
1165 0 : DebugOnly<Register> output = ToRegister(ins->output());
1166 :
1167 0 : int32_t shift = ins->shift();
1168 0 : bool negativeDivisor = ins->negativeDivisor();
1169 0 : MDiv* mir = ins->mir();
1170 :
1171 : // We use defineReuseInput so these should always be the same, which is
1172 : // convenient since all of our instructions here are two-address.
1173 0 : MOZ_ASSERT(lhs == output);
1174 :
1175 0 : if (!mir->isTruncated() && negativeDivisor) {
1176 : // 0 divided by a negative number must return a double.
1177 0 : masm.test32(lhs, lhs);
1178 0 : bailoutIf(Assembler::Zero, ins->snapshot());
1179 : }
1180 :
1181 0 : if (shift) {
1182 0 : if (!mir->isTruncated()) {
1183 : // If the remainder is != 0, bailout since this must be a double.
1184 0 : masm.test32(lhs, Imm32(UINT32_MAX >> (32 - shift)));
1185 0 : bailoutIf(Assembler::NonZero, ins->snapshot());
1186 : }
1187 :
1188 0 : if (mir->isUnsigned()) {
1189 0 : masm.shrl(Imm32(shift), lhs);
1190 : } else {
1191 : // Adjust the value so that shifting produces a correctly
1192 : // rounded result when the numerator is negative. See 10-1
1193 : // "Signed Division by a Known Power of 2" in Henry
1194 : // S. Warren, Jr.'s Hacker's Delight.
1195 0 : if (mir->canBeNegativeDividend()) {
1196 0 : Register lhsCopy = ToRegister(ins->numeratorCopy());
1197 0 : MOZ_ASSERT(lhsCopy != lhs);
1198 0 : if (shift > 1)
1199 0 : masm.sarl(Imm32(31), lhs);
1200 0 : masm.shrl(Imm32(32 - shift), lhs);
1201 0 : masm.addl(lhsCopy, lhs);
1202 : }
1203 0 : masm.sarl(Imm32(shift), lhs);
1204 :
1205 0 : if (negativeDivisor)
1206 0 : masm.negl(lhs);
1207 : }
1208 0 : return;
1209 : }
1210 :
1211 0 : if (negativeDivisor) {
1212 : // INT32_MIN / -1 overflows.
1213 0 : masm.negl(lhs);
1214 0 : if (!mir->isTruncated())
1215 0 : bailoutIf(Assembler::Overflow, ins->snapshot());
1216 0 : else if (mir->trapOnError())
1217 0 : masm.j(Assembler::Overflow, trap(mir, wasm::Trap::IntegerOverflow));
1218 0 : } else if (mir->isUnsigned() && !mir->isTruncated()) {
1219 : // Unsigned division by 1 can overflow if output is not
1220 : // truncated.
1221 0 : masm.test32(lhs, lhs);
1222 0 : bailoutIf(Assembler::Signed, ins->snapshot());
1223 : }
1224 : }
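// Host-side sketch (not JIT output) of the signed rounding adjustment above
// for a divisor of 1 << shift with 1 <= shift <= 31 (Hacker's Delight,
// "Signed Division by a Known Power of 2"). Assumes the compiler's signed
// right shift is arithmetic, as the generated sarl is. The helper name
// sdivPow2Ref is hypothetical.
#include <cstdint>

static int32_t sdivPow2Ref(int32_t x, unsigned shift) {
    // The sarl/shrl pair above materializes this bias: (1 << shift) - 1 for a
    // negative dividend, 0 otherwise, so the arithmetic shift truncates
    // toward zero instead of toward negative infinity.
    int32_t bias = (x < 0) ? int32_t((uint32_t(1) << shift) - 1) : 0;
    return (x + bias) >> shift;
}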
1225 :
1226 : void
1227 0 : CodeGeneratorX86Shared::visitDivOrModConstantI(LDivOrModConstantI* ins) {
1228 0 : Register lhs = ToRegister(ins->numerator());
1229 0 : Register output = ToRegister(ins->output());
1230 0 : int32_t d = ins->denominator();
1231 :
1232 : // This emits the division answer into edx or the modulus answer into eax.
1233 0 : MOZ_ASSERT(output == eax || output == edx);
1234 0 : MOZ_ASSERT(lhs != eax && lhs != edx);
1235 0 : bool isDiv = (output == edx);
1236 :
1237 : // The absolute value of the denominator isn't a power of 2 (see LDivPowTwoI
1238 : // and LModPowTwoI).
1239 0 : MOZ_ASSERT((Abs(d) & (Abs(d) - 1)) != 0);
1240 :
1241 : // We will first divide by Abs(d), and negate the answer if d is negative.
1242 : // If desired, this can be avoided by generalizing computeDivisionConstants.
1243 0 : ReciprocalMulConstants rmc = computeDivisionConstants(Abs(d), /* maxLog = */ 31);
1244 :
1245 : // We first compute (M * n) >> 32, where M = rmc.multiplier.
1246 0 : masm.movl(Imm32(rmc.multiplier), eax);
1247 0 : masm.imull(lhs);
1248 0 : if (rmc.multiplier > INT32_MAX) {
1249 0 : MOZ_ASSERT(rmc.multiplier < (int64_t(1) << 32));
1250 :
1251 : // We actually computed edx = ((int32_t(M) * n) >> 32) instead. Since
1252 : // (M * n) >> 32 is the same as (edx + n), we can correct for the overflow.
1253 : // (edx + n) can't overflow, as n and edx have opposite signs because int32_t(M)
1254 : // is negative.
1255 0 : masm.addl(lhs, edx);
1256 : }
1257 : // (M * n) >> (32 + shift) is the truncated division answer if n is non-negative,
1258 : // as proved in the comments of computeDivisionConstants. We must add 1 later if n is
1259 : // negative to get the right answer in all cases.
1260 0 : masm.sarl(Imm32(rmc.shiftAmount), edx);
1261 :
1262 : // We'll subtract -1 instead of adding 1, because (n < 0 ? -1 : 0) can be
1263 : // computed with just a sign-extending shift of 31 bits.
1264 0 : if (ins->canBeNegativeDividend()) {
1265 0 : masm.movl(lhs, eax);
1266 0 : masm.sarl(Imm32(31), eax);
1267 0 : masm.subl(eax, edx);
1268 : }
1269 :
1270 : // After this, edx contains the correct truncated division result.
1271 0 : if (d < 0)
1272 0 : masm.negl(edx);
1273 :
1274 0 : if (!isDiv) {
1275 0 : masm.imull(Imm32(-d), edx, eax);
1276 0 : masm.addl(lhs, eax);
1277 : }
1278 :
1279 0 : if (!ins->mir()->isTruncated()) {
1280 0 : if (isDiv) {
1281 : // This is a division op. Multiply the obtained value by d to check if
1282 : // the correct answer is an integer. This cannot overflow, since |d| > 1.
1283 0 : masm.imull(Imm32(d), edx, eax);
1284 0 : masm.cmp32(lhs, eax);
1285 0 : bailoutIf(Assembler::NotEqual, ins->snapshot());
1286 :
1287 : // If lhs is zero and the divisor is negative, the answer should have
1288 : // been -0.
1289 0 : if (d < 0) {
1290 0 : masm.test32(lhs, lhs);
1291 0 : bailoutIf(Assembler::Zero, ins->snapshot());
1292 : }
1293 0 : } else if (ins->canBeNegativeDividend()) {
1294 : // This is a mod op. If the computed value is zero and lhs
1295 : // is negative, the answer should have been -0.
1296 0 : Label done;
1297 :
1298 0 : masm.cmp32(lhs, Imm32(0));
1299 0 : masm.j(Assembler::GreaterThanOrEqual, &done);
1300 :
1301 0 : masm.test32(eax, eax);
1302 0 : bailoutIf(Assembler::Zero, ins->snapshot());
1303 :
1304 0 : masm.bind(&done);
1305 : }
1306 : }
1307 0 : }
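// Host-side sketch (not JIT output) of the signed reciprocal multiplication
// above, assuming (M, shift) come from computeDivisionConstants(Abs(d), 31)
// with M < 2^32, and an arithmetic signed right shift. The helper name
// sdivByConstRef is hypothetical.
#include <cstdint>

static int32_t sdivByConstRef(int32_t n, int64_t M, unsigned shift, int32_t d) {
    // edx after imull: the high 32 bits of the signed product int32_t(M) * n.
    int32_t hi = int32_t((int64_t(int32_t(M)) * n) >> 32);
    if (M > INT32_MAX)
        hi += n;               // correct for having multiplied by int32_t(M) = M - 2^32;
                               // hi and n have opposite signs here, so no overflow.
    int32_t q = hi >> shift;   // floor((M * n) / 2^(32 + shift))
    if (n < 0)
        q += 1;                // the floor above undershoots by one for negative n
    return (d < 0) ? -q : q;   // we divided by Abs(d); fix the sign last
}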
1308 :
1309 : void
1310 0 : CodeGeneratorX86Shared::visitDivI(LDivI* ins)
1311 : {
1312 0 : Register remainder = ToRegister(ins->remainder());
1313 0 : Register lhs = ToRegister(ins->lhs());
1314 0 : Register rhs = ToRegister(ins->rhs());
1315 0 : Register output = ToRegister(ins->output());
1316 :
1317 0 : MDiv* mir = ins->mir();
1318 :
1319 0 : MOZ_ASSERT_IF(lhs != rhs, rhs != eax);
1320 0 : MOZ_ASSERT(rhs != edx);
1321 0 : MOZ_ASSERT(remainder == edx);
1322 0 : MOZ_ASSERT(output == eax);
1323 :
1324 0 : Label done;
1325 0 : ReturnZero* ool = nullptr;
1326 :
1327 : // Put the lhs in eax, for either the negative overflow case or the regular
1328 : // divide case.
1329 0 : if (lhs != eax)
1330 0 : masm.mov(lhs, eax);
1331 :
1332 : // Handle divide by zero.
1333 0 : if (mir->canBeDivideByZero()) {
1334 0 : masm.test32(rhs, rhs);
1335 0 : if (mir->trapOnError()) {
1336 0 : masm.j(Assembler::Zero, trap(mir, wasm::Trap::IntegerDivideByZero));
1337 0 : } else if (mir->canTruncateInfinities()) {
1338 : // Truncated division by zero is zero (Infinity|0 == 0)
1339 0 : if (!ool)
1340 0 : ool = new(alloc()) ReturnZero(output);
1341 0 : masm.j(Assembler::Zero, ool->entry());
1342 : } else {
1343 0 : MOZ_ASSERT(mir->fallible());
1344 0 : bailoutIf(Assembler::Zero, ins->snapshot());
1345 : }
1346 : }
1347 :
1348 : // Handle an integer overflow exception from -2147483648 / -1.
1349 0 : if (mir->canBeNegativeOverflow()) {
1350 0 : Label notmin;
1351 0 : masm.cmp32(lhs, Imm32(INT32_MIN));
1352 0 : masm.j(Assembler::NotEqual, &notmin);
1353 0 : masm.cmp32(rhs, Imm32(-1));
1354 0 : if (mir->trapOnError()) {
1355 0 : masm.j(Assembler::Equal, trap(mir, wasm::Trap::IntegerOverflow));
1356 0 : } else if (mir->canTruncateOverflow()) {
1357 : // (-INT32_MIN)|0 == INT32_MIN and INT32_MIN is already in the
1358 : // output register (lhs == eax).
1359 0 : masm.j(Assembler::Equal, &done);
1360 : } else {
1361 0 : MOZ_ASSERT(mir->fallible());
1362 0 : bailoutIf(Assembler::Equal, ins->snapshot());
1363 : }
1364 0 : masm.bind(&notmin);
1365 : }
1366 :
1367 : // Handle negative 0.
1368 0 : if (!mir->canTruncateNegativeZero() && mir->canBeNegativeZero()) {
1369 0 : Label nonzero;
1370 0 : masm.test32(lhs, lhs);
1371 0 : masm.j(Assembler::NonZero, &nonzero);
1372 0 : masm.cmp32(rhs, Imm32(0));
1373 0 : bailoutIf(Assembler::LessThan, ins->snapshot());
1374 0 : masm.bind(&nonzero);
1375 : }
1376 :
1377 : // Sign extend the lhs into edx to make (edx:eax), since idiv is 64-bit.
1378 0 : if (lhs != eax)
1379 0 : masm.mov(lhs, eax);
1380 0 : masm.cdq();
1381 0 : masm.idiv(rhs);
1382 :
1383 0 : if (!mir->canTruncateRemainder()) {
1384 : // If the remainder is > 0, bailout since this must be a double.
1385 0 : masm.test32(remainder, remainder);
1386 0 : bailoutIf(Assembler::NonZero, ins->snapshot());
1387 : }
1388 :
1389 0 : masm.bind(&done);
1390 :
1391 0 : if (ool) {
1392 0 : addOutOfLineCode(ool, mir);
1393 0 : masm.bind(ool->rejoin());
1394 : }
1395 0 : }
1396 :
1397 : void
1398 0 : CodeGeneratorX86Shared::visitModPowTwoI(LModPowTwoI* ins)
1399 : {
1400 0 : Register lhs = ToRegister(ins->getOperand(0));
1401 0 : int32_t shift = ins->shift();
1402 :
1403 0 : Label negative;
1404 :
1405 0 : if (!ins->mir()->isUnsigned() && ins->mir()->canBeNegativeDividend()) {
1406 : // Switch based on sign of the lhs.
1407 : // Positive numbers are just a bitmask
1408 0 : masm.branchTest32(Assembler::Signed, lhs, lhs, &negative);
1409 : }
1410 :
1411 0 : masm.andl(Imm32((uint32_t(1) << shift) - 1), lhs);
1412 :
1413 0 : if (!ins->mir()->isUnsigned() && ins->mir()->canBeNegativeDividend()) {
1414 0 : Label done;
1415 0 : masm.jump(&done);
1416 :
1417 : // Negative numbers need a negate, bitmask, negate
1418 0 : masm.bind(&negative);
1419 :
1420 : // Unlike in the visitModI case, we are not computing the mod by means of a
1421 : // division. Therefore, the divisor = -1 case isn't problematic (the andl
1422 : // always returns 0, which is what we expect).
1423 : //
1424 : // The negl instruction overflows if lhs == INT32_MIN, but this is also not
1425 : // a problem: shift is at most 31, and so the andl also always returns 0.
1426 0 : masm.negl(lhs);
1427 0 : masm.andl(Imm32((uint32_t(1) << shift) - 1), lhs);
1428 0 : masm.negl(lhs);
1429 :
1430 : // Since a%b has the same sign as b, and a is negative in this branch,
1431 : // an answer of 0 means the correct result is actually -0. Bail out.
1432 0 : if (!ins->mir()->isTruncated())
1433 0 : bailoutIf(Assembler::Zero, ins->snapshot());
1434 0 : masm.bind(&done);
1435 : }
1436 0 : }
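// Host-side sketch (not JIT output) of the negate/mask/negate sequence above,
// assuming 1 <= shift <= 31. The helper name smodPow2Ref is hypothetical.
#include <cstdint>

static int32_t smodPow2Ref(int32_t x, unsigned shift) {
    uint32_t mask = (uint32_t(1) << shift) - 1;
    if (x >= 0)
        return int32_t(uint32_t(x) & mask);   // non-negative: plain bitmask
    // Negative: mask |x| and restore the sign so the result has the sign of
    // the dividend. A zero result here really means -0, which is why the
    // non-truncated path above bails out on Assembler::Zero.
    uint32_t absx = 0u - uint32_t(x);         // well-defined, even for INT32_MIN
    return -int32_t(absx & mask);
}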
1437 :
1438 : class ModOverflowCheck : public OutOfLineCodeBase<CodeGeneratorX86Shared>
1439 : {
1440 : Label done_;
1441 : LModI* ins_;
1442 : Register rhs_;
1443 :
1444 : public:
1445 0 : explicit ModOverflowCheck(LModI* ins, Register rhs)
1446 0 : : ins_(ins), rhs_(rhs)
1447 0 : { }
1448 :
1449 0 : virtual void accept(CodeGeneratorX86Shared* codegen) {
1450 0 : codegen->visitModOverflowCheck(this);
1451 0 : }
1452 0 : Label* done() {
1453 0 : return &done_;
1454 : }
1455 0 : LModI* ins() const {
1456 0 : return ins_;
1457 : }
1458 0 : Register rhs() const {
1459 0 : return rhs_;
1460 : }
1461 : };
1462 :
1463 : void
1464 0 : CodeGeneratorX86Shared::visitModOverflowCheck(ModOverflowCheck* ool)
1465 : {
1466 0 : masm.cmp32(ool->rhs(), Imm32(-1));
1467 0 : if (ool->ins()->mir()->isTruncated()) {
1468 0 : masm.j(Assembler::NotEqual, ool->rejoin());
1469 0 : masm.mov(ImmWord(0), edx);
1470 0 : masm.jmp(ool->done());
1471 : } else {
1472 0 : bailoutIf(Assembler::Equal, ool->ins()->snapshot());
1473 0 : masm.jmp(ool->rejoin());
1474 : }
1475 0 : }
1476 :
1477 : void
1478 0 : CodeGeneratorX86Shared::visitModI(LModI* ins)
1479 : {
1480 0 : Register remainder = ToRegister(ins->remainder());
1481 0 : Register lhs = ToRegister(ins->lhs());
1482 0 : Register rhs = ToRegister(ins->rhs());
1483 :
1484 : // Required to use idiv.
1485 0 : MOZ_ASSERT_IF(lhs != rhs, rhs != eax);
1486 0 : MOZ_ASSERT(rhs != edx);
1487 0 : MOZ_ASSERT(remainder == edx);
1488 0 : MOZ_ASSERT(ToRegister(ins->getTemp(0)) == eax);
1489 :
1490 0 : Label done;
1491 0 : ReturnZero* ool = nullptr;
1492 0 : ModOverflowCheck* overflow = nullptr;
1493 :
1494 : // Set up eax in preparation for doing a div.
1495 0 : if (lhs != eax)
1496 0 : masm.mov(lhs, eax);
1497 :
1498 0 : MMod* mir = ins->mir();
1499 :
1500 : // Prevent divide by zero.
1501 0 : if (mir->canBeDivideByZero()) {
1502 0 : masm.test32(rhs, rhs);
1503 0 : if (mir->isTruncated()) {
1504 0 : if (mir->trapOnError()) {
1505 0 : masm.j(Assembler::Zero, trap(mir, wasm::Trap::IntegerDivideByZero));
1506 : } else {
1507 0 : if (!ool)
1508 0 : ool = new(alloc()) ReturnZero(edx);
1509 0 : masm.j(Assembler::Zero, ool->entry());
1510 : }
1511 : } else {
1512 0 : bailoutIf(Assembler::Zero, ins->snapshot());
1513 : }
1514 : }
1515 :
1516 0 : Label negative;
1517 :
1518 : // Switch based on sign of the lhs.
1519 0 : if (mir->canBeNegativeDividend())
1520 0 : masm.branchTest32(Assembler::Signed, lhs, lhs, &negative);
1521 :
1522 : // If lhs >= 0 then remainder = lhs % rhs. The remainder must be positive.
1523 : {
1524 : // Check if rhs is a power-of-two.
1525 0 : if (mir->canBePowerOfTwoDivisor()) {
1526 0 : MOZ_ASSERT(rhs != remainder);
1527 :
1528 : // Rhs y is a power-of-two if (y & (y-1)) == 0. Note that if
1529 : // y is any negative number other than INT32_MIN, both y and
1530 : // y-1 will have the sign bit set so these are never optimized
1531 : // as powers-of-two. If y is INT32_MIN, y-1 will be INT32_MAX
1532 : // and because lhs >= 0 at this point, lhs & INT32_MAX returns
1533 : // the correct value.
1534 0 : Label notPowerOfTwo;
1535 0 : masm.mov(rhs, remainder);
1536 0 : masm.subl(Imm32(1), remainder);
1537 0 : masm.branchTest32(Assembler::NonZero, remainder, rhs, &notPowerOfTwo);
1538 : {
1539 0 : masm.andl(lhs, remainder);
1540 0 : masm.jmp(&done);
1541 : }
1542 0 : masm.bind(&notPowerOfTwo);
1543 : }
1544 :
1545 : // Since lhs >= 0, the sign-extension will be 0
1546 0 : masm.mov(ImmWord(0), edx);
1547 0 : masm.idiv(rhs);
1548 : }
1549 :
1550 : // Otherwise, we have to beware of two special cases:
1551 0 : if (mir->canBeNegativeDividend()) {
1552 0 : masm.jump(&done);
1553 :
1554 0 : masm.bind(&negative);
1555 :
1556 : // Prevent an integer overflow exception from -2147483648 % -1
1557 0 : Label notmin;
1558 0 : masm.cmp32(lhs, Imm32(INT32_MIN));
1559 0 : overflow = new(alloc()) ModOverflowCheck(ins, rhs);
1560 0 : masm.j(Assembler::Equal, overflow->entry());
1561 0 : masm.bind(overflow->rejoin());
1562 0 : masm.cdq();
1563 0 : masm.idiv(rhs);
1564 :
1565 0 : if (!mir->isTruncated()) {
1566 : // A remainder of 0 means that the rval must be -0, which is a double.
1567 0 : masm.test32(remainder, remainder);
1568 0 : bailoutIf(Assembler::Zero, ins->snapshot());
1569 : }
1570 : }
1571 :
1572 0 : masm.bind(&done);
1573 :
1574 0 : if (overflow) {
1575 0 : addOutOfLineCode(overflow, mir);
1576 0 : masm.bind(overflow->done());
1577 : }
1578 :
1579 0 : if (ool) {
1580 0 : addOutOfLineCode(ool, mir);
1581 0 : masm.bind(ool->rejoin());
1582 : }
1583 0 : }
1584 :
1585 : void
1586 0 : CodeGeneratorX86Shared::visitBitNotI(LBitNotI* ins)
1587 : {
1588 0 : const LAllocation* input = ins->getOperand(0);
1589 0 : MOZ_ASSERT(!input->isConstant());
1590 :
1591 0 : masm.notl(ToOperand(input));
1592 0 : }
1593 :
1594 : void
1595 0 : CodeGeneratorX86Shared::visitBitOpI(LBitOpI* ins)
1596 : {
1597 0 : const LAllocation* lhs = ins->getOperand(0);
1598 0 : const LAllocation* rhs = ins->getOperand(1);
1599 :
1600 0 : switch (ins->bitop()) {
1601 : case JSOP_BITOR:
1602 0 : if (rhs->isConstant())
1603 0 : masm.orl(Imm32(ToInt32(rhs)), ToOperand(lhs));
1604 : else
1605 0 : masm.orl(ToOperand(rhs), ToRegister(lhs));
1606 0 : break;
1607 : case JSOP_BITXOR:
1608 0 : if (rhs->isConstant())
1609 0 : masm.xorl(Imm32(ToInt32(rhs)), ToOperand(lhs));
1610 : else
1611 0 : masm.xorl(ToOperand(rhs), ToRegister(lhs));
1612 0 : break;
1613 : case JSOP_BITAND:
1614 0 : if (rhs->isConstant())
1615 0 : masm.andl(Imm32(ToInt32(rhs)), ToOperand(lhs));
1616 : else
1617 0 : masm.andl(ToOperand(rhs), ToRegister(lhs));
1618 0 : break;
1619 : default:
1620 0 : MOZ_CRASH("unexpected binary opcode");
1621 : }
1622 0 : }
1623 :
1624 : void
1625 0 : CodeGeneratorX86Shared::visitBitOpI64(LBitOpI64* lir)
1626 : {
1627 0 : const LInt64Allocation lhs = lir->getInt64Operand(LBitOpI64::Lhs);
1628 0 : const LInt64Allocation rhs = lir->getInt64Operand(LBitOpI64::Rhs);
1629 :
1630 0 : MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs));
1631 :
1632 0 : switch (lir->bitop()) {
1633 : case JSOP_BITOR:
1634 0 : if (IsConstant(rhs))
1635 0 : masm.or64(Imm64(ToInt64(rhs)), ToRegister64(lhs));
1636 : else
1637 0 : masm.or64(ToOperandOrRegister64(rhs), ToRegister64(lhs));
1638 0 : break;
1639 : case JSOP_BITXOR:
1640 0 : if (IsConstant(rhs))
1641 0 : masm.xor64(Imm64(ToInt64(rhs)), ToRegister64(lhs));
1642 : else
1643 0 : masm.xor64(ToOperandOrRegister64(rhs), ToRegister64(lhs));
1644 0 : break;
1645 : case JSOP_BITAND:
1646 0 : if (IsConstant(rhs))
1647 0 : masm.and64(Imm64(ToInt64(rhs)), ToRegister64(lhs));
1648 : else
1649 0 : masm.and64(ToOperandOrRegister64(rhs), ToRegister64(lhs));
1650 0 : break;
1651 : default:
1652 0 : MOZ_CRASH("unexpected binary opcode");
1653 : }
1654 0 : }
1655 :
1656 : void
1657 0 : CodeGeneratorX86Shared::visitShiftI(LShiftI* ins)
1658 : {
1659 0 : Register lhs = ToRegister(ins->lhs());
1660 0 : const LAllocation* rhs = ins->rhs();
1661 :
1662 0 : if (rhs->isConstant()) {
1663 0 : int32_t shift = ToInt32(rhs) & 0x1F;
1664 0 : switch (ins->bitop()) {
1665 : case JSOP_LSH:
1666 0 : if (shift)
1667 0 : masm.shll(Imm32(shift), lhs);
1668 0 : break;
1669 : case JSOP_RSH:
1670 0 : if (shift)
1671 0 : masm.sarl(Imm32(shift), lhs);
1672 0 : break;
1673 : case JSOP_URSH:
1674 0 : if (shift) {
1675 0 : masm.shrl(Imm32(shift), lhs);
1676 0 : } else if (ins->mir()->toUrsh()->fallible()) {
1677 : // x >>> 0 can overflow.
1678 0 : masm.test32(lhs, lhs);
1679 0 : bailoutIf(Assembler::Signed, ins->snapshot());
1680 : }
1681 0 : break;
1682 : default:
1683 0 : MOZ_CRASH("Unexpected shift op");
1684 : }
1685 : } else {
1686 0 : MOZ_ASSERT(ToRegister(rhs) == ecx);
1687 0 : switch (ins->bitop()) {
1688 : case JSOP_LSH:
1689 0 : masm.shll_cl(lhs);
1690 0 : break;
1691 : case JSOP_RSH:
1692 0 : masm.sarl_cl(lhs);
1693 0 : break;
1694 : case JSOP_URSH:
1695 0 : masm.shrl_cl(lhs);
1696 0 : if (ins->mir()->toUrsh()->fallible()) {
1697 : // x >>> 0 can overflow.
1698 0 : masm.test32(lhs, lhs);
1699 0 : bailoutIf(Assembler::Signed, ins->snapshot());
1700 : }
1701 0 : break;
1702 : default:
1703 0 : MOZ_CRASH("Unexpected shift op");
1704 : }
1705 : }
1706 0 : }
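
// A hedged scalar sketch of why JSOP_URSH is fallible above (the helper name
// jsUrsh is illustrative, not an identifier in this file): JS evaluates
// x >>> s as an unsigned 32-bit value, so the result can exceed INT32_MAX
// even when the shift count is zero.
//
//     // Double-valued result of the JS expression x >>> s.
//     double jsUrsh(int32_t x, int32_t s) {
//         return double(uint32_t(x) >> (uint32_t(s) & 31));
//     }
//
// e.g. jsUrsh(-1, 0) == 4294967295.0, which does not fit in an int32, so the
// sign test after the shift bails out instead of keeping an Int32 result.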
1707 :
1708 : void
1709 0 : CodeGeneratorX86Shared::visitShiftI64(LShiftI64* lir)
1710 : {
1711 0 : const LInt64Allocation lhs = lir->getInt64Operand(LShiftI64::Lhs);
1712 0 : LAllocation* rhs = lir->getOperand(LShiftI64::Rhs);
1713 :
1714 0 : MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs));
1715 :
1716 0 : if (rhs->isConstant()) {
1717 0 : int32_t shift = int32_t(rhs->toConstant()->toInt64() & 0x3F);
1718 0 : switch (lir->bitop()) {
1719 : case JSOP_LSH:
1720 0 : if (shift)
1721 0 : masm.lshift64(Imm32(shift), ToRegister64(lhs));
1722 0 : break;
1723 : case JSOP_RSH:
1724 0 : if (shift)
1725 0 : masm.rshift64Arithmetic(Imm32(shift), ToRegister64(lhs));
1726 0 : break;
1727 : case JSOP_URSH:
1728 0 : if (shift)
1729 0 : masm.rshift64(Imm32(shift), ToRegister64(lhs));
1730 0 : break;
1731 : default:
1732 0 : MOZ_CRASH("Unexpected shift op");
1733 : }
1734 0 : return;
1735 : }
1736 :
1737 0 : MOZ_ASSERT(ToRegister(rhs) == ecx);
1738 0 : switch (lir->bitop()) {
1739 : case JSOP_LSH:
1740 0 : masm.lshift64(ecx, ToRegister64(lhs));
1741 0 : break;
1742 : case JSOP_RSH:
1743 0 : masm.rshift64Arithmetic(ecx, ToRegister64(lhs));
1744 0 : break;
1745 : case JSOP_URSH:
1746 0 : masm.rshift64(ecx, ToRegister64(lhs));
1747 0 : break;
1748 : default:
1749 0 : MOZ_CRASH("Unexpected shift op");
1750 : }
1751 : }
1752 :
1753 : void
1754 0 : CodeGeneratorX86Shared::visitUrshD(LUrshD* ins)
1755 : {
1756 0 : Register lhs = ToRegister(ins->lhs());
1757 0 : MOZ_ASSERT(ToRegister(ins->temp()) == lhs);
1758 :
1759 0 : const LAllocation* rhs = ins->rhs();
1760 0 : FloatRegister out = ToFloatRegister(ins->output());
1761 :
1762 0 : if (rhs->isConstant()) {
1763 0 : int32_t shift = ToInt32(rhs) & 0x1F;
1764 0 : if (shift)
1765 0 : masm.shrl(Imm32(shift), lhs);
1766 : } else {
1767 0 : MOZ_ASSERT(ToRegister(rhs) == ecx);
1768 0 : masm.shrl_cl(lhs);
1769 : }
1770 :
1771 0 : masm.convertUInt32ToDouble(lhs, out);
1772 0 : }
1773 :
1774 : Operand
1775 125 : CodeGeneratorX86Shared::ToOperand(const LAllocation& a)
1776 : {
1777 125 : if (a.isGeneralReg())
1778 93 : return Operand(a.toGeneralReg()->reg());
1779 32 : if (a.isFloatReg())
1780 0 : return Operand(a.toFloatReg()->reg());
1781 32 : return Operand(masm.getStackPointer(), ToStackOffset(&a));
1782 : }
1783 :
1784 : Operand
1785 125 : CodeGeneratorX86Shared::ToOperand(const LAllocation* a)
1786 : {
1787 125 : return ToOperand(*a);
1788 : }
1789 :
1790 : Operand
1791 0 : CodeGeneratorX86Shared::ToOperand(const LDefinition* def)
1792 : {
1793 0 : return ToOperand(def->output());
1794 : }
1795 :
1796 : MoveOperand
1797 1108 : CodeGeneratorX86Shared::toMoveOperand(LAllocation a) const
1798 : {
1799 1108 : if (a.isGeneralReg())
1800 566 : return MoveOperand(ToRegister(a));
1801 542 : if (a.isFloatReg())
1802 0 : return MoveOperand(ToFloatRegister(a));
1803 542 : return MoveOperand(StackPointer, ToStackOffset(a));
1804 : }
1805 :
1806 : class OutOfLineTableSwitch : public OutOfLineCodeBase<CodeGeneratorX86Shared>
1807 : {
1808 : MTableSwitch* mir_;
1809 : CodeLabel jumpLabel_;
1810 :
1811 0 : void accept(CodeGeneratorX86Shared* codegen) {
1812 0 : codegen->visitOutOfLineTableSwitch(this);
1813 0 : }
1814 :
1815 : public:
1816 0 : explicit OutOfLineTableSwitch(MTableSwitch* mir)
1817 0 : : mir_(mir)
1818 0 : {}
1819 :
1820 0 : MTableSwitch* mir() const {
1821 0 : return mir_;
1822 : }
1823 :
1824 0 : CodeLabel* jumpLabel() {
1825 0 : return &jumpLabel_;
1826 : }
1827 : };
1828 :
1829 : void
1830 0 : CodeGeneratorX86Shared::visitOutOfLineTableSwitch(OutOfLineTableSwitch* ool)
1831 : {
1832 0 : MTableSwitch* mir = ool->mir();
1833 :
1834 0 : masm.haltingAlign(sizeof(void*));
1835 0 : masm.use(ool->jumpLabel()->target());
1836 0 : masm.addCodeLabel(*ool->jumpLabel());
1837 :
1838 0 : for (size_t i = 0; i < mir->numCases(); i++) {
1839 0 : LBlock* caseblock = skipTrivialBlocks(mir->getCase(i))->lir();
1840 0 : Label* caseheader = caseblock->label();
1841 0 : uint32_t caseoffset = caseheader->offset();
1842 :
1843 : // The entries of the jump table need to be absolute addresses and thus
1844 : // must be patched after codegen is finished.
1845 0 : CodeLabel cl;
1846 0 : masm.writeCodePointer(cl.patchAt());
1847 0 : cl.target()->bind(caseoffset);
1848 0 : masm.addCodeLabel(cl);
1849 : }
1850 0 : }
1851 :
1852 : void
1853 0 : CodeGeneratorX86Shared::emitTableSwitchDispatch(MTableSwitch* mir, Register index, Register base)
1854 : {
1855 0 : Label* defaultcase = skipTrivialBlocks(mir->getDefault())->lir()->label();
1856 :
1857 :     // Rebase the index by subtracting the table's low bound.
1858 0 : if (mir->low() != 0)
1859 0 : masm.subl(Imm32(mir->low()), index);
1860 :
1861 : // Jump to default case if input is out of range
1862 0 : int32_t cases = mir->numCases();
1863 0 : masm.cmp32(index, Imm32(cases));
1864 0 : masm.j(AssemblerX86Shared::AboveOrEqual, defaultcase);
1865 :
1866 : // To fill in the CodeLabels for the case entries, we need to first
1867 : // generate the case entries (we don't yet know their offsets in the
1868 : // instruction stream).
1869 0 : OutOfLineTableSwitch* ool = new(alloc()) OutOfLineTableSwitch(mir);
1870 0 : addOutOfLineCode(ool, mir);
1871 :
1872 :     // Compute the address of the jump-table entry for the selected case.
1873 0 : masm.mov(ool->jumpLabel()->patchAt(), base);
1874 0 : Operand pointer = Operand(base, index, ScalePointer);
1875 :
1876 : // Jump to the right case
1877 0 : masm.jmp(pointer);
1878 0 : }
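
// A hedged scalar sketch of the dispatch sequence emitted above (the names
// low, numCases and table are illustrative, standing in for mir->low(),
// mir->numCases() and the out-of-line jump table):
//
//     uint32_t i = uint32_t(index) - uint32_t(low);   // rebase the index
//     if (i >= uint32_t(numCases))                    // the unsigned compare
//         goto defaultcase;                           // also catches index < low
//     // ...then jump to the code address stored at table[i], which is what
//     // masm.jmp(Operand(base, index, ScalePointer)) does.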
1879 :
1880 : void
1881 0 : CodeGeneratorX86Shared::visitMathD(LMathD* math)
1882 : {
1883 0 : FloatRegister lhs = ToFloatRegister(math->lhs());
1884 0 : Operand rhs = ToOperand(math->rhs());
1885 0 : FloatRegister output = ToFloatRegister(math->output());
1886 :
1887 0 : switch (math->jsop()) {
1888 : case JSOP_ADD:
1889 0 : masm.vaddsd(rhs, lhs, output);
1890 0 : break;
1891 : case JSOP_SUB:
1892 0 : masm.vsubsd(rhs, lhs, output);
1893 0 : break;
1894 : case JSOP_MUL:
1895 0 : masm.vmulsd(rhs, lhs, output);
1896 0 : break;
1897 : case JSOP_DIV:
1898 0 : masm.vdivsd(rhs, lhs, output);
1899 0 : break;
1900 : default:
1901 0 : MOZ_CRASH("unexpected opcode");
1902 : }
1903 0 : }
1904 :
1905 : void
1906 0 : CodeGeneratorX86Shared::visitMathF(LMathF* math)
1907 : {
1908 0 : FloatRegister lhs = ToFloatRegister(math->lhs());
1909 0 : Operand rhs = ToOperand(math->rhs());
1910 0 : FloatRegister output = ToFloatRegister(math->output());
1911 :
1912 0 : switch (math->jsop()) {
1913 : case JSOP_ADD:
1914 0 : masm.vaddss(rhs, lhs, output);
1915 0 : break;
1916 : case JSOP_SUB:
1917 0 : masm.vsubss(rhs, lhs, output);
1918 0 : break;
1919 : case JSOP_MUL:
1920 0 : masm.vmulss(rhs, lhs, output);
1921 0 : break;
1922 : case JSOP_DIV:
1923 0 : masm.vdivss(rhs, lhs, output);
1924 0 : break;
1925 : default:
1926 0 : MOZ_CRASH("unexpected opcode");
1927 : }
1928 0 : }
1929 :
1930 : void
1931 0 : CodeGeneratorX86Shared::visitFloor(LFloor* lir)
1932 : {
1933 0 : FloatRegister input = ToFloatRegister(lir->input());
1934 0 : Register output = ToRegister(lir->output());
1935 :
1936 0 : Label bailout;
1937 :
1938 0 : if (AssemblerX86Shared::HasSSE41()) {
1939 : // Bail on negative-zero.
1940 0 : masm.branchNegativeZero(input, output, &bailout);
1941 0 : bailoutFrom(&bailout, lir->snapshot());
1942 :
1943 : // Round toward -Infinity.
1944 : {
1945 0 : ScratchDoubleScope scratch(masm);
1946 0 : masm.vroundsd(X86Encoding::RoundDown, input, scratch, scratch);
1947 0 : bailoutCvttsd2si(scratch, output, lir->snapshot());
1948 : }
1949 : } else {
1950 0 : Label negative, end;
1951 :
1952 : // Branch to a slow path for negative inputs. Doesn't catch NaN or -0.
1953 : {
1954 0 : ScratchDoubleScope scratch(masm);
1955 0 : masm.zeroDouble(scratch);
1956 0 : masm.branchDouble(Assembler::DoubleLessThan, input, scratch, &negative);
1957 : }
1958 :
1959 : // Bail on negative-zero.
1960 0 : masm.branchNegativeZero(input, output, &bailout);
1961 0 : bailoutFrom(&bailout, lir->snapshot());
1962 :
1963 : // Input is non-negative, so truncation correctly rounds.
1964 0 : bailoutCvttsd2si(input, output, lir->snapshot());
1965 :
1966 0 : masm.jump(&end);
1967 :
1968 : // Input is negative, but isn't -0.
1969 : // Negative values go on a comparatively expensive path, since no
1970 : // native rounding mode matches JS semantics. Still better than callVM.
1971 0 : masm.bind(&negative);
1972 : {
1973 : // Truncate and round toward zero.
1974 : // This is off-by-one for everything but integer-valued inputs.
1975 0 : bailoutCvttsd2si(input, output, lir->snapshot());
1976 :
1977 : // Test whether the input double was integer-valued.
1978 : {
1979 0 : ScratchDoubleScope scratch(masm);
1980 0 : masm.convertInt32ToDouble(output, scratch);
1981 0 : masm.branchDouble(Assembler::DoubleEqualOrUnordered, input, scratch, &end);
1982 : }
1983 :
1984 : // Input is not integer-valued, so we rounded off-by-one in the
1985 : // wrong direction. Correct by subtraction.
1986 0 : masm.subl(Imm32(1), output);
1987 : // Cannot overflow: output was already checked against INT_MIN.
1988 : }
1989 :
1990 0 : masm.bind(&end);
1991 : }
1992 0 : }
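
// A hedged scalar equivalent of the non-SSE4.1 negative path above (the
// helper name floorNegative is illustrative only):
//
//     // Assumes x < 0, x != -0.0, and that x fits in int32 after flooring
//     // (the bailouts above handle everything else).
//     int32_t floorNegative(double x) {
//         int32_t out = int32_t(x);    // cvttsd2si truncates toward zero
//         if (double(out) != x)        // not integer-valued?
//             out -= 1;                // correct the off-by-one
//         return out;
//     }
//
// e.g. floorNegative(-2.5): truncation yields -2, -2.0 != -2.5, so the result
// is corrected to -3 == floor(-2.5); floorNegative(-3.0) returns -3 as-is.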
1993 :
1994 : void
1995 0 : CodeGeneratorX86Shared::visitFloorF(LFloorF* lir)
1996 : {
1997 0 : FloatRegister input = ToFloatRegister(lir->input());
1998 0 : Register output = ToRegister(lir->output());
1999 :
2000 0 : Label bailout;
2001 :
2002 0 : if (AssemblerX86Shared::HasSSE41()) {
2003 : // Bail on negative-zero.
2004 0 : masm.branchNegativeZeroFloat32(input, output, &bailout);
2005 0 : bailoutFrom(&bailout, lir->snapshot());
2006 :
2007 : // Round toward -Infinity.
2008 : {
2009 0 : ScratchFloat32Scope scratch(masm);
2010 0 : masm.vroundss(X86Encoding::RoundDown, input, scratch, scratch);
2011 0 : bailoutCvttss2si(scratch, output, lir->snapshot());
2012 : }
2013 : } else {
2014 0 : Label negative, end;
2015 :
2016 : // Branch to a slow path for negative inputs. Doesn't catch NaN or -0.
2017 : {
2018 0 : ScratchFloat32Scope scratch(masm);
2019 0 : masm.zeroFloat32(scratch);
2020 0 : masm.branchFloat(Assembler::DoubleLessThan, input, scratch, &negative);
2021 : }
2022 :
2023 : // Bail on negative-zero.
2024 0 : masm.branchNegativeZeroFloat32(input, output, &bailout);
2025 0 : bailoutFrom(&bailout, lir->snapshot());
2026 :
2027 : // Input is non-negative, so truncation correctly rounds.
2028 0 : bailoutCvttss2si(input, output, lir->snapshot());
2029 :
2030 0 : masm.jump(&end);
2031 :
2032 : // Input is negative, but isn't -0.
2033 : // Negative values go on a comparatively expensive path, since no
2034 : // native rounding mode matches JS semantics. Still better than callVM.
2035 0 : masm.bind(&negative);
2036 : {
2037 : // Truncate and round toward zero.
2038 : // This is off-by-one for everything but integer-valued inputs.
2039 0 : bailoutCvttss2si(input, output, lir->snapshot());
2040 :
2041 :             // Test whether the input float was integer-valued.
2042 : {
2043 0 : ScratchFloat32Scope scratch(masm);
2044 0 : masm.convertInt32ToFloat32(output, scratch);
2045 0 : masm.branchFloat(Assembler::DoubleEqualOrUnordered, input, scratch, &end);
2046 : }
2047 :
2048 : // Input is not integer-valued, so we rounded off-by-one in the
2049 : // wrong direction. Correct by subtraction.
2050 0 : masm.subl(Imm32(1), output);
2051 : // Cannot overflow: output was already checked against INT_MIN.
2052 : }
2053 :
2054 0 : masm.bind(&end);
2055 : }
2056 0 : }
2057 :
2058 : void
2059 0 : CodeGeneratorX86Shared::visitCeil(LCeil* lir)
2060 : {
2061 0 : FloatRegister input = ToFloatRegister(lir->input());
2062 0 : ScratchDoubleScope scratch(masm);
2063 0 : Register output = ToRegister(lir->output());
2064 :
2065 0 : Label bailout, lessThanMinusOne;
2066 :
2067 : // Bail on ]-1; -0] range
2068 0 : masm.loadConstantDouble(-1, scratch);
2069 0 : masm.branchDouble(Assembler::DoubleLessThanOrEqualOrUnordered, input,
2070 0 : scratch, &lessThanMinusOne);
2071 :
2072 : // Test for remaining values with the sign bit set, i.e. ]-1; -0]
2073 0 : masm.vmovmskpd(input, output);
2074 0 : masm.branchTest32(Assembler::NonZero, output, Imm32(1), &bailout);
2075 0 : bailoutFrom(&bailout, lir->snapshot());
2076 :
2077 0 : if (AssemblerX86Shared::HasSSE41()) {
2078 : // x <= -1 or x > -0
2079 0 : masm.bind(&lessThanMinusOne);
2080 : // Round toward +Infinity.
2081 0 : masm.vroundsd(X86Encoding::RoundUp, input, scratch, scratch);
2082 0 : bailoutCvttsd2si(scratch, output, lir->snapshot());
2083 0 : return;
2084 : }
2085 :
2086 : // No SSE4.1
2087 0 : Label end;
2088 :
2089 :     // x >= 0 and x is not -0.0, so we can truncate (resp. truncate and add 1) for
2090 : // integer (resp. non-integer) values.
2091 : // Will also work for values >= INT_MAX + 1, as the truncate
2092 : // operation will return INT_MIN and there'll be a bailout.
2093 0 : bailoutCvttsd2si(input, output, lir->snapshot());
2094 0 : masm.convertInt32ToDouble(output, scratch);
2095 0 : masm.branchDouble(Assembler::DoubleEqualOrUnordered, input, scratch, &end);
2096 :
2097 : // Input is not integer-valued, add 1 to obtain the ceiling value
2098 0 : masm.addl(Imm32(1), output);
2099 : // if input > INT_MAX, output == INT_MAX so adding 1 will overflow.
2100 0 : bailoutIf(Assembler::Overflow, lir->snapshot());
2101 0 : masm.jump(&end);
2102 :
2103 : // x <= -1, truncation is the way to go.
2104 0 : masm.bind(&lessThanMinusOne);
2105 0 : bailoutCvttsd2si(input, output, lir->snapshot());
2106 :
2107 0 : masm.bind(&end);
2108 : }
2109 :
2110 : void
2111 0 : CodeGeneratorX86Shared::visitCeilF(LCeilF* lir)
2112 : {
2113 0 : FloatRegister input = ToFloatRegister(lir->input());
2114 0 : ScratchFloat32Scope scratch(masm);
2115 0 : Register output = ToRegister(lir->output());
2116 :
2117 0 : Label bailout, lessThanMinusOne;
2118 :
2119 : // Bail on ]-1; -0] range
2120 0 : masm.loadConstantFloat32(-1.f, scratch);
2121 0 : masm.branchFloat(Assembler::DoubleLessThanOrEqualOrUnordered, input,
2122 0 : scratch, &lessThanMinusOne);
2123 :
2124 : // Test for remaining values with the sign bit set, i.e. ]-1; -0]
2125 0 : masm.vmovmskps(input, output);
2126 0 : masm.branchTest32(Assembler::NonZero, output, Imm32(1), &bailout);
2127 0 : bailoutFrom(&bailout, lir->snapshot());
2128 :
2129 0 : if (AssemblerX86Shared::HasSSE41()) {
2130 : // x <= -1 or x > -0
2131 0 : masm.bind(&lessThanMinusOne);
2132 : // Round toward +Infinity.
2133 0 : masm.vroundss(X86Encoding::RoundUp, input, scratch, scratch);
2134 0 : bailoutCvttss2si(scratch, output, lir->snapshot());
2135 0 : return;
2136 : }
2137 :
2138 : // No SSE4.1
2139 0 : Label end;
2140 :
2141 :     // x >= 0 and x is not -0.0, so we can truncate (resp. truncate and add 1) for
2142 : // integer (resp. non-integer) values.
2143 : // Will also work for values >= INT_MAX + 1, as the truncate
2144 : // operation will return INT_MIN and there'll be a bailout.
2145 0 : bailoutCvttss2si(input, output, lir->snapshot());
2146 0 : masm.convertInt32ToFloat32(output, scratch);
2147 0 : masm.branchFloat(Assembler::DoubleEqualOrUnordered, input, scratch, &end);
2148 :
2149 : // Input is not integer-valued, add 1 to obtain the ceiling value
2150 0 : masm.addl(Imm32(1), output);
2151 : // if input > INT_MAX, output == INT_MAX so adding 1 will overflow.
2152 0 : bailoutIf(Assembler::Overflow, lir->snapshot());
2153 0 : masm.jump(&end);
2154 :
2155 : // x <= -1, truncation is the way to go.
2156 0 : masm.bind(&lessThanMinusOne);
2157 0 : bailoutCvttss2si(input, output, lir->snapshot());
2158 :
2159 0 : masm.bind(&end);
2160 : }
2161 :
2162 : void
2163 0 : CodeGeneratorX86Shared::visitRound(LRound* lir)
2164 : {
2165 0 : FloatRegister input = ToFloatRegister(lir->input());
2166 0 : FloatRegister temp = ToFloatRegister(lir->temp());
2167 0 : ScratchDoubleScope scratch(masm);
2168 0 : Register output = ToRegister(lir->output());
2169 :
2170 0 : Label negativeOrZero, negative, end, bailout;
2171 :
2172 : // Branch to a slow path for non-positive inputs. Doesn't catch NaN.
2173 0 : masm.zeroDouble(scratch);
2174 0 : masm.loadConstantDouble(GetBiggestNumberLessThan(0.5), temp);
2175 0 : masm.branchDouble(Assembler::DoubleLessThanOrEqual, input, scratch, &negativeOrZero);
2176 :
2177 : // Input is positive. Add the biggest double less than 0.5 and
2178 : // truncate, rounding down (because if the input is the biggest double less
2179 : // than 0.5, adding 0.5 would undesirably round up to 1). Note that we have
2180 : // to add the input to the temp register because we're not allowed to
2181 : // modify the input register.
2182 0 : masm.addDouble(input, temp);
2183 0 : bailoutCvttsd2si(temp, output, lir->snapshot());
2184 :
2185 0 : masm.jump(&end);
2186 :
2187 : // Input is negative, +0 or -0.
2188 0 : masm.bind(&negativeOrZero);
2189 : // Branch on negative input.
2190 0 : masm.j(Assembler::NotEqual, &negative);
2191 :
2192 : // Bail on negative-zero.
2193 0 : masm.branchNegativeZero(input, output, &bailout, /* maybeNonZero = */ false);
2194 0 : bailoutFrom(&bailout, lir->snapshot());
2195 :
2196 : // Input is +0
2197 0 : masm.xor32(output, output);
2198 0 : masm.jump(&end);
2199 :
2200 : // Input is negative.
2201 0 : masm.bind(&negative);
2202 :
2203 :     // Inputs in ]-0.5; 0] need 0.5 added to them; other negative inputs need
2204 :     // the biggest double less than 0.5 added to them.
2205 0 : Label loadJoin;
2206 0 : masm.loadConstantDouble(-0.5, scratch);
2207 0 : masm.branchDouble(Assembler::DoubleLessThan, input, scratch, &loadJoin);
2208 0 : masm.loadConstantDouble(0.5, temp);
2209 0 : masm.bind(&loadJoin);
2210 :
2211 0 : if (AssemblerX86Shared::HasSSE41()) {
2212 : // Add 0.5 and round toward -Infinity. The result is stored in the temp
2213 : // register (currently contains 0.5).
2214 0 : masm.addDouble(input, temp);
2215 0 : masm.vroundsd(X86Encoding::RoundDown, temp, scratch, scratch);
2216 :
2217 : // Truncate.
2218 0 : bailoutCvttsd2si(scratch, output, lir->snapshot());
2219 :
2220 : // If the result is positive zero, then the actual result is -0. Bail.
2221 : // Otherwise, the truncation will have produced the correct negative integer.
2222 0 : masm.test32(output, output);
2223 0 : bailoutIf(Assembler::Zero, lir->snapshot());
2224 : } else {
2225 0 : masm.addDouble(input, temp);
2226 :
2227 : // Round toward -Infinity without the benefit of ROUNDSD.
2228 : {
2229 : // If input + 0.5 >= 0, input is a negative number >= -0.5 and the result is -0.
2230 0 : masm.compareDouble(Assembler::DoubleGreaterThanOrEqual, temp, scratch);
2231 0 : bailoutIf(Assembler::DoubleGreaterThanOrEqual, lir->snapshot());
2232 :
2233 : // Truncate and round toward zero.
2234 : // This is off-by-one for everything but integer-valued inputs.
2235 0 : bailoutCvttsd2si(temp, output, lir->snapshot());
2236 :
2237 : // Test whether the truncated double was integer-valued.
2238 0 : masm.convertInt32ToDouble(output, scratch);
2239 0 : masm.branchDouble(Assembler::DoubleEqualOrUnordered, temp, scratch, &end);
2240 :
2241 : // Input is not integer-valued, so we rounded off-by-one in the
2242 : // wrong direction. Correct by subtraction.
2243 0 : masm.subl(Imm32(1), output);
2244 : // Cannot overflow: output was already checked against INT_MIN.
2245 : }
2246 : }
2247 :
2248 0 : masm.bind(&end);
2249 0 : }
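
// Why the positive path above adds GetBiggestNumberLessThan(0.5) instead of
// 0.5 (a hedged worked example): for the largest double strictly below 0.5,
//
//     double x = 0.49999999999999994;              // 0.5 - 2^-54
//     x + 0.5                 == 1.0;              // ties-to-even rounds up
//     x + 0.49999999999999994 == 0.9999999999999999;
//
// so adding exactly 0.5 and truncating would return 1, while Math.round(x)
// is 0; adding the biggest double below 0.5 keeps the sum under 1.0, and the
// truncation then gives the correct result.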
2250 :
2251 : void
2252 0 : CodeGeneratorX86Shared::visitRoundF(LRoundF* lir)
2253 : {
2254 0 : FloatRegister input = ToFloatRegister(lir->input());
2255 0 : FloatRegister temp = ToFloatRegister(lir->temp());
2256 0 : ScratchFloat32Scope scratch(masm);
2257 0 : Register output = ToRegister(lir->output());
2258 :
2259 0 : Label negativeOrZero, negative, end, bailout;
2260 :
2261 : // Branch to a slow path for non-positive inputs. Doesn't catch NaN.
2262 0 : masm.zeroFloat32(scratch);
2263 0 : masm.loadConstantFloat32(GetBiggestNumberLessThan(0.5f), temp);
2264 0 : masm.branchFloat(Assembler::DoubleLessThanOrEqual, input, scratch, &negativeOrZero);
2265 :
2266 : // Input is non-negative. Add the biggest float less than 0.5 and truncate,
2267 : // rounding down (because if the input is the biggest float less than 0.5,
2268 : // adding 0.5 would undesirably round up to 1). Note that we have to add
2269 : // the input to the temp register because we're not allowed to modify the
2270 : // input register.
2271 0 : masm.addFloat32(input, temp);
2272 :
2273 0 : bailoutCvttss2si(temp, output, lir->snapshot());
2274 :
2275 0 : masm.jump(&end);
2276 :
2277 : // Input is negative, +0 or -0.
2278 0 : masm.bind(&negativeOrZero);
2279 : // Branch on negative input.
2280 0 : masm.j(Assembler::NotEqual, &negative);
2281 :
2282 : // Bail on negative-zero.
2283 0 : masm.branchNegativeZeroFloat32(input, output, &bailout);
2284 0 : bailoutFrom(&bailout, lir->snapshot());
2285 :
2286 : // Input is +0.
2287 0 : masm.xor32(output, output);
2288 0 : masm.jump(&end);
2289 :
2290 : // Input is negative.
2291 0 : masm.bind(&negative);
2292 :
2293 :     // Inputs in ]-0.5; 0] need 0.5 added to them; other negative inputs need
2294 :     // the biggest float less than 0.5 added to them.
2295 0 : Label loadJoin;
2296 0 : masm.loadConstantFloat32(-0.5f, scratch);
2297 0 : masm.branchFloat(Assembler::DoubleLessThan, input, scratch, &loadJoin);
2298 0 : masm.loadConstantFloat32(0.5f, temp);
2299 0 : masm.bind(&loadJoin);
2300 :
2301 0 : if (AssemblerX86Shared::HasSSE41()) {
2302 : // Add 0.5 and round toward -Infinity. The result is stored in the temp
2303 : // register (currently contains 0.5).
2304 0 : masm.addFloat32(input, temp);
2305 0 : masm.vroundss(X86Encoding::RoundDown, temp, scratch, scratch);
2306 :
2307 : // Truncate.
2308 0 : bailoutCvttss2si(scratch, output, lir->snapshot());
2309 :
2310 : // If the result is positive zero, then the actual result is -0. Bail.
2311 : // Otherwise, the truncation will have produced the correct negative integer.
2312 0 : masm.test32(output, output);
2313 0 : bailoutIf(Assembler::Zero, lir->snapshot());
2314 : } else {
2315 0 : masm.addFloat32(input, temp);
2316 : // Round toward -Infinity without the benefit of ROUNDSS.
2317 : {
2318 : // If input + 0.5 >= 0, input is a negative number >= -0.5 and the result is -0.
2319 0 : masm.compareFloat(Assembler::DoubleGreaterThanOrEqual, temp, scratch);
2320 0 : bailoutIf(Assembler::DoubleGreaterThanOrEqual, lir->snapshot());
2321 :
2322 : // Truncate and round toward zero.
2323 : // This is off-by-one for everything but integer-valued inputs.
2324 0 : bailoutCvttss2si(temp, output, lir->snapshot());
2325 :
2326 :             // Test whether the truncated float was integer-valued.
2327 0 : masm.convertInt32ToFloat32(output, scratch);
2328 0 : masm.branchFloat(Assembler::DoubleEqualOrUnordered, temp, scratch, &end);
2329 :
2330 : // Input is not integer-valued, so we rounded off-by-one in the
2331 : // wrong direction. Correct by subtraction.
2332 0 : masm.subl(Imm32(1), output);
2333 : // Cannot overflow: output was already checked against INT_MIN.
2334 : }
2335 : }
2336 :
2337 0 : masm.bind(&end);
2338 0 : }
2339 :
2340 : void
2341 0 : CodeGeneratorX86Shared::visitNearbyInt(LNearbyInt* lir)
2342 : {
2343 0 : FloatRegister input = ToFloatRegister(lir->input());
2344 0 : FloatRegister output = ToFloatRegister(lir->output());
2345 :
2346 0 : RoundingMode roundingMode = lir->mir()->roundingMode();
2347 0 : masm.vroundsd(Assembler::ToX86RoundingMode(roundingMode), input, output, output);
2348 0 : }
2349 :
2350 : void
2351 0 : CodeGeneratorX86Shared::visitNearbyIntF(LNearbyIntF* lir)
2352 : {
2353 0 : FloatRegister input = ToFloatRegister(lir->input());
2354 0 : FloatRegister output = ToFloatRegister(lir->output());
2355 :
2356 0 : RoundingMode roundingMode = lir->mir()->roundingMode();
2357 0 : masm.vroundss(Assembler::ToX86RoundingMode(roundingMode), input, output, output);
2358 0 : }
2359 :
2360 : void
2361 8 : CodeGeneratorX86Shared::visitGuardShape(LGuardShape* guard)
2362 : {
2363 8 : Register obj = ToRegister(guard->input());
2364 8 : masm.cmpPtr(Operand(obj, ShapedObject::offsetOfShape()), ImmGCPtr(guard->mir()->shape()));
2365 :
2366 8 : bailoutIf(Assembler::NotEqual, guard->snapshot());
2367 8 : }
2368 :
2369 : void
2370 0 : CodeGeneratorX86Shared::visitGuardObjectGroup(LGuardObjectGroup* guard)
2371 : {
2372 0 : Register obj = ToRegister(guard->input());
2373 :
2374 0 : masm.cmpPtr(Operand(obj, JSObject::offsetOfGroup()), ImmGCPtr(guard->mir()->group()));
2375 :
2376 : Assembler::Condition cond =
2377 0 : guard->mir()->bailOnEquality() ? Assembler::Equal : Assembler::NotEqual;
2378 0 : bailoutIf(cond, guard->snapshot());
2379 0 : }
2380 :
2381 : void
2382 0 : CodeGeneratorX86Shared::visitGuardClass(LGuardClass* guard)
2383 : {
2384 0 : Register obj = ToRegister(guard->input());
2385 0 : Register tmp = ToRegister(guard->tempInt());
2386 :
2387 0 : masm.loadPtr(Address(obj, JSObject::offsetOfGroup()), tmp);
2388 0 : masm.cmpPtr(Operand(tmp, ObjectGroup::offsetOfClasp()), ImmPtr(guard->mir()->getClass()));
2389 0 : bailoutIf(Assembler::NotEqual, guard->snapshot());
2390 0 : }
2391 :
2392 : void
2393 0 : CodeGeneratorX86Shared::visitEffectiveAddress(LEffectiveAddress* ins)
2394 : {
2395 0 : const MEffectiveAddress* mir = ins->mir();
2396 0 : Register base = ToRegister(ins->base());
2397 0 : Register index = ToRegister(ins->index());
2398 0 : Register output = ToRegister(ins->output());
2399 0 : masm.leal(Operand(base, index, mir->scale(), mir->displacement()), output);
2400 0 : }
2401 :
2402 : void
2403 8 : CodeGeneratorX86Shared::generateInvalidateEpilogue()
2404 : {
2405 : // Ensure that there is enough space in the buffer for the OsiPoint
2406 : // patching to occur. Otherwise, we could overwrite the invalidation
2407 : // epilogue.
2408 72 : for (size_t i = 0; i < sizeof(void*); i += Assembler::NopSize())
2409 64 : masm.nop();
2410 :
2411 8 : masm.bind(&invalidate_);
2412 :
2413 : // Push the Ion script onto the stack (when we determine what that pointer is).
2414 8 : invalidateEpilogueData_ = masm.pushWithPatch(ImmWord(uintptr_t(-1)));
2415 8 : JitCode* thunk = gen->jitRuntime()->getInvalidationThunk();
2416 :
2417 8 : masm.call(thunk);
2418 :
2419 : // We should never reach this point in JIT code -- the invalidation thunk should
2420 : // pop the invalidated JS frame and return directly to its caller.
2421 8 : masm.assumeUnreachable("Should have returned directly to its caller instead of here.");
2422 8 : }
2423 :
2424 : void
2425 0 : CodeGeneratorX86Shared::visitNegI(LNegI* ins)
2426 : {
2427 0 : Register input = ToRegister(ins->input());
2428 0 : MOZ_ASSERT(input == ToRegister(ins->output()));
2429 :
2430 0 : masm.neg32(input);
2431 0 : }
2432 :
2433 : void
2434 0 : CodeGeneratorX86Shared::visitNegD(LNegD* ins)
2435 : {
2436 0 : FloatRegister input = ToFloatRegister(ins->input());
2437 0 : MOZ_ASSERT(input == ToFloatRegister(ins->output()));
2438 :
2439 0 : masm.negateDouble(input);
2440 0 : }
2441 :
2442 : void
2443 0 : CodeGeneratorX86Shared::visitNegF(LNegF* ins)
2444 : {
2445 0 : FloatRegister input = ToFloatRegister(ins->input());
2446 0 : MOZ_ASSERT(input == ToFloatRegister(ins->output()));
2447 :
2448 0 : masm.negateFloat(input);
2449 0 : }
2450 :
2451 : void
2452 0 : CodeGeneratorX86Shared::visitSimd128Int(LSimd128Int* ins)
2453 : {
2454 0 : const LDefinition* out = ins->getDef(0);
2455 0 : masm.loadConstantSimd128Int(ins->getValue(), ToFloatRegister(out));
2456 0 : }
2457 :
2458 : void
2459 0 : CodeGeneratorX86Shared::visitSimd128Float(LSimd128Float* ins)
2460 : {
2461 0 : const LDefinition* out = ins->getDef(0);
2462 0 : masm.loadConstantSimd128Float(ins->getValue(), ToFloatRegister(out));
2463 0 : }
2464 :
2465 : void
2466 0 : CodeGeneratorX86Shared::visitInt32x4ToFloat32x4(LInt32x4ToFloat32x4* ins)
2467 : {
2468 0 : FloatRegister in = ToFloatRegister(ins->input());
2469 0 : FloatRegister out = ToFloatRegister(ins->output());
2470 0 : masm.convertInt32x4ToFloat32x4(in, out);
2471 0 : }
2472 :
2473 : void
2474 0 : CodeGeneratorX86Shared::visitFloat32x4ToInt32x4(LFloat32x4ToInt32x4* ins)
2475 : {
2476 0 : FloatRegister in = ToFloatRegister(ins->input());
2477 0 : FloatRegister out = ToFloatRegister(ins->output());
2478 0 : Register temp = ToRegister(ins->temp());
2479 :
2480 0 : masm.convertFloat32x4ToInt32x4(in, out);
2481 :
2482 0 : auto* ool = new(alloc()) OutOfLineSimdFloatToIntCheck(temp, in, ins,
2483 0 : ins->mir()->bytecodeOffset());
2484 0 : addOutOfLineCode(ool, ins->mir());
2485 :
2486 0 : static const SimdConstant InvalidResult = SimdConstant::SplatX4(int32_t(-2147483648));
2487 :
2488 0 : ScratchSimd128Scope scratch(masm);
2489 0 : masm.loadConstantSimd128Int(InvalidResult, scratch);
2490 0 : masm.packedEqualInt32x4(Operand(out), scratch);
2491 : // TODO (bug 1156228): If we have SSE4.1, we can use PTEST here instead of
2492 : // the two following instructions.
2493 0 : masm.vmovmskps(scratch, temp);
2494 0 : masm.cmp32(temp, Imm32(0));
2495 0 : masm.j(Assembler::NotEqual, ool->entry());
2496 :
2497 0 : masm.bind(ool->rejoin());
2498 0 : }
2499 :
2500 : void
2501 0 : CodeGeneratorX86Shared::visitOutOfLineSimdFloatToIntCheck(OutOfLineSimdFloatToIntCheck *ool)
2502 : {
2503 0 : static const SimdConstant Int32MaxX4 = SimdConstant::SplatX4(2147483647.f);
2504 0 : static const SimdConstant Int32MinX4 = SimdConstant::SplatX4(-2147483648.f);
2505 :
2506 0 : Label onConversionError;
2507 :
2508 0 : FloatRegister input = ool->input();
2509 0 : Register temp = ool->temp();
2510 :
2511 0 : ScratchSimd128Scope scratch(masm);
2512 0 : masm.loadConstantSimd128Float(Int32MinX4, scratch);
2513 0 : masm.vcmpleps(Operand(input), scratch, scratch);
2514 0 : masm.vmovmskps(scratch, temp);
2515 0 : masm.cmp32(temp, Imm32(15));
2516 0 : masm.j(Assembler::NotEqual, &onConversionError);
2517 :
2518 0 : masm.loadConstantSimd128Float(Int32MaxX4, scratch);
2519 0 : masm.vcmpleps(Operand(input), scratch, scratch);
2520 0 : masm.vmovmskps(scratch, temp);
2521 0 : masm.cmp32(temp, Imm32(0));
2522 0 : masm.j(Assembler::NotEqual, &onConversionError);
2523 :
2524 0 : masm.jump(ool->rejoin());
2525 :
2526 0 : if (gen->compilingWasm()) {
2527 0 : masm.bindLater(&onConversionError, trap(ool, wasm::Trap::ImpreciseSimdConversion));
2528 : } else {
2529 0 : masm.bind(&onConversionError);
2530 0 : bailout(ool->ins()->snapshot());
2531 : }
2532 0 : }
2533 :
2534 : // Convert Float32x4 to Uint32x4.
2535 : //
2536 : // If any input lane value is out of range or NaN, bail out.
2537 : void
2538 0 : CodeGeneratorX86Shared::visitFloat32x4ToUint32x4(LFloat32x4ToUint32x4* ins)
2539 : {
2540 0 : const MSimdConvert* mir = ins->mir();
2541 0 : FloatRegister in = ToFloatRegister(ins->input());
2542 0 : FloatRegister out = ToFloatRegister(ins->output());
2543 0 : Register temp = ToRegister(ins->tempR());
2544 0 : FloatRegister tempF = ToFloatRegister(ins->tempF());
2545 :
2546 : // Classify lane values into 4 disjoint classes:
2547 : //
2548 : // N-lanes: in <= -1.0
2549 : // A-lanes: -1.0 < in <= 0x0.ffffffp31
2550 : // B-lanes: 0x1.0p31 <= in <= 0x0.ffffffp32
2551 : // V-lanes: 0x1.0p32 <= in, or isnan(in)
2552 : //
2553 : // We need to bail out to throw a RangeError if we see any N-lanes or
2554 : // V-lanes.
2555 : //
2556 : // For A-lanes and B-lanes, we make two float -> int32 conversions:
2557 : //
2558 : // A = cvttps2dq(in)
2559 : // B = cvttps2dq(in - 0x1.0p31f)
2560 : //
2561 : // Note that the subtraction for the B computation is exact for B-lanes.
2562 : // There is no rounding, so B is the low 31 bits of the correctly converted
2563 : // result.
2564 : //
2565 : // The cvttps2dq instruction produces 0x80000000 when the input is NaN or
2566 : // out of range for a signed int32_t. This conveniently provides the missing
2567 : // high bit for B, so the desired result is A for A-lanes and A|B for
2568 : // B-lanes.
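    // A hedged per-lane sketch of the A/B combination described above (the
    // helper cvttss2si is an assumption standing in for the scalar form of
    // cvttps2dq, which yields INT32_MIN for NaN or out-of-range inputs; the
    // vector code that follows is the real implementation):
    //
    //     int32_t cvttss2si(float f);  // INT32_MIN when f is NaN/out of range
    //     uint32_t convertLane(float f) {
    //         int32_t A = cvttss2si(f);
    //         int32_t B = cvttss2si(f - 2147483648.0f);
    //         return A >= 0 ? uint32_t(A) : (uint32_t(A) | uint32_t(B));
    //     }
    //
    // e.g. f = 3000000000.0f (a B-lane): A = INT32_MIN, B = 852516352, and
    // uint32_t(A) | uint32_t(B) == 3000000000u.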
2569 :
2570 0 : ScratchSimd128Scope scratch(masm);
2571 :
2572 : // TODO: If the majority of lanes are A-lanes, it could be faster to compute
2573 : // A first, use vmovmskps to check for any non-A-lanes and handle them in
2574 :     // ool code. OTOH, if we're wrong about the lane distribution, that would be
2575 : // slower.
2576 :
2577 : // Compute B in |scratch|.
2578 : static const float Adjust = 0x80000000; // 0x1.0p31f for the benefit of MSVC.
2579 0 : static const SimdConstant Bias = SimdConstant::SplatX4(-Adjust);
2580 0 : masm.loadConstantSimd128Float(Bias, scratch);
2581 0 : masm.packedAddFloat32(Operand(in), scratch);
2582 0 : masm.convertFloat32x4ToInt32x4(scratch, scratch);
2583 :
2584 : // Compute A in |out|. This is the last time we use |in| and the first time
2585 : // we use |out|, so we can tolerate if they are the same register.
2586 0 : masm.convertFloat32x4ToInt32x4(in, out);
2587 :
2588 : // We can identify A-lanes by the sign bits in A: Any A-lanes will be
2589 : // positive in A, and N, B, and V-lanes will be 0x80000000 in A. Compute a
2590 : // mask of non-A-lanes into |tempF|.
2591 0 : masm.zeroSimd128Float(tempF);
2592 0 : masm.packedGreaterThanInt32x4(Operand(out), tempF);
2593 :
2594 : // Clear the A-lanes in B.
2595 0 : masm.bitwiseAndSimd128(Operand(tempF), scratch);
2596 :
2597 : // Compute the final result: A for A-lanes, A|B for B-lanes.
2598 0 : masm.bitwiseOrSimd128(Operand(scratch), out);
2599 :
2600 : // We still need to filter out the V-lanes. They would show up as 0x80000000
2601 : // in both A and B. Since we cleared the valid A-lanes in B, the V-lanes are
2602 : // the remaining negative lanes in B.
2603 0 : masm.vmovmskps(scratch, temp);
2604 0 : masm.cmp32(temp, Imm32(0));
2605 :
2606 0 : if (gen->compilingWasm())
2607 0 : masm.j(Assembler::NotEqual, trap(mir, wasm::Trap::ImpreciseSimdConversion));
2608 : else
2609 0 : bailoutIf(Assembler::NotEqual, ins->snapshot());
2610 0 : }
2611 :
2612 : void
2613 0 : CodeGeneratorX86Shared::visitSimdValueInt32x4(LSimdValueInt32x4* ins)
2614 : {
2615 0 : MOZ_ASSERT(ins->mir()->type() == MIRType::Int32x4 || ins->mir()->type() == MIRType::Bool32x4);
2616 :
2617 0 : FloatRegister output = ToFloatRegister(ins->output());
2618 0 : if (AssemblerX86Shared::HasSSE41()) {
2619 0 : masm.vmovd(ToRegister(ins->getOperand(0)), output);
2620 0 : for (size_t i = 1; i < 4; ++i) {
2621 0 : Register r = ToRegister(ins->getOperand(i));
2622 0 : masm.vpinsrd(i, r, output, output);
2623 : }
2624 0 : return;
2625 : }
2626 :
2627 0 : masm.reserveStack(Simd128DataSize);
2628 0 : for (size_t i = 0; i < 4; ++i) {
2629 0 : Register r = ToRegister(ins->getOperand(i));
2630 0 : masm.store32(r, Address(StackPointer, i * sizeof(int32_t)));
2631 : }
2632 0 : masm.loadAlignedSimd128Int(Address(StackPointer, 0), output);
2633 0 : masm.freeStack(Simd128DataSize);
2634 : }
2635 :
2636 : void
2637 0 : CodeGeneratorX86Shared::visitSimdValueFloat32x4(LSimdValueFloat32x4* ins)
2638 : {
2639 0 : MOZ_ASSERT(ins->mir()->type() == MIRType::Float32x4);
2640 :
2641 0 : FloatRegister r0 = ToFloatRegister(ins->getOperand(0));
2642 0 : FloatRegister r1 = ToFloatRegister(ins->getOperand(1));
2643 0 : FloatRegister r2 = ToFloatRegister(ins->getOperand(2));
2644 0 : FloatRegister r3 = ToFloatRegister(ins->getOperand(3));
2645 0 : FloatRegister tmp = ToFloatRegister(ins->getTemp(0));
2646 0 : FloatRegister output = ToFloatRegister(ins->output());
2647 :
2648 0 : FloatRegister r0Copy = masm.reusedInputFloat32x4(r0, output);
2649 0 : FloatRegister r1Copy = masm.reusedInputFloat32x4(r1, tmp);
2650 :
2651 0 : masm.vunpcklps(r3, r1Copy, tmp);
2652 0 : masm.vunpcklps(r2, r0Copy, output);
2653 0 : masm.vunpcklps(tmp, output, output);
2654 0 : }
2655 :
2656 : void
2657 0 : CodeGeneratorX86Shared::visitSimdSplatX16(LSimdSplatX16* ins)
2658 : {
2659 0 : MOZ_ASSERT(SimdTypeToLength(ins->mir()->type()) == 16);
2660 0 : Register input = ToRegister(ins->getOperand(0));
2661 0 : FloatRegister output = ToFloatRegister(ins->output());
2662 0 : masm.vmovd(input, output);
2663 0 : if (AssemblerX86Shared::HasSSSE3()) {
2664 0 : masm.zeroSimd128Int(ScratchSimd128Reg);
2665 0 : masm.vpshufb(ScratchSimd128Reg, output, output);
2666 : } else {
2667 : // Use two shifts to duplicate the low 8 bits into the low 16 bits.
2668 0 : masm.vpsllw(Imm32(8), output, output);
2669 0 : masm.vmovdqa(output, ScratchSimd128Reg);
2670 0 : masm.vpsrlw(Imm32(8), ScratchSimd128Reg, ScratchSimd128Reg);
2671 0 : masm.vpor(ScratchSimd128Reg, output, output);
2672 : // Then do an X8 splat.
2673 0 : masm.vpshuflw(0, output, output);
2674 0 : masm.vpshufd(0, output, output);
2675 : }
2676 0 : }
2677 :
2678 : void
2679 0 : CodeGeneratorX86Shared::visitSimdSplatX8(LSimdSplatX8* ins)
2680 : {
2681 0 : MOZ_ASSERT(SimdTypeToLength(ins->mir()->type()) == 8);
2682 0 : Register input = ToRegister(ins->getOperand(0));
2683 0 : FloatRegister output = ToFloatRegister(ins->output());
2684 0 : masm.vmovd(input, output);
2685 0 : masm.vpshuflw(0, output, output);
2686 0 : masm.vpshufd(0, output, output);
2687 0 : }
2688 :
2689 : void
2690 0 : CodeGeneratorX86Shared::visitSimdSplatX4(LSimdSplatX4* ins)
2691 : {
2692 0 : FloatRegister output = ToFloatRegister(ins->output());
2693 :
2694 0 : MSimdSplat* mir = ins->mir();
2695 0 : MOZ_ASSERT(IsSimdType(mir->type()));
2696 : JS_STATIC_ASSERT(sizeof(float) == sizeof(int32_t));
2697 :
2698 0 : if (mir->type() == MIRType::Float32x4) {
2699 0 : FloatRegister r = ToFloatRegister(ins->getOperand(0));
2700 0 : FloatRegister rCopy = masm.reusedInputFloat32x4(r, output);
2701 0 : masm.vshufps(0, rCopy, rCopy, output);
2702 : } else {
2703 0 : Register r = ToRegister(ins->getOperand(0));
2704 0 : masm.vmovd(r, output);
2705 0 : masm.vpshufd(0, output, output);
2706 : }
2707 0 : }
2708 :
2709 : void
2710 0 : CodeGeneratorX86Shared::visitSimdReinterpretCast(LSimdReinterpretCast* ins)
2711 : {
2712 0 : FloatRegister input = ToFloatRegister(ins->input());
2713 0 : FloatRegister output = ToFloatRegister(ins->output());
2714 :
2715 0 : if (input.aliases(output))
2716 0 : return;
2717 :
2718 0 : if (IsIntegerSimdType(ins->mir()->type()))
2719 0 : masm.vmovdqa(input, output);
2720 : else
2721 0 : masm.vmovaps(input, output);
2722 : }
2723 :
2724 : // Extract an integer lane from the 32x4 vector register |input| and place it in
2725 : // |output|.
2726 : void
2727 0 : CodeGeneratorX86Shared::emitSimdExtractLane32x4(FloatRegister input, Register output, unsigned lane)
2728 : {
2729 0 : if (lane == 0) {
2730 : // The value we want to extract is in the low double-word
2731 0 : masm.moveLowInt32(input, output);
2732 0 : } else if (AssemblerX86Shared::HasSSE41()) {
2733 0 : masm.vpextrd(lane, input, output);
2734 : } else {
2735 0 : uint32_t mask = MacroAssembler::ComputeShuffleMask(lane);
2736 0 : masm.shuffleInt32(mask, input, ScratchSimd128Reg);
2737 0 : masm.moveLowInt32(ScratchSimd128Reg, output);
2738 : }
2739 0 : }
2740 :
2741 : // Extract an integer lane from the 16x8 vector register |input|, sign- or
2742 : // zero-extend to 32 bits and place the result in |output|.
2743 : void
2744 0 : CodeGeneratorX86Shared::emitSimdExtractLane16x8(FloatRegister input, Register output,
2745 : unsigned lane, SimdSign signedness)
2746 : {
2747 : // Unlike pextrd and pextrb, this is available in SSE2.
2748 0 : masm.vpextrw(lane, input, output);
2749 :
2750 0 : if (signedness == SimdSign::Signed)
2751 0 : masm.movswl(output, output);
2752 0 : }
2753 :
2754 : // Extract an integer lane from the 8x16 vector register |input|, sign- or
2755 : // zero-extend to 32 bits and place the result in |output|.
2756 : void
2757 0 : CodeGeneratorX86Shared::emitSimdExtractLane8x16(FloatRegister input, Register output,
2758 : unsigned lane, SimdSign signedness)
2759 : {
2760 0 : if (AssemblerX86Shared::HasSSE41()) {
2761 0 : masm.vpextrb(lane, input, output);
2762 : // vpextrb clears the high bits, so no further extension required.
2763 0 : if (signedness == SimdSign::Unsigned)
2764 0 : signedness = SimdSign::NotApplicable;
2765 : } else {
2766 :         // Extract the 16-bit chunk containing our 8-bit lane, then shift the
2767 :         // desired 8 bits into place.
2768 0 : emitSimdExtractLane16x8(input, output, lane / 2, SimdSign::Unsigned);
2769 0 : if (lane % 2) {
2770 0 : masm.shrl(Imm32(8), output);
2771 : // The shrl handles the zero-extension. Don't repeat it.
2772 0 : if (signedness == SimdSign::Unsigned)
2773 0 : signedness = SimdSign::NotApplicable;
2774 : }
2775 : }
2776 :
2777 : // We have the right low 8 bits in |output|, but we may need to fix the high
2778 : // bits. Note that this requires |output| to be one of the %eax-%edx
2779 : // registers.
2780 0 : switch (signedness) {
2781 : case SimdSign::Signed:
2782 0 : masm.movsbl(output, output);
2783 0 : break;
2784 : case SimdSign::Unsigned:
2785 0 : masm.movzbl(output, output);
2786 0 : break;
2787 : case SimdSign::NotApplicable:
2788 : // No adjustment needed.
2789 0 : break;
2790 : }
2791 0 : }
2792 :
2793 : void
2794 0 : CodeGeneratorX86Shared::visitSimdExtractElementB(LSimdExtractElementB* ins)
2795 : {
2796 0 : FloatRegister input = ToFloatRegister(ins->input());
2797 0 : Register output = ToRegister(ins->output());
2798 0 : MSimdExtractElement* mir = ins->mir();
2799 0 : unsigned length = SimdTypeToLength(mir->specialization());
2800 :
2801 0 : switch (length) {
2802 : case 4:
2803 0 : emitSimdExtractLane32x4(input, output, mir->lane());
2804 0 : break;
2805 : case 8:
2806 : // Get a lane, don't bother fixing the high bits since we'll mask below.
2807 0 : emitSimdExtractLane16x8(input, output, mir->lane(), SimdSign::NotApplicable);
2808 0 : break;
2809 : case 16:
2810 0 : emitSimdExtractLane8x16(input, output, mir->lane(), SimdSign::NotApplicable);
2811 0 : break;
2812 : default:
2813 0 : MOZ_CRASH("Unhandled SIMD length");
2814 : }
2815 :
2816 : // We need to generate a 0/1 value. We have 0/-1 and possibly dirty high bits.
2817 0 : masm.and32(Imm32(1), output);
2818 0 : }
2819 :
2820 : void
2821 0 : CodeGeneratorX86Shared::visitSimdExtractElementI(LSimdExtractElementI* ins)
2822 : {
2823 0 : FloatRegister input = ToFloatRegister(ins->input());
2824 0 : Register output = ToRegister(ins->output());
2825 0 : MSimdExtractElement* mir = ins->mir();
2826 0 : unsigned length = SimdTypeToLength(mir->specialization());
2827 :
2828 0 : switch (length) {
2829 : case 4:
2830 0 : emitSimdExtractLane32x4(input, output, mir->lane());
2831 0 : break;
2832 : case 8:
2833 0 : emitSimdExtractLane16x8(input, output, mir->lane(), mir->signedness());
2834 0 : break;
2835 : case 16:
2836 0 : emitSimdExtractLane8x16(input, output, mir->lane(), mir->signedness());
2837 0 : break;
2838 : default:
2839 0 : MOZ_CRASH("Unhandled SIMD length");
2840 : }
2841 0 : }
2842 :
2843 : void
2844 0 : CodeGeneratorX86Shared::visitSimdExtractElementU2D(LSimdExtractElementU2D* ins)
2845 : {
2846 0 : FloatRegister input = ToFloatRegister(ins->input());
2847 0 : FloatRegister output = ToFloatRegister(ins->output());
2848 0 : Register temp = ToRegister(ins->temp());
2849 0 : MSimdExtractElement* mir = ins->mir();
2850 0 : MOZ_ASSERT(mir->specialization() == MIRType::Int32x4);
2851 0 : emitSimdExtractLane32x4(input, temp, mir->lane());
2852 0 : masm.convertUInt32ToDouble(temp, output);
2853 0 : }
2854 :
2855 : void
2856 0 : CodeGeneratorX86Shared::visitSimdExtractElementF(LSimdExtractElementF* ins)
2857 : {
2858 0 : FloatRegister input = ToFloatRegister(ins->input());
2859 0 : FloatRegister output = ToFloatRegister(ins->output());
2860 :
2861 0 : unsigned lane = ins->mir()->lane();
2862 0 : if (lane == 0) {
2863 : // The value we want to extract is in the low double-word
2864 0 : if (input != output)
2865 0 : masm.moveFloat32(input, output);
2866 0 : } else if (lane == 2) {
2867 0 : masm.moveHighPairToLowPairFloat32(input, output);
2868 : } else {
2869 0 : uint32_t mask = MacroAssembler::ComputeShuffleMask(lane);
2870 0 : masm.shuffleFloat32(mask, input, output);
2871 : }
2872 : // NaNs contained within SIMD values are not enforced to be canonical, so
2873 : // when we extract an element into a "regular" scalar JS value, we have to
2874 : // canonicalize. In wasm code, we can skip this, as wasm only has to
2875 : // canonicalize NaNs at FFI boundaries.
2876 0 : if (!gen->compilingWasm())
2877 0 : masm.canonicalizeFloat(output);
2878 0 : }
2879 :
2880 : void
2881 0 : CodeGeneratorX86Shared::visitSimdInsertElementI(LSimdInsertElementI* ins)
2882 : {
2883 0 : FloatRegister vector = ToFloatRegister(ins->vector());
2884 0 : Register value = ToRegister(ins->value());
2885 0 : FloatRegister output = ToFloatRegister(ins->output());
2886 0 : MOZ_ASSERT(vector == output); // defineReuseInput(0)
2887 :
2888 0 : unsigned lane = ins->lane();
2889 0 : unsigned length = ins->length();
2890 :
2891 0 : if (length == 8) {
2892 : // Available in SSE 2.
2893 0 : masm.vpinsrw(lane, value, vector, output);
2894 0 : return;
2895 : }
2896 :
2897 :     // Note that, unlike float32x4, we cannot use vmovd if the inserted
2898 : // value goes into the first component, as vmovd clears out the higher lanes
2899 : // of the output.
2900 0 : if (AssemblerX86Shared::HasSSE41()) {
2901 : // TODO: Teach Lowering that we don't need defineReuseInput if we have AVX.
2902 0 : switch (length) {
2903 : case 4:
2904 0 : masm.vpinsrd(lane, value, vector, output);
2905 0 : return;
2906 : case 16:
2907 0 : masm.vpinsrb(lane, value, vector, output);
2908 0 : return;
2909 : }
2910 : }
2911 :
2912 0 : masm.reserveStack(Simd128DataSize);
2913 0 : masm.storeAlignedSimd128Int(vector, Address(StackPointer, 0));
2914 0 : switch (length) {
2915 : case 4:
2916 0 : masm.store32(value, Address(StackPointer, lane * sizeof(int32_t)));
2917 0 : break;
2918 : case 16:
2919 :         // Note that this requires `value` to be in one of the registers where the
2920 :         // low 8 bits are addressable (%eax - %edx on x86, all of them on x86-64).
2921 0 : masm.store8(value, Address(StackPointer, lane * sizeof(int8_t)));
2922 0 : break;
2923 : default:
2924 0 : MOZ_CRASH("Unsupported SIMD length");
2925 : }
2926 0 : masm.loadAlignedSimd128Int(Address(StackPointer, 0), output);
2927 0 : masm.freeStack(Simd128DataSize);
2928 : }
2929 :
2930 : void
2931 0 : CodeGeneratorX86Shared::visitSimdInsertElementF(LSimdInsertElementF* ins)
2932 : {
2933 0 : FloatRegister vector = ToFloatRegister(ins->vector());
2934 0 : FloatRegister value = ToFloatRegister(ins->value());
2935 0 : FloatRegister output = ToFloatRegister(ins->output());
2936 0 : MOZ_ASSERT(vector == output); // defineReuseInput(0)
2937 :
2938 0 : if (ins->lane() == 0) {
2939 : // As both operands are registers, vmovss doesn't modify the upper bits
2940 : // of the destination operand.
2941 0 : if (value != output)
2942 0 : masm.vmovss(value, vector, output);
2943 0 : return;
2944 : }
2945 :
2946 0 : if (AssemblerX86Shared::HasSSE41()) {
2947 : // The input value is in the low float32 of the 'value' FloatRegister.
2948 0 : masm.vinsertps(masm.vinsertpsMask(0, ins->lane()), value, output, output);
2949 0 : return;
2950 : }
2951 :
2952 0 : unsigned component = unsigned(ins->lane());
2953 0 : masm.reserveStack(Simd128DataSize);
2954 0 : masm.storeAlignedSimd128Float(vector, Address(StackPointer, 0));
2955 0 : masm.storeFloat32(value, Address(StackPointer, component * sizeof(int32_t)));
2956 0 : masm.loadAlignedSimd128Float(Address(StackPointer, 0), output);
2957 0 : masm.freeStack(Simd128DataSize);
2958 : }
2959 :
2960 : void
2961 0 : CodeGeneratorX86Shared::visitSimdAllTrue(LSimdAllTrue* ins)
2962 : {
2963 0 : FloatRegister input = ToFloatRegister(ins->input());
2964 0 : Register output = ToRegister(ins->output());
2965 :
2966 : // We know that the input lanes are boolean, so they are either 0 or -1.
2967 : // The all-true vector has all 128 bits set, no matter the lane geometry.
2968 0 : masm.vpmovmskb(input, output);
2969 0 : masm.cmp32(output, Imm32(0xffff));
2970 0 : masm.emitSet(Assembler::Zero, output);
2971 0 : }
2972 :
2973 : void
2974 0 : CodeGeneratorX86Shared::visitSimdAnyTrue(LSimdAnyTrue* ins)
2975 : {
2976 0 : FloatRegister input = ToFloatRegister(ins->input());
2977 0 : Register output = ToRegister(ins->output());
2978 :
2979 0 : masm.vpmovmskb(input, output);
2980 0 : masm.cmp32(output, Imm32(0x0));
2981 0 : masm.emitSet(Assembler::NonZero, output);
2982 0 : }
2983 :
2984 : template <class T, class Reg> void
2985 0 : CodeGeneratorX86Shared::visitSimdGeneralShuffle(LSimdGeneralShuffleBase* ins, Reg tempRegister)
2986 : {
2987 0 : MSimdGeneralShuffle* mir = ins->mir();
2988 0 : unsigned numVectors = mir->numVectors();
2989 :
2990 0 : Register laneTemp = ToRegister(ins->temp());
2991 :
2992 : // This won't generate fast code, but it's fine because we expect users
2993 :     // to have used constant indices (and thus MSimdGeneralShuffle to be folded
2994 : // into MSimdSwizzle/MSimdShuffle, which are fast).
2995 :
2996 : // We need stack space for the numVectors inputs and for the output vector.
2997 0 : unsigned stackSpace = Simd128DataSize * (numVectors + 1);
2998 0 : masm.reserveStack(stackSpace);
2999 :
3000 0 : for (unsigned i = 0; i < numVectors; i++) {
3001 0 : masm.storeAlignedVector<T>(ToFloatRegister(ins->vector(i)),
3002 : Address(StackPointer, Simd128DataSize * (1 + i)));
3003 : }
3004 :
3005 0 : Label bail;
3006 0 : const Scale laneScale = ScaleFromElemWidth(sizeof(T));
3007 :
3008 0 : for (size_t i = 0; i < mir->numLanes(); i++) {
3009 0 : Operand lane = ToOperand(ins->lane(i));
3010 :
3011 0 : masm.cmp32(lane, Imm32(numVectors * mir->numLanes() - 1));
3012 0 : masm.j(Assembler::Above, &bail);
3013 :
3014 0 : if (lane.kind() == Operand::REG) {
3015 0 : masm.loadScalar<T>(Operand(StackPointer, ToRegister(ins->lane(i)), laneScale, Simd128DataSize),
3016 : tempRegister);
3017 : } else {
3018 0 : masm.load32(lane, laneTemp);
3019 0 : masm.loadScalar<T>(Operand(StackPointer, laneTemp, laneScale, Simd128DataSize), tempRegister);
3020 : }
3021 :
3022 0 : masm.storeScalar<T>(tempRegister, Address(StackPointer, i * sizeof(T)));
3023 : }
3024 :
3025 0 : FloatRegister output = ToFloatRegister(ins->output());
3026 0 : masm.loadAlignedVector<T>(Address(StackPointer, 0), output);
3027 :
3028 0 : Label join;
3029 0 : masm.jump(&join);
3030 :
3031 : {
3032 0 : masm.bind(&bail);
3033 0 : masm.freeStack(stackSpace);
3034 0 : bailout(ins->snapshot());
3035 : }
3036 :
3037 0 : masm.bind(&join);
3038 0 : masm.setFramePushed(masm.framePushed() + stackSpace);
3039 0 : masm.freeStack(stackSpace);
3040 0 : }
3041 :
3042 : void
3043 0 : CodeGeneratorX86Shared::visitSimdGeneralShuffleI(LSimdGeneralShuffleI* ins)
3044 : {
3045 0 : switch (ins->mir()->type()) {
3046 : case MIRType::Int8x16:
3047 0 : return visitSimdGeneralShuffle<int8_t, Register>(ins, ToRegister(ins->temp()));
3048 : case MIRType::Int16x8:
3049 0 : return visitSimdGeneralShuffle<int16_t, Register>(ins, ToRegister(ins->temp()));
3050 : case MIRType::Int32x4:
3051 0 : return visitSimdGeneralShuffle<int32_t, Register>(ins, ToRegister(ins->temp()));
3052 : default:
3053 0 : MOZ_CRASH("unsupported type for general shuffle");
3054 : }
3055 : }
3056 : void
3057 0 : CodeGeneratorX86Shared::visitSimdGeneralShuffleF(LSimdGeneralShuffleF* ins)
3058 : {
3059 0 : ScratchFloat32Scope scratch(masm);
3060 0 : visitSimdGeneralShuffle<float, FloatRegister>(ins, scratch);
3061 0 : }
3062 :
3063 : void
3064 0 : CodeGeneratorX86Shared::visitSimdSwizzleI(LSimdSwizzleI* ins)
3065 : {
3066 0 : FloatRegister input = ToFloatRegister(ins->input());
3067 0 : FloatRegister output = ToFloatRegister(ins->output());
3068 0 : const unsigned numLanes = ins->numLanes();
3069 :
3070 0 : switch (numLanes) {
3071 : case 4: {
3072 0 : uint32_t x = ins->lane(0);
3073 0 : uint32_t y = ins->lane(1);
3074 0 : uint32_t z = ins->lane(2);
3075 0 : uint32_t w = ins->lane(3);
3076 :
3077 0 : uint32_t mask = MacroAssembler::ComputeShuffleMask(x, y, z, w);
3078 0 : masm.shuffleInt32(mask, input, output);
3079 0 : return;
3080 : }
3081 : }
3082 :
3083 : // In the general case, use pshufb if it is available. Convert to a
3084 : // byte-wise swizzle.
3085 0 : const unsigned bytesPerLane = 16 / numLanes;
3086 : int8_t bLane[16];
3087 0 : for (unsigned i = 0; i < numLanes; i++) {
3088 0 : for (unsigned b = 0; b < bytesPerLane; b++) {
3089 0 : bLane[i * bytesPerLane + b] = ins->lane(i) * bytesPerLane + b;
3090 : }
3091 : }
3092 :
3093 0 : if (AssemblerX86Shared::HasSSSE3()) {
3094 0 : ScratchSimd128Scope scratch(masm);
3095 0 : masm.loadConstantSimd128Int(SimdConstant::CreateX16(bLane), scratch);
3096 0 : FloatRegister inputCopy = masm.reusedInputInt32x4(input, output);
3097 0 : masm.vpshufb(scratch, inputCopy, output);
3098 0 : return;
3099 : }
3100 :
3101 : // Worst-case fallback for pre-SSSE3 machines. Bounce through memory.
3102 0 : Register temp = ToRegister(ins->getTemp(0));
3103 0 : masm.reserveStack(2 * Simd128DataSize);
3104 0 : masm.storeAlignedSimd128Int(input, Address(StackPointer, Simd128DataSize));
3105 0 : for (unsigned i = 0; i < 16; i++) {
3106 0 : masm.load8ZeroExtend(Address(StackPointer, Simd128DataSize + bLane[i]), temp);
3107 0 : masm.store8(temp, Address(StackPointer, i));
3108 : }
3109 0 : masm.loadAlignedSimd128Int(Address(StackPointer, 0), output);
3110 0 : masm.freeStack(2 * Simd128DataSize);
3111 : }
3112 :
3113 : void
3114 0 : CodeGeneratorX86Shared::visitSimdSwizzleF(LSimdSwizzleF* ins)
3115 : {
3116 0 : FloatRegister input = ToFloatRegister(ins->input());
3117 0 : FloatRegister output = ToFloatRegister(ins->output());
3118 0 : MOZ_ASSERT(ins->numLanes() == 4);
3119 :
3120 0 : uint32_t x = ins->lane(0);
3121 0 : uint32_t y = ins->lane(1);
3122 0 : uint32_t z = ins->lane(2);
3123 0 : uint32_t w = ins->lane(3);
3124 :
3125 0 : if (AssemblerX86Shared::HasSSE3()) {
3126 0 : if (ins->lanesMatch(0, 0, 2, 2)) {
3127 0 : masm.vmovsldup(input, output);
3128 0 : return;
3129 : }
3130 0 : if (ins->lanesMatch(1, 1, 3, 3)) {
3131 0 : masm.vmovshdup(input, output);
3132 0 : return;
3133 : }
3134 : }
3135 :
3136 :     // TODO: Here and below, arch-specific lowering could identify this pattern
3137 : // and use defineReuseInput to avoid this move (bug 1084404)
3138 0 : if (ins->lanesMatch(2, 3, 2, 3)) {
3139 0 : FloatRegister inputCopy = masm.reusedInputFloat32x4(input, output);
3140 0 : masm.vmovhlps(input, inputCopy, output);
3141 0 : return;
3142 : }
3143 :
3144 0 : if (ins->lanesMatch(0, 1, 0, 1)) {
3145 0 : if (AssemblerX86Shared::HasSSE3() && !AssemblerX86Shared::HasAVX()) {
3146 0 : masm.vmovddup(input, output);
3147 0 : return;
3148 : }
3149 0 : FloatRegister inputCopy = masm.reusedInputFloat32x4(input, output);
3150 0 : masm.vmovlhps(input, inputCopy, output);
3151 0 : return;
3152 : }
3153 :
3154 0 : if (ins->lanesMatch(0, 0, 1, 1)) {
3155 0 : FloatRegister inputCopy = masm.reusedInputFloat32x4(input, output);
3156 0 : masm.vunpcklps(input, inputCopy, output);
3157 0 : return;
3158 : }
3159 :
3160 0 : if (ins->lanesMatch(2, 2, 3, 3)) {
3161 0 : FloatRegister inputCopy = masm.reusedInputFloat32x4(input, output);
3162 0 : masm.vunpckhps(input, inputCopy, output);
3163 0 : return;
3164 : }
3165 :
3166 0 : uint32_t mask = MacroAssembler::ComputeShuffleMask(x, y, z, w);
3167 0 : masm.shuffleFloat32(mask, input, output);
3168 : }
3169 :
3170 : void
3171 0 : CodeGeneratorX86Shared::visitSimdShuffle(LSimdShuffle* ins)
3172 : {
3173 0 : FloatRegister lhs = ToFloatRegister(ins->lhs());
3174 0 : FloatRegister rhs = ToFloatRegister(ins->rhs());
3175 0 : FloatRegister output = ToFloatRegister(ins->output());
3176 0 : const unsigned numLanes = ins->numLanes();
3177 0 : const unsigned bytesPerLane = 16 / numLanes;
3178 :
3179 : // Convert the shuffle to a byte-wise shuffle.
3180 : uint8_t bLane[16];
3181 0 : for (unsigned i = 0; i < numLanes; i++) {
3182 0 : for (unsigned b = 0; b < bytesPerLane; b++) {
3183 0 : bLane[i * bytesPerLane + b] = ins->lane(i) * bytesPerLane + b;
3184 : }
3185 : }
3186 :
3187 : // Use pshufb if it is available.
3188 0 : if (AssemblerX86Shared::HasSSSE3()) {
3189 0 : FloatRegister scratch1 = ToFloatRegister(ins->temp());
3190 0 : ScratchSimd128Scope scratch2(masm);
3191 :
3192 : // Use pshufb instructions to gather the lanes from each source vector.
3193 : // A negative index creates a zero lane, so the two vectors can be combined.
3194 :
3195 : // Set scratch2 = lanes from lhs.
3196 : int8_t idx[16];
3197 0 : for (unsigned i = 0; i < 16; i++)
3198 0 : idx[i] = bLane[i] < 16 ? bLane[i] : -1;
3199 0 : masm.loadConstantSimd128Int(SimdConstant::CreateX16(idx), scratch1);
3200 0 : FloatRegister lhsCopy = masm.reusedInputInt32x4(lhs, scratch2);
3201 0 : masm.vpshufb(scratch1, lhsCopy, scratch2);
3202 :
3203 : // Set output = lanes from rhs.
3204 0 : for (unsigned i = 0; i < 16; i++)
3205 0 : idx[i] = bLane[i] >= 16 ? bLane[i] - 16 : -1;
3206 0 : masm.loadConstantSimd128Int(SimdConstant::CreateX16(idx), scratch1);
3207 0 : FloatRegister rhsCopy = masm.reusedInputInt32x4(rhs, output);
3208 0 : masm.vpshufb(scratch1, rhsCopy, output);
3209 :
3210 : // Combine.
3211 0 : masm.vpor(scratch2, output, output);
3212 0 : return;
3213 : }
3214 :
3215 : // Worst-case fallback for pre-SSSE3 machines. Bounce through memory.
3216 0 : Register temp = ToRegister(ins->getTemp(0));
3217 0 : masm.reserveStack(3 * Simd128DataSize);
3218 0 : masm.storeAlignedSimd128Int(lhs, Address(StackPointer, Simd128DataSize));
3219 0 : masm.storeAlignedSimd128Int(rhs, Address(StackPointer, 2 * Simd128DataSize));
3220 0 : for (unsigned i = 0; i < 16; i++) {
3221 0 : masm.load8ZeroExtend(Address(StackPointer, Simd128DataSize + bLane[i]), temp);
3222 0 : masm.store8(temp, Address(StackPointer, i));
3223 : }
3224 0 : masm.loadAlignedSimd128Int(Address(StackPointer, 0), output);
3225 0 : masm.freeStack(3 * Simd128DataSize);
3226 : }
3227 :
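// Hedged sketch of the two-input shuffle above: lane indices are first widened
// to byte indices (bLane), and each pshufb-style pass turns a negative index
// into a zero byte, so the lhs and rhs selections can simply be OR'd together.
// selectBytes and byteShuffle are illustrative names only.
#include <cstdint>

// One pshufb-like pass: idx[i] < 0 gives 0, otherwise src[idx[i]].
static void selectBytes(const uint8_t src[16], const int8_t idx[16], uint8_t out[16])
{
    for (unsigned i = 0; i < 16; i++)
        out[i] = idx[i] < 0 ? 0 : src[idx[i] & 15];
}

// bLane[i] in [0,16) selects from lhs, in [16,32) selects from rhs.
static void byteShuffle(const uint8_t lhs[16], const uint8_t rhs[16],
                        const uint8_t bLane[16], uint8_t out[16])
{
    int8_t fromLhs[16], fromRhs[16];
    for (unsigned i = 0; i < 16; i++) {
        fromLhs[i] = bLane[i] < 16 ? int8_t(bLane[i]) : int8_t(-1);
        fromRhs[i] = bLane[i] >= 16 ? int8_t(bLane[i] - 16) : int8_t(-1);
    }
    uint8_t a[16];
    uint8_t b[16];
    selectBytes(lhs, fromLhs, a);
    selectBytes(rhs, fromRhs, b);
    for (unsigned i = 0; i < 16; i++)
        out[i] = a[i] | b[i];
}
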
3228 : void
3229 0 : CodeGeneratorX86Shared::visitSimdShuffleX4(LSimdShuffleX4* ins)
3230 : {
3231 0 : FloatRegister lhs = ToFloatRegister(ins->lhs());
3232 0 : Operand rhs = ToOperand(ins->rhs());
3233 0 : FloatRegister out = ToFloatRegister(ins->output());
3234 :
3235 0 : uint32_t x = ins->lane(0);
3236 0 : uint32_t y = ins->lane(1);
3237 0 : uint32_t z = ins->lane(2);
3238 0 : uint32_t w = ins->lane(3);
3239 :
3240 : // Check that the majority of lanes come from LHS:
3241 0 : unsigned numLanesFromLHS = (x < 4) + (y < 4) + (z < 4) + (w < 4);
3242 0 : MOZ_ASSERT(numLanesFromLHS >= 2);
3243 :
3244 : // When reading this method, remember that vshufps takes the two first
3245 : // inputs of the destination operand (right operand) and the two last
3246 : // inputs of the source operand (left operand).
3247 : //
3248 : // Legend for explanations:
3249 : // - L: LHS
3250 : // - R: RHS
3251 : // - T: temporary
3252 :
3253 : uint32_t mask;
3254 :
3255 : // If all lanes came from a single vector, we should have constructed a
3256 : // MSimdSwizzle instead.
3257 0 : MOZ_ASSERT(numLanesFromLHS < 4);
3258 :
3259 : // If all values stay in their lane, this is a blend.
3260 0 : if (AssemblerX86Shared::HasSSE41()) {
3261 0 : if (x % 4 == 0 && y % 4 == 1 && z % 4 == 2 && w % 4 == 3) {
3262 0 : masm.vblendps(masm.blendpsMask(x >= 4, y >= 4, z >= 4, w >= 4), rhs, lhs, out);
3263 0 : return;
3264 : }
3265 : }
3266 :
3267 : // One element of the second, all other elements of the first
3268 0 : if (numLanesFromLHS == 3) {
3269 0 : unsigned firstMask = -1, secondMask = -1;
3270 :
3271 : // register-register vmovss preserves the high lanes.
3272 0 : if (ins->lanesMatch(4, 1, 2, 3) && rhs.kind() == Operand::FPREG) {
3273 0 : masm.vmovss(FloatRegister::FromCode(rhs.fpu()), lhs, out);
3274 0 : return;
3275 : }
3276 :
3277 : // SSE4.1 vinsertps can handle any single element.
3278 0 : unsigned numLanesUnchanged = (x == 0) + (y == 1) + (z == 2) + (w == 3);
3279 0 : if (AssemblerX86Shared::HasSSE41() && numLanesUnchanged == 3) {
3280 : unsigned srcLane;
3281 : unsigned dstLane;
3282 0 : if (x >= 4) {
3283 0 : srcLane = x - 4;
3284 0 : dstLane = 0;
3285 0 : } else if (y >= 4) {
3286 0 : srcLane = y - 4;
3287 0 : dstLane = 1;
3288 0 : } else if (z >= 4) {
3289 0 : srcLane = z - 4;
3290 0 : dstLane = 2;
3291 : } else {
3292 0 : MOZ_ASSERT(w >= 4);
3293 0 : srcLane = w - 4;
3294 0 : dstLane = 3;
3295 : }
3296 0 : masm.vinsertps(masm.vinsertpsMask(srcLane, dstLane), rhs, lhs, out);
3297 0 : return;
3298 : }
3299 :
3300 0 : FloatRegister rhsCopy = ToFloatRegister(ins->temp());
3301 :
3302 0 : if (x < 4 && y < 4) {
3303 0 : if (w >= 4) {
3304 0 : w %= 4;
3305 : // T = (Rw Rw Lz Lz) = vshufps(firstMask, lhs, rhs, rhsCopy)
3306 0 : firstMask = MacroAssembler::ComputeShuffleMask(w, w, z, z);
3307 : // (Lx Ly Lz Rw) = (Lx Ly Tz Tx) = vshufps(secondMask, T, lhs, out)
3308 0 : secondMask = MacroAssembler::ComputeShuffleMask(x, y, 2, 0);
3309 : } else {
3310 0 : MOZ_ASSERT(z >= 4);
3311 0 : z %= 4;
3312 : // T = (Rz Rz Lw Lw) = vshufps(firstMask, lhs, rhs, rhsCopy)
3313 0 : firstMask = MacroAssembler::ComputeShuffleMask(z, z, w, w);
3314 : // (Lx Ly Rz Lw) = (Lx Ly Tx Tz) = vshufps(secondMask, T, lhs, out)
3315 0 : secondMask = MacroAssembler::ComputeShuffleMask(x, y, 0, 2);
3316 : }
3317 :
3318 0 : masm.vshufps(firstMask, lhs, rhsCopy, rhsCopy);
3319 0 : masm.vshufps(secondMask, rhsCopy, lhs, out);
3320 0 : return;
3321 : }
3322 :
3323 0 : MOZ_ASSERT(z < 4 && w < 4);
3324 :
3325 0 : if (y >= 4) {
3326 0 : y %= 4;
3327 : // T = (Ry Ry Lx Lx) = vshufps(firstMask, lhs, rhs, rhsCopy)
3328 0 : firstMask = MacroAssembler::ComputeShuffleMask(y, y, x, x);
3329 : // (Lx Ry Lz Lw) = (Tz Tx Lz Lw) = vshufps(secondMask, lhs, T, out)
3330 0 : secondMask = MacroAssembler::ComputeShuffleMask(2, 0, z, w);
3331 : } else {
3332 0 : MOZ_ASSERT(x >= 4);
3333 0 : x %= 4;
3334 : // T = (Rx Rx Ly Ly) = vshufps(firstMask, lhs, rhs, rhsCopy)
3335 0 : firstMask = MacroAssembler::ComputeShuffleMask(x, x, y, y);
3336 : // (Rx Ly Lz Lw) = (Tx Tz Lz Lw) = vshufps(secondMask, lhs, T, out)
3337 0 : secondMask = MacroAssembler::ComputeShuffleMask(0, 2, z, w);
3338 : }
3339 :
3340 0 : masm.vshufps(firstMask, lhs, rhsCopy, rhsCopy);
3341 0 : if (AssemblerX86Shared::HasAVX()) {
3342 0 : masm.vshufps(secondMask, lhs, rhsCopy, out);
3343 : } else {
3344 0 : masm.vshufps(secondMask, lhs, rhsCopy, rhsCopy);
3345 0 : masm.moveSimd128Float(rhsCopy, out);
3346 : }
3347 0 : return;
3348 : }
3349 :
3350 : // Two elements from one vector, two other elements from the other
3351 0 : MOZ_ASSERT(numLanesFromLHS == 2);
3352 :
3353 : // TODO Here and below, the symmetric case would be handier (it avoids a move),
3354 : // but it can't be reached because the operands would get swapped (bug 1084404).
3355 0 : if (ins->lanesMatch(2, 3, 6, 7)) {
3356 0 : ScratchSimd128Scope scratch(masm);
3357 0 : if (AssemblerX86Shared::HasAVX()) {
3358 0 : FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, scratch);
3359 0 : masm.vmovhlps(lhs, rhsCopy, out);
3360 : } else {
3361 0 : masm.loadAlignedSimd128Float(rhs, scratch);
3362 0 : masm.vmovhlps(lhs, scratch, scratch);
3363 0 : masm.moveSimd128Float(scratch, out);
3364 : }
3365 0 : return;
3366 : }
3367 :
3368 0 : if (ins->lanesMatch(0, 1, 4, 5)) {
3369 0 : FloatRegister rhsCopy;
3370 0 : ScratchSimd128Scope scratch(masm);
3371 0 : if (rhs.kind() == Operand::FPREG) {
3372 : // No need to make an actual copy, since the operand is already
3373 : // in a register, and it won't be clobbered by the vmovlhps.
3374 0 : rhsCopy = FloatRegister::FromCode(rhs.fpu());
3375 : } else {
3376 0 : masm.loadAlignedSimd128Float(rhs, scratch);
3377 0 : rhsCopy = scratch;
3378 : }
3379 0 : masm.vmovlhps(rhsCopy, lhs, out);
3380 0 : return;
3381 : }
3382 :
3383 0 : if (ins->lanesMatch(0, 4, 1, 5)) {
3384 0 : masm.vunpcklps(rhs, lhs, out);
3385 0 : return;
3386 : }
3387 :
3388 : // TODO swapped case would be better (bug 1084404)
3389 0 : if (ins->lanesMatch(4, 0, 5, 1)) {
3390 0 : ScratchSimd128Scope scratch(masm);
3391 0 : if (AssemblerX86Shared::HasAVX()) {
3392 0 : FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, scratch);
3393 0 : masm.vunpcklps(lhs, rhsCopy, out);
3394 : } else {
3395 0 : masm.loadAlignedSimd128Float(rhs, scratch);
3396 0 : masm.vunpcklps(lhs, scratch, scratch);
3397 0 : masm.moveSimd128Float(scratch, out);
3398 : }
3399 0 : return;
3400 : }
3401 :
3402 0 : if (ins->lanesMatch(2, 6, 3, 7)) {
3403 0 : masm.vunpckhps(rhs, lhs, out);
3404 0 : return;
3405 : }
3406 :
3407 : // TODO swapped case would be better (bug 1084404)
3408 0 : if (ins->lanesMatch(6, 2, 7, 3)) {
3409 0 : ScratchSimd128Scope scratch(masm);
3410 0 : if (AssemblerX86Shared::HasAVX()) {
3411 0 : FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, scratch);
3412 0 : masm.vunpckhps(lhs, rhsCopy, out);
3413 : } else {
3414 0 : masm.loadAlignedSimd128Float(rhs, scratch);
3415 0 : masm.vunpckhps(lhs, scratch, scratch);
3416 0 : masm.moveSimd128Float(scratch, out);
3417 : }
3418 0 : return;
3419 : }
3420 :
3421 : // In one vshufps
3422 0 : if (x < 4 && y < 4) {
3423 0 : mask = MacroAssembler::ComputeShuffleMask(x, y, z % 4, w % 4);
3424 0 : masm.vshufps(mask, rhs, lhs, out);
3425 0 : return;
3426 : }
3427 :
3428 : // At creation, we should have explicitly swapped in this case.
3429 0 : MOZ_ASSERT(!(z >= 4 && w >= 4));
3430 :
3431 : // In two vshufps, for the most generic case:
3432 : uint32_t firstMask[4], secondMask[4];
3433 0 : unsigned i = 0, j = 2, k = 0;
3434 :
3435 : #define COMPUTE_MASK(lane) \
3436 : if (lane >= 4) { \
3437 : firstMask[j] = lane % 4; \
3438 : secondMask[k++] = j++; \
3439 : } else { \
3440 : firstMask[i] = lane; \
3441 : secondMask[k++] = i++; \
3442 : }
3443 :
3444 0 : COMPUTE_MASK(x)
3445 0 : COMPUTE_MASK(y)
3446 0 : COMPUTE_MASK(z)
3447 0 : COMPUTE_MASK(w)
3448 : #undef COMPUTE_MASK
3449 :
3450 0 : MOZ_ASSERT(i == 2 && j == 4 && k == 4);
3451 :
3452 0 : mask = MacroAssembler::ComputeShuffleMask(firstMask[0], firstMask[1],
3453 0 : firstMask[2], firstMask[3]);
3454 0 : masm.vshufps(mask, rhs, lhs, lhs);
3455 :
3456 0 : mask = MacroAssembler::ComputeShuffleMask(secondMask[0], secondMask[1],
3457 0 : secondMask[2], secondMask[3]);
3458 0 : masm.vshufps(mask, lhs, lhs, lhs);
3459 : }
3460 :
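// A scalar model, for illustration only, of the generic two-vshufps path that
// ends the function above (it assumes exactly two lanes come from each input,
// as asserted there). Pass 1 gathers the two lhs lanes into positions 0-1 and
// the two rhs lanes into positions 2-3; pass 2 permutes that intermediate into
// the requested order. shuffleX4Generic is an invented name for this sketch.
#include <array>
#include <cstdint>

static std::array<float, 4> shuffleX4Generic(const std::array<float, 4>& lhs,
                                             const std::array<float, 4>& rhs,
                                             const std::array<uint32_t, 4>& lane)
{
    uint32_t firstMask[4], secondMask[4];
    unsigned i = 0, j = 2, k = 0;
    for (uint32_t l : lane) {
        if (l >= 4) {                  // lane taken from rhs
            firstMask[j] = l % 4;
            secondMask[k++] = j++;
        } else {                       // lane taken from lhs
            firstMask[i] = l;
            secondMask[k++] = i++;
        }
    }
    // First vshufps: low half from lhs, high half from rhs.
    std::array<float, 4> tmp = { lhs[firstMask[0]], lhs[firstMask[1]],
                                 rhs[firstMask[2]], rhs[firstMask[3]] };
    // Second vshufps: permute the intermediate into the requested lane order.
    return { tmp[secondMask[0]], tmp[secondMask[1]],
             tmp[secondMask[2]], tmp[secondMask[3]] };
}
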
3461 : void
3462 0 : CodeGeneratorX86Shared::visitSimdBinaryCompIx16(LSimdBinaryCompIx16* ins)
3463 : {
3464 0 : static const SimdConstant allOnes = SimdConstant::SplatX16(-1);
3465 :
3466 0 : FloatRegister lhs = ToFloatRegister(ins->lhs());
3467 0 : Operand rhs = ToOperand(ins->rhs());
3468 0 : FloatRegister output = ToFloatRegister(ins->output());
3469 0 : MOZ_ASSERT_IF(!Assembler::HasAVX(), output == lhs);
3470 :
3471 0 : ScratchSimd128Scope scratch(masm);
3472 :
3473 0 : MSimdBinaryComp::Operation op = ins->operation();
3474 0 : switch (op) {
3475 : case MSimdBinaryComp::greaterThan:
3476 0 : masm.vpcmpgtb(rhs, lhs, output);
3477 0 : return;
3478 : case MSimdBinaryComp::equal:
3479 0 : masm.vpcmpeqb(rhs, lhs, output);
3480 0 : return;
3481 : case MSimdBinaryComp::lessThan:
3482 : // src := rhs
3483 0 : if (rhs.kind() == Operand::FPREG)
3484 0 : masm.moveSimd128Int(ToFloatRegister(ins->rhs()), scratch);
3485 : else
3486 0 : masm.loadAlignedSimd128Int(rhs, scratch);
3487 :
3488 : // src := src > lhs (i.e. lhs < rhs)
3489 : // Improve by doing custom lowering (rhs is tied to the output register)
3490 0 : masm.vpcmpgtb(ToOperand(ins->lhs()), scratch, scratch);
3491 0 : masm.moveSimd128Int(scratch, output);
3492 0 : return;
3493 : case MSimdBinaryComp::notEqual:
3494 : // Ideally for notEqual, greaterThanOrEqual, and lessThanOrEqual, we
3495 : // should invert the comparison by, e.g. swapping the arms of a select
3496 : // if that's what it's used in.
3497 0 : masm.loadConstantSimd128Int(allOnes, scratch);
3498 0 : masm.vpcmpeqb(rhs, lhs, output);
3499 0 : masm.bitwiseXorSimd128(Operand(scratch), output);
3500 0 : return;
3501 : case MSimdBinaryComp::greaterThanOrEqual:
3502 : // src := rhs
3503 0 : if (rhs.kind() == Operand::FPREG)
3504 0 : masm.moveSimd128Int(ToFloatRegister(ins->rhs()), scratch);
3505 : else
3506 0 : masm.loadAlignedSimd128Int(rhs, scratch);
3507 0 : masm.vpcmpgtb(ToOperand(ins->lhs()), scratch, scratch);
3508 0 : masm.loadConstantSimd128Int(allOnes, output);
3509 0 : masm.bitwiseXorSimd128(Operand(scratch), output);
3510 0 : return;
3511 : case MSimdBinaryComp::lessThanOrEqual:
3512 : // lhs <= rhs is equivalent to !(rhs < lhs), which we compute here.
3513 0 : masm.loadConstantSimd128Int(allOnes, scratch);
3514 0 : masm.vpcmpgtb(rhs, lhs, output);
3515 0 : masm.bitwiseXorSimd128(Operand(scratch), output);
3516 0 : return;
3517 : }
3518 0 : MOZ_CRASH("unexpected SIMD op");
3519 : }
3520 :
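// Hedged scalar model of how the comparisons without a native SSE instruction
// are synthesized above: lessThan swaps the operands of greater-than, and the
// *OrEqual / notEqual forms run the native predicate and then flip every bit
// (xor with all-ones). Helper names are invented for this sketch.
#include <cstdint>

static int8_t laneMask(bool b) { return b ? int8_t(-1) : int8_t(0); } // all-ones or all-zeros

static void lessThanI8x16(const int8_t lhs[16], const int8_t rhs[16], int8_t out[16])
{
    for (unsigned i = 0; i < 16; i++)
        out[i] = laneMask(rhs[i] > lhs[i]);                 // lhs < rhs == rhs > lhs
}

static void lessThanOrEqualI8x16(const int8_t lhs[16], const int8_t rhs[16], int8_t out[16])
{
    for (unsigned i = 0; i < 16; i++)
        out[i] = int8_t(~laneMask(lhs[i] > rhs[i]));        // !(lhs > rhs): pcmpgtb, then xor all-ones
}

static void notEqualI8x16(const int8_t lhs[16], const int8_t rhs[16], int8_t out[16])
{
    for (unsigned i = 0; i < 16; i++)
        out[i] = int8_t(~laneMask(lhs[i] == rhs[i]));       // pcmpeqb, then xor all-ones
}
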
3521 : void
3522 0 : CodeGeneratorX86Shared::visitSimdBinaryCompIx8(LSimdBinaryCompIx8* ins)
3523 : {
3524 0 : static const SimdConstant allOnes = SimdConstant::SplatX8(-1);
3525 :
3526 0 : FloatRegister lhs = ToFloatRegister(ins->lhs());
3527 0 : Operand rhs = ToOperand(ins->rhs());
3528 0 : FloatRegister output = ToFloatRegister(ins->output());
3529 0 : MOZ_ASSERT_IF(!Assembler::HasAVX(), output == lhs);
3530 :
3531 0 : ScratchSimd128Scope scratch(masm);
3532 :
3533 0 : MSimdBinaryComp::Operation op = ins->operation();
3534 0 : switch (op) {
3535 : case MSimdBinaryComp::greaterThan:
3536 0 : masm.vpcmpgtw(rhs, lhs, output);
3537 0 : return;
3538 : case MSimdBinaryComp::equal:
3539 0 : masm.vpcmpeqw(rhs, lhs, output);
3540 0 : return;
3541 : case MSimdBinaryComp::lessThan:
3542 : // src := rhs
3543 0 : if (rhs.kind() == Operand::FPREG)
3544 0 : masm.moveSimd128Int(ToFloatRegister(ins->rhs()), scratch);
3545 : else
3546 0 : masm.loadAlignedSimd128Int(rhs, scratch);
3547 :
3548 : // src := src > lhs (i.e. lhs < rhs)
3549 : // Improve by doing custom lowering (rhs is tied to the output register)
3550 0 : masm.vpcmpgtw(ToOperand(ins->lhs()), scratch, scratch);
3551 0 : masm.moveSimd128Int(scratch, output);
3552 0 : return;
3553 : case MSimdBinaryComp::notEqual:
3554 : // Ideally for notEqual, greaterThanOrEqual, and lessThanOrEqual, we
3555 : // should invert the comparison by, e.g. swapping the arms of a select
3556 : // if that's what it's used in.
3557 0 : masm.loadConstantSimd128Int(allOnes, scratch);
3558 0 : masm.vpcmpeqw(rhs, lhs, output);
3559 0 : masm.bitwiseXorSimd128(Operand(scratch), output);
3560 0 : return;
3561 : case MSimdBinaryComp::greaterThanOrEqual:
3562 : // src := rhs
3563 0 : if (rhs.kind() == Operand::FPREG)
3564 0 : masm.moveSimd128Int(ToFloatRegister(ins->rhs()), scratch);
3565 : else
3566 0 : masm.loadAlignedSimd128Int(rhs, scratch);
3567 0 : masm.vpcmpgtw(ToOperand(ins->lhs()), scratch, scratch);
3568 0 : masm.loadConstantSimd128Int(allOnes, output);
3569 0 : masm.bitwiseXorSimd128(Operand(scratch), output);
3570 0 : return;
3571 : case MSimdBinaryComp::lessThanOrEqual:
3572 : // lhs <= rhs is equivalent to !(rhs < lhs), which we compute here.
3573 0 : masm.loadConstantSimd128Int(allOnes, scratch);
3574 0 : masm.vpcmpgtw(rhs, lhs, output);
3575 0 : masm.bitwiseXorSimd128(Operand(scratch), output);
3576 0 : return;
3577 : }
3578 0 : MOZ_CRASH("unexpected SIMD op");
3579 : }
3580 :
3581 : void
3582 0 : CodeGeneratorX86Shared::visitSimdBinaryCompIx4(LSimdBinaryCompIx4* ins)
3583 : {
3584 0 : static const SimdConstant allOnes = SimdConstant::SplatX4(-1);
3585 :
3586 0 : FloatRegister lhs = ToFloatRegister(ins->lhs());
3587 0 : Operand rhs = ToOperand(ins->rhs());
3588 0 : MOZ_ASSERT(ToFloatRegister(ins->output()) == lhs);
3589 :
3590 0 : ScratchSimd128Scope scratch(masm);
3591 :
3592 0 : MSimdBinaryComp::Operation op = ins->operation();
3593 0 : switch (op) {
3594 : case MSimdBinaryComp::greaterThan:
3595 0 : masm.packedGreaterThanInt32x4(rhs, lhs);
3596 0 : return;
3597 : case MSimdBinaryComp::equal:
3598 0 : masm.packedEqualInt32x4(rhs, lhs);
3599 0 : return;
3600 : case MSimdBinaryComp::lessThan:
3601 : // src := rhs
3602 0 : if (rhs.kind() == Operand::FPREG)
3603 0 : masm.moveSimd128Int(ToFloatRegister(ins->rhs()), scratch);
3604 : else
3605 0 : masm.loadAlignedSimd128Int(rhs, scratch);
3606 :
3607 : // src := src > lhs (i.e. lhs < rhs)
3608 : // Improve by doing custom lowering (rhs is tied to the output register)
3609 0 : masm.packedGreaterThanInt32x4(ToOperand(ins->lhs()), scratch);
3610 0 : masm.moveSimd128Int(scratch, lhs);
3611 0 : return;
3612 : case MSimdBinaryComp::notEqual:
3613 : // Ideally for notEqual, greaterThanOrEqual, and lessThanOrEqual, we
3614 : // should invert the comparison by, e.g. swapping the arms of a select
3615 : // if that's what it's used in.
3616 0 : masm.loadConstantSimd128Int(allOnes, scratch);
3617 0 : masm.packedEqualInt32x4(rhs, lhs);
3618 0 : masm.bitwiseXorSimd128(Operand(scratch), lhs);
3619 0 : return;
3620 : case MSimdBinaryComp::greaterThanOrEqual:
3621 : // src := rhs
3622 0 : if (rhs.kind() == Operand::FPREG)
3623 0 : masm.moveSimd128Int(ToFloatRegister(ins->rhs()), scratch);
3624 : else
3625 0 : masm.loadAlignedSimd128Int(rhs, scratch);
3626 0 : masm.packedGreaterThanInt32x4(ToOperand(ins->lhs()), scratch);
3627 0 : masm.loadConstantSimd128Int(allOnes, lhs);
3628 0 : masm.bitwiseXorSimd128(Operand(scratch), lhs);
3629 0 : return;
3630 : case MSimdBinaryComp::lessThanOrEqual:
3631 : // lhs <= rhs is equivalent to !(rhs < lhs), which we compute here.
3632 0 : masm.loadConstantSimd128Int(allOnes, scratch);
3633 0 : masm.packedGreaterThanInt32x4(rhs, lhs);
3634 0 : masm.bitwiseXorSimd128(Operand(scratch), lhs);
3635 0 : return;
3636 : }
3637 0 : MOZ_CRASH("unexpected SIMD op");
3638 : }
3639 :
3640 : void
3641 0 : CodeGeneratorX86Shared::visitSimdBinaryCompFx4(LSimdBinaryCompFx4* ins)
3642 : {
3643 0 : FloatRegister lhs = ToFloatRegister(ins->lhs());
3644 0 : Operand rhs = ToOperand(ins->rhs());
3645 0 : FloatRegister output = ToFloatRegister(ins->output());
3646 :
3647 0 : MSimdBinaryComp::Operation op = ins->operation();
3648 0 : switch (op) {
3649 : case MSimdBinaryComp::equal:
3650 0 : masm.vcmpeqps(rhs, lhs, output);
3651 0 : return;
3652 : case MSimdBinaryComp::lessThan:
3653 0 : masm.vcmpltps(rhs, lhs, output);
3654 0 : return;
3655 : case MSimdBinaryComp::lessThanOrEqual:
3656 0 : masm.vcmpleps(rhs, lhs, output);
3657 0 : return;
3658 : case MSimdBinaryComp::notEqual:
3659 0 : masm.vcmpneqps(rhs, lhs, output);
3660 0 : return;
3661 : case MSimdBinaryComp::greaterThanOrEqual:
3662 : case MSimdBinaryComp::greaterThan:
3663 : // We reverse these before register allocation so that we don't have to
3664 : // copy into and out of temporaries after codegen.
3665 0 : MOZ_CRASH("lowering should have reversed this");
3666 : }
3667 0 : MOZ_CRASH("unexpected SIMD op");
3668 : }
3669 :
3670 : void
3671 0 : CodeGeneratorX86Shared::visitSimdBinaryArithIx16(LSimdBinaryArithIx16* ins)
3672 : {
3673 0 : FloatRegister lhs = ToFloatRegister(ins->lhs());
3674 0 : Operand rhs = ToOperand(ins->rhs());
3675 0 : FloatRegister output = ToFloatRegister(ins->output());
3676 :
3677 0 : MSimdBinaryArith::Operation op = ins->operation();
3678 0 : switch (op) {
3679 : case MSimdBinaryArith::Op_add:
3680 0 : masm.vpaddb(rhs, lhs, output);
3681 0 : return;
3682 : case MSimdBinaryArith::Op_sub:
3683 0 : masm.vpsubb(rhs, lhs, output);
3684 0 : return;
3685 : case MSimdBinaryArith::Op_mul:
3686 : // 8x16 mul is a valid operation, but not supported in SSE or AVX.
3687 : // The operation is synthesized from 16x8 multiplies by
3688 : // MSimdBinaryArith::AddLegalized().
3689 0 : break;
3690 : case MSimdBinaryArith::Op_div:
3691 : case MSimdBinaryArith::Op_max:
3692 : case MSimdBinaryArith::Op_min:
3693 : case MSimdBinaryArith::Op_minNum:
3694 : case MSimdBinaryArith::Op_maxNum:
3695 0 : break;
3696 : }
3697 0 : MOZ_CRASH("unexpected SIMD op");
3698 : }
3699 :
3700 : void
3701 0 : CodeGeneratorX86Shared::visitSimdBinaryArithIx8(LSimdBinaryArithIx8* ins)
3702 : {
3703 0 : FloatRegister lhs = ToFloatRegister(ins->lhs());
3704 0 : Operand rhs = ToOperand(ins->rhs());
3705 0 : FloatRegister output = ToFloatRegister(ins->output());
3706 :
3707 0 : MSimdBinaryArith::Operation op = ins->operation();
3708 0 : switch (op) {
3709 : case MSimdBinaryArith::Op_add:
3710 0 : masm.vpaddw(rhs, lhs, output);
3711 0 : return;
3712 : case MSimdBinaryArith::Op_sub:
3713 0 : masm.vpsubw(rhs, lhs, output);
3714 0 : return;
3715 : case MSimdBinaryArith::Op_mul:
3716 0 : masm.vpmullw(rhs, lhs, output);
3717 0 : return;
3718 : case MSimdBinaryArith::Op_div:
3719 : case MSimdBinaryArith::Op_max:
3720 : case MSimdBinaryArith::Op_min:
3721 : case MSimdBinaryArith::Op_minNum:
3722 : case MSimdBinaryArith::Op_maxNum:
3723 0 : break;
3724 : }
3725 0 : MOZ_CRASH("unexpected SIMD op");
3726 : }
3727 :
3728 : void
3729 0 : CodeGeneratorX86Shared::visitSimdBinaryArithIx4(LSimdBinaryArithIx4* ins)
3730 : {
3731 0 : FloatRegister lhs = ToFloatRegister(ins->lhs());
3732 0 : Operand rhs = ToOperand(ins->rhs());
3733 0 : FloatRegister output = ToFloatRegister(ins->output());
3734 :
3735 0 : ScratchSimd128Scope scratch(masm);
3736 :
3737 0 : MSimdBinaryArith::Operation op = ins->operation();
3738 0 : switch (op) {
3739 : case MSimdBinaryArith::Op_add:
3740 0 : masm.vpaddd(rhs, lhs, output);
3741 0 : return;
3742 : case MSimdBinaryArith::Op_sub:
3743 0 : masm.vpsubd(rhs, lhs, output);
3744 0 : return;
3745 : case MSimdBinaryArith::Op_mul: {
3746 0 : if (AssemblerX86Shared::HasSSE41()) {
3747 0 : masm.vpmulld(rhs, lhs, output);
3748 0 : return;
3749 : }
3750 :
3751 0 : masm.loadAlignedSimd128Int(rhs, scratch);
3752 0 : masm.vpmuludq(lhs, scratch, scratch);
3753 : // scratch contains (Rx, _, Rz, _) where R is the resulting vector.
3754 :
3755 0 : FloatRegister temp = ToFloatRegister(ins->temp());
3756 0 : masm.vpshufd(MacroAssembler::ComputeShuffleMask(1, 1, 3, 3), lhs, lhs);
3757 0 : masm.vpshufd(MacroAssembler::ComputeShuffleMask(1, 1, 3, 3), rhs, temp);
3758 0 : masm.vpmuludq(temp, lhs, lhs);
3759 : // lhs contains (Ry, _, Rw, _) where R is the resulting vector.
3760 :
3761 0 : masm.vshufps(MacroAssembler::ComputeShuffleMask(0, 2, 0, 2), scratch, lhs, lhs);
3762 : // lhs contains (Ry, Rw, Rx, Rz)
3763 0 : masm.vshufps(MacroAssembler::ComputeShuffleMask(2, 0, 3, 1), lhs, lhs, lhs);
3764 0 : return;
3765 : }
3766 : case MSimdBinaryArith::Op_div:
3767 : // x86 doesn't have SIMD i32 div.
3768 0 : break;
3769 : case MSimdBinaryArith::Op_max:
3770 : // we can do max with a single instruction only if we have SSE4.1
3771 : // using the PMAXSD instruction.
3772 0 : break;
3773 : case MSimdBinaryArith::Op_min:
3774 : // we can do min with a single instruction only if we have SSE4.1
3775 : // using the PMINSD instruction.
3776 0 : break;
3777 : case MSimdBinaryArith::Op_minNum:
3778 : case MSimdBinaryArith::Op_maxNum:
3779 0 : break;
3780 : }
3781 0 : MOZ_CRASH("unexpected SIMD op");
3782 : }
3783 :
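// For illustration only: a scalar model of the pre-SSE4.1 Op_mul path above.
// vpmuludq multiplies only the even 32-bit lanes (giving 64-bit products), so
// the odd lanes are shuffled into even positions, multiplied in a second pass,
// and the low 32 bits of all four products are interleaved back. mulI32x4 is
// an invented name for this sketch.
#include <cstdint>

static void mulI32x4(const uint32_t lhs[4], const uint32_t rhs[4], uint32_t out[4])
{
    // Pass 1 ("vpmuludq" on the original vectors): lanes 0 and 2.
    uint64_t even0 = uint64_t(lhs[0]) * rhs[0];
    uint64_t even2 = uint64_t(lhs[2]) * rhs[2];
    // Pass 2 ("vpshufd" moves lanes 1/3 down, then "vpmuludq"): lanes 1 and 3.
    uint64_t odd1 = uint64_t(lhs[1]) * rhs[1];
    uint64_t odd3 = uint64_t(lhs[3]) * rhs[3];
    // The final two vshufps gather the low 32 bits back into lane order.
    out[0] = uint32_t(even0);
    out[1] = uint32_t(odd1);
    out[2] = uint32_t(even2);
    out[3] = uint32_t(odd3);
}
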
3784 : void
3785 0 : CodeGeneratorX86Shared::visitSimdBinaryArithFx4(LSimdBinaryArithFx4* ins)
3786 : {
3787 0 : FloatRegister lhs = ToFloatRegister(ins->lhs());
3788 0 : Operand rhs = ToOperand(ins->rhs());
3789 0 : FloatRegister output = ToFloatRegister(ins->output());
3790 :
3791 0 : ScratchSimd128Scope scratch(masm);
3792 :
3793 0 : MSimdBinaryArith::Operation op = ins->operation();
3794 0 : switch (op) {
3795 : case MSimdBinaryArith::Op_add:
3796 0 : masm.vaddps(rhs, lhs, output);
3797 0 : return;
3798 : case MSimdBinaryArith::Op_sub:
3799 0 : masm.vsubps(rhs, lhs, output);
3800 0 : return;
3801 : case MSimdBinaryArith::Op_mul:
3802 0 : masm.vmulps(rhs, lhs, output);
3803 0 : return;
3804 : case MSimdBinaryArith::Op_div:
3805 0 : masm.vdivps(rhs, lhs, output);
3806 0 : return;
3807 : case MSimdBinaryArith::Op_max: {
3808 0 : FloatRegister lhsCopy = masm.reusedInputFloat32x4(lhs, scratch);
3809 0 : masm.vcmpunordps(rhs, lhsCopy, scratch);
3810 :
3811 0 : FloatRegister tmp = ToFloatRegister(ins->temp());
3812 0 : FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, tmp);
3813 0 : masm.vmaxps(Operand(lhs), rhsCopy, tmp);
3814 0 : masm.vmaxps(rhs, lhs, output);
3815 :
3816 0 : masm.vandps(tmp, output, output);
3817 0 : masm.vorps(scratch, output, output); // or in the all-ones NaNs
3818 0 : return;
3819 : }
3820 : case MSimdBinaryArith::Op_min: {
3821 0 : FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, scratch);
3822 0 : masm.vminps(Operand(lhs), rhsCopy, scratch);
3823 0 : masm.vminps(rhs, lhs, output);
3824 0 : masm.vorps(scratch, output, output); // NaN or'd with arbitrary bits is NaN
3825 0 : return;
3826 : }
3827 : case MSimdBinaryArith::Op_minNum: {
3828 0 : FloatRegister tmp = ToFloatRegister(ins->temp());
3829 0 : masm.loadConstantSimd128Int(SimdConstant::SplatX4(int32_t(0x80000000)), tmp);
3830 :
3831 0 : FloatRegister mask = scratch;
3832 0 : FloatRegister tmpCopy = masm.reusedInputFloat32x4(tmp, scratch);
3833 0 : masm.vpcmpeqd(Operand(lhs), tmpCopy, mask);
3834 0 : masm.vandps(tmp, mask, mask);
3835 :
3836 0 : FloatRegister lhsCopy = masm.reusedInputFloat32x4(lhs, tmp);
3837 0 : masm.vminps(rhs, lhsCopy, tmp);
3838 0 : masm.vorps(mask, tmp, tmp);
3839 :
3840 0 : FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, mask);
3841 0 : masm.vcmpneqps(rhs, rhsCopy, mask);
3842 :
3843 0 : if (AssemblerX86Shared::HasAVX()) {
3844 0 : masm.vblendvps(mask, lhs, tmp, output);
3845 : } else {
3846 : // Emulate vblendvps.
3847 : // With SSE4.1 we could use blendvps, but it's awkward since
3848 : // it requires the mask to be in xmm0.
3849 0 : if (lhs != output)
3850 0 : masm.moveSimd128Float(lhs, output);
3851 0 : masm.vandps(Operand(mask), output, output);
3852 0 : masm.vandnps(Operand(tmp), mask, mask);
3853 0 : masm.vorps(Operand(mask), output, output);
3854 : }
3855 0 : return;
3856 : }
3857 : case MSimdBinaryArith::Op_maxNum: {
3858 0 : FloatRegister mask = scratch;
3859 0 : masm.loadConstantSimd128Int(SimdConstant::SplatX4(0), mask);
3860 0 : masm.vpcmpeqd(Operand(lhs), mask, mask);
3861 :
3862 0 : FloatRegister tmp = ToFloatRegister(ins->temp());
3863 0 : masm.loadConstantSimd128Int(SimdConstant::SplatX4(int32_t(0x80000000)), tmp);
3864 0 : masm.vandps(tmp, mask, mask);
3865 :
3866 0 : FloatRegister lhsCopy = masm.reusedInputFloat32x4(lhs, tmp);
3867 0 : masm.vmaxps(rhs, lhsCopy, tmp);
3868 0 : masm.vandnps(Operand(tmp), mask, mask);
3869 :
3870 : // Ensure tmp always contains the temporary result
3871 0 : mask = tmp;
3872 0 : tmp = scratch;
3873 :
3874 0 : FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, mask);
3875 0 : masm.vcmpneqps(rhs, rhsCopy, mask);
3876 :
3877 0 : if (AssemblerX86Shared::HasAVX()) {
3878 0 : masm.vblendvps(mask, lhs, tmp, output);
3879 : } else {
3880 : // Emulate vblendvps.
3881 : // With SSE4.1 we could use blendvps, but it's awkward since
3882 : // it requires the mask to be in xmm0.
3883 0 : if (lhs != output)
3884 0 : masm.moveSimd128Float(lhs, output);
3885 0 : masm.vandps(Operand(mask), output, output);
3886 0 : masm.vandnps(Operand(tmp), mask, mask);
3887 0 : masm.vorps(Operand(mask), output, output);
3888 : }
3889 0 : return;
3890 : }
3891 : }
3892 0 : MOZ_CRASH("unexpected SIMD op");
3893 : }
3894 :
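// Hedged scalar model of the vblendvps emulation used in the minNum/maxNum
// cases above when AVX is unavailable: out = (a & mask) | (b & ~mask), built
// from vandps, vandnps and vorps. Where a mask lane is all-ones the result
// lane comes from a, otherwise from b. blendBits is an illustrative name.
#include <cstdint>

static void blendBits(const uint32_t a[4], const uint32_t b[4],
                      const uint32_t mask[4], uint32_t out[4])
{
    for (unsigned i = 0; i < 4; i++)
        out[i] = (a[i] & mask[i]) | (b[i] & ~mask[i]);   // vandps / vandnps / vorps
}
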
3895 : void
3896 0 : CodeGeneratorX86Shared::visitSimdBinarySaturating(LSimdBinarySaturating* ins)
3897 : {
3898 0 : FloatRegister lhs = ToFloatRegister(ins->lhs());
3899 0 : Operand rhs = ToOperand(ins->rhs());
3900 0 : FloatRegister output = ToFloatRegister(ins->output());
3901 :
3902 0 : SimdSign sign = ins->signedness();
3903 0 : MOZ_ASSERT(sign != SimdSign::NotApplicable);
3904 :
3905 0 : switch (ins->type()) {
3906 : case MIRType::Int8x16:
3907 0 : switch (ins->operation()) {
3908 : case MSimdBinarySaturating::add:
3909 0 : if (sign == SimdSign::Signed)
3910 0 : masm.vpaddsb(rhs, lhs, output);
3911 : else
3912 0 : masm.vpaddusb(rhs, lhs, output);
3913 0 : return;
3914 : case MSimdBinarySaturating::sub:
3915 0 : if (sign == SimdSign::Signed)
3916 0 : masm.vpsubsb(rhs, lhs, output);
3917 : else
3918 0 : masm.vpsubusb(rhs, lhs, output);
3919 0 : return;
3920 : }
3921 0 : break;
3922 :
3923 : case MIRType::Int16x8:
3924 0 : switch (ins->operation()) {
3925 : case MSimdBinarySaturating::add:
3926 0 : if (sign == SimdSign::Signed)
3927 0 : masm.vpaddsw(rhs, lhs, output);
3928 : else
3929 0 : masm.vpaddusw(rhs, lhs, output);
3930 0 : return;
3931 : case MSimdBinarySaturating::sub:
3932 0 : if (sign == SimdSign::Signed)
3933 0 : masm.vpsubsw(rhs, lhs, output);
3934 : else
3935 0 : masm.vpsubusw(rhs, lhs, output);
3936 0 : return;
3937 : }
3938 0 : break;
3939 :
3940 : default:
3941 0 : break;
3942 : }
3943 0 : MOZ_CRASH("unsupported type for SIMD saturating arithmetic");
3944 : }
3945 :
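// Illustrative scalar model of the saturating lane operations selected above
// (vpaddsb/vpaddusb and friends): results are clamped to the lane's range
// instead of wrapping. Helper names are invented for this sketch.
#include <algorithm>
#include <cstdint>

static int8_t addSatI8(int8_t a, int8_t b)
{
    int sum = int(a) + int(b);
    return int8_t(std::min(127, std::max(-128, sum)));
}

static uint8_t subSatU8(uint8_t a, uint8_t b)
{
    int diff = int(a) - int(b);
    return uint8_t(std::max(0, diff));
}
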
3946 : void
3947 0 : CodeGeneratorX86Shared::visitSimdUnaryArithIx16(LSimdUnaryArithIx16* ins)
3948 : {
3949 0 : Operand in = ToOperand(ins->input());
3950 0 : FloatRegister out = ToFloatRegister(ins->output());
3951 :
3952 0 : static const SimdConstant allOnes = SimdConstant::SplatX16(-1);
3953 :
3954 0 : switch (ins->operation()) {
3955 : case MSimdUnaryArith::neg:
3956 0 : masm.zeroSimd128Int(out);
3957 0 : masm.packedSubInt8(in, out);
3958 0 : return;
3959 : case MSimdUnaryArith::not_:
3960 0 : masm.loadConstantSimd128Int(allOnes, out);
3961 0 : masm.bitwiseXorSimd128(in, out);
3962 0 : return;
3963 : case MSimdUnaryArith::abs:
3964 : case MSimdUnaryArith::reciprocalApproximation:
3965 : case MSimdUnaryArith::reciprocalSqrtApproximation:
3966 : case MSimdUnaryArith::sqrt:
3967 0 : break;
3968 : }
3969 0 : MOZ_CRASH("unexpected SIMD op");
3970 : }
3971 :
3972 : void
3973 0 : CodeGeneratorX86Shared::visitSimdUnaryArithIx8(LSimdUnaryArithIx8* ins)
3974 : {
3975 0 : Operand in = ToOperand(ins->input());
3976 0 : FloatRegister out = ToFloatRegister(ins->output());
3977 :
3978 0 : static const SimdConstant allOnes = SimdConstant::SplatX8(-1);
3979 :
3980 0 : switch (ins->operation()) {
3981 : case MSimdUnaryArith::neg:
3982 0 : masm.zeroSimd128Int(out);
3983 0 : masm.packedSubInt16(in, out);
3984 0 : return;
3985 : case MSimdUnaryArith::not_:
3986 0 : masm.loadConstantSimd128Int(allOnes, out);
3987 0 : masm.bitwiseXorSimd128(in, out);
3988 0 : return;
3989 : case MSimdUnaryArith::abs:
3990 : case MSimdUnaryArith::reciprocalApproximation:
3991 : case MSimdUnaryArith::reciprocalSqrtApproximation:
3992 : case MSimdUnaryArith::sqrt:
3993 0 : break;
3994 : }
3995 0 : MOZ_CRASH("unexpected SIMD op");
3996 : }
3997 :
3998 : void
3999 0 : CodeGeneratorX86Shared::visitSimdUnaryArithIx4(LSimdUnaryArithIx4* ins)
4000 : {
4001 0 : Operand in = ToOperand(ins->input());
4002 0 : FloatRegister out = ToFloatRegister(ins->output());
4003 :
4004 0 : static const SimdConstant allOnes = SimdConstant::SplatX4(-1);
4005 :
4006 0 : switch (ins->operation()) {
4007 : case MSimdUnaryArith::neg:
4008 0 : masm.zeroSimd128Int(out);
4009 0 : masm.packedSubInt32(in, out);
4010 0 : return;
4011 : case MSimdUnaryArith::not_:
4012 0 : masm.loadConstantSimd128Int(allOnes, out);
4013 0 : masm.bitwiseXorSimd128(in, out);
4014 0 : return;
4015 : case MSimdUnaryArith::abs:
4016 : case MSimdUnaryArith::reciprocalApproximation:
4017 : case MSimdUnaryArith::reciprocalSqrtApproximation:
4018 : case MSimdUnaryArith::sqrt:
4019 0 : break;
4020 : }
4021 0 : MOZ_CRASH("unexpected SIMD op");
4022 : }
4023 :
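// A small illustrative model of the integer unary ops above: neg is computed
// as 0 - x (zero the output register, then a packed subtract), and not_ as
// x xor all-ones. Names are invented for this sketch.
#include <cstdint>

static int32_t negViaZeroMinus(int32_t x)  { return int32_t(0u - uint32_t(x)); }
static int32_t notViaXorAllOnes(int32_t x) { return x ^ -1; }
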
4024 : void
4025 0 : CodeGeneratorX86Shared::visitSimdUnaryArithFx4(LSimdUnaryArithFx4* ins)
4026 : {
4027 0 : Operand in = ToOperand(ins->input());
4028 0 : FloatRegister out = ToFloatRegister(ins->output());
4029 :
4030 : // All ones but the sign bit
4031 0 : float signMask = SpecificNaN<float>(0, FloatingPoint<float>::kSignificandBits);
4032 0 : static const SimdConstant signMasks = SimdConstant::SplatX4(signMask);
4033 :
4034 : // All ones including the sign bit
4035 0 : float ones = SpecificNaN<float>(1, FloatingPoint<float>::kSignificandBits);
4036 0 : static const SimdConstant allOnes = SimdConstant::SplatX4(ones);
4037 :
4038 : // All zeros but the sign bit
4039 0 : static const SimdConstant minusZero = SimdConstant::SplatX4(-0.f);
4040 :
4041 0 : switch (ins->operation()) {
4042 : case MSimdUnaryArith::abs:
4043 0 : masm.loadConstantSimd128Float(signMasks, out);
4044 0 : masm.bitwiseAndSimd128(in, out);
4045 0 : return;
4046 : case MSimdUnaryArith::neg:
4047 0 : masm.loadConstantSimd128Float(minusZero, out);
4048 0 : masm.bitwiseXorSimd128(in, out);
4049 0 : return;
4050 : case MSimdUnaryArith::not_:
4051 0 : masm.loadConstantSimd128Float(allOnes, out);
4052 0 : masm.bitwiseXorSimd128(in, out);
4053 0 : return;
4054 : case MSimdUnaryArith::reciprocalApproximation:
4055 0 : masm.packedRcpApproximationFloat32x4(in, out);
4056 0 : return;
4057 : case MSimdUnaryArith::reciprocalSqrtApproximation:
4058 0 : masm.packedRcpSqrtApproximationFloat32x4(in, out);
4059 0 : return;
4060 : case MSimdUnaryArith::sqrt:
4061 0 : masm.packedSqrtFloat32x4(in, out);
4062 0 : return;
4063 : }
4064 0 : MOZ_CRASH("unexpected SIMD op");
4065 : }
4066 :
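// Hedged scalar model of the float bit-mask tricks above: abs clears the sign
// bit (AND with the all-ones-but-sign constant), neg flips it (XOR with the
// -0.0f pattern), and not_ flips every bit. memcpy stands in for the raw
// bitwise register operations; helper names are illustrative only.
#include <cstdint>
#include <cstring>

static uint32_t bitsOf(float f)     { uint32_t u; std::memcpy(&u, &f, sizeof u); return u; }
static float    floatOf(uint32_t u) { float f; std::memcpy(&f, &u, sizeof f); return f; }

static float absViaMask(float x) { return floatOf(bitsOf(x) & 0x7fffffffu); }
static float negViaMask(float x) { return floatOf(bitsOf(x) ^ 0x80000000u); }
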
4067 : void
4068 0 : CodeGeneratorX86Shared::visitSimdBinaryBitwise(LSimdBinaryBitwise* ins)
4069 : {
4070 0 : FloatRegister lhs = ToFloatRegister(ins->lhs());
4071 0 : Operand rhs = ToOperand(ins->rhs());
4072 0 : FloatRegister output = ToFloatRegister(ins->output());
4073 :
4074 0 : MSimdBinaryBitwise::Operation op = ins->operation();
4075 0 : switch (op) {
4076 : case MSimdBinaryBitwise::and_:
4077 0 : if (ins->type() == MIRType::Float32x4)
4078 0 : masm.vandps(rhs, lhs, output);
4079 : else
4080 0 : masm.vpand(rhs, lhs, output);
4081 0 : return;
4082 : case MSimdBinaryBitwise::or_:
4083 0 : if (ins->type() == MIRType::Float32x4)
4084 0 : masm.vorps(rhs, lhs, output);
4085 : else
4086 0 : masm.vpor(rhs, lhs, output);
4087 0 : return;
4088 : case MSimdBinaryBitwise::xor_:
4089 0 : if (ins->type() == MIRType::Float32x4)
4090 0 : masm.vxorps(rhs, lhs, output);
4091 : else
4092 0 : masm.vpxor(rhs, lhs, output);
4093 0 : return;
4094 : }
4095 0 : MOZ_CRASH("unexpected SIMD bitwise op");
4096 : }
4097 :
4098 : void
4099 0 : CodeGeneratorX86Shared::visitSimdShift(LSimdShift* ins)
4100 : {
4101 0 : FloatRegister out = ToFloatRegister(ins->output());
4102 0 : MOZ_ASSERT(ToFloatRegister(ins->vector()) == out); // defineReuseInput(0);
4103 :
4104 : // The shift amount is masked to the number of bits in a lane.
4105 0 : uint32_t shiftmask = (128u / SimdTypeToLength(ins->type())) - 1;
4106 :
4107 : // Note that SSE doesn't have instructions for shifting 8x16 vectors.
4108 : // These shifts are synthesized by the MSimdShift::AddLegalized() function.
4109 0 : const LAllocation* val = ins->value();
4110 0 : if (val->isConstant()) {
4111 0 : MOZ_ASSERT(ins->temp()->isBogusTemp());
4112 0 : Imm32 count(uint32_t(ToInt32(val)) & shiftmask);
4113 0 : switch (ins->type()) {
4114 : case MIRType::Int16x8:
4115 0 : switch (ins->operation()) {
4116 : case MSimdShift::lsh:
4117 0 : masm.packedLeftShiftByScalarInt16x8(count, out);
4118 0 : return;
4119 : case MSimdShift::rsh:
4120 0 : masm.packedRightShiftByScalarInt16x8(count, out);
4121 0 : return;
4122 : case MSimdShift::ursh:
4123 0 : masm.packedUnsignedRightShiftByScalarInt16x8(count, out);
4124 0 : return;
4125 : }
4126 0 : break;
4127 : case MIRType::Int32x4:
4128 0 : switch (ins->operation()) {
4129 : case MSimdShift::lsh:
4130 0 : masm.packedLeftShiftByScalarInt32x4(count, out);
4131 0 : return;
4132 : case MSimdShift::rsh:
4133 0 : masm.packedRightShiftByScalarInt32x4(count, out);
4134 0 : return;
4135 : case MSimdShift::ursh:
4136 0 : masm.packedUnsignedRightShiftByScalarInt32x4(count, out);
4137 0 : return;
4138 : }
4139 0 : break;
4140 : default:
4141 0 : MOZ_CRASH("unsupported type for SIMD shifts");
4142 : }
4143 0 : MOZ_CRASH("unexpected SIMD bitwise op");
4144 : }
4145 :
4146 : // Truncate val to the lane width in bits (the shiftmask above); a temp register is used for that.
4147 0 : MOZ_ASSERT(val->isRegister());
4148 0 : Register count = ToRegister(ins->temp());
4149 0 : masm.mov(ToRegister(val), count);
4150 0 : masm.andl(Imm32(shiftmask), count);
4151 0 : ScratchFloat32Scope scratch(masm);
4152 0 : masm.vmovd(count, scratch);
4153 :
4154 0 : switch (ins->type()) {
4155 : case MIRType::Int16x8:
4156 0 : switch (ins->operation()) {
4157 : case MSimdShift::lsh:
4158 0 : masm.packedLeftShiftByScalarInt16x8(scratch, out);
4159 0 : return;
4160 : case MSimdShift::rsh:
4161 0 : masm.packedRightShiftByScalarInt16x8(scratch, out);
4162 0 : return;
4163 : case MSimdShift::ursh:
4164 0 : masm.packedUnsignedRightShiftByScalarInt16x8(scratch, out);
4165 0 : return;
4166 : }
4167 0 : break;
4168 : case MIRType::Int32x4:
4169 0 : switch (ins->operation()) {
4170 : case MSimdShift::lsh:
4171 0 : masm.packedLeftShiftByScalarInt32x4(scratch, out);
4172 0 : return;
4173 : case MSimdShift::rsh:
4174 0 : masm.packedRightShiftByScalarInt32x4(scratch, out);
4175 0 : return;
4176 : case MSimdShift::ursh:
4177 0 : masm.packedUnsignedRightShiftByScalarInt32x4(scratch, out);
4178 0 : return;
4179 : }
4180 0 : break;
4181 : default:
4182 0 : MOZ_CRASH("unsupported type for SIMD shifts");
4183 : }
4184 0 : MOZ_CRASH("unexpected SIMD bitwise op");
4185 : }
4186 :
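// Hedged scalar model of the shift handling above: the count is reduced modulo
// the lane width (the & shiftmask), broadcast, and applied to every lane; ursh
// shifts in zeros while rsh shifts in copies of the sign bit. Helper names are
// invented for this sketch.
#include <cstdint>

static void lshI32x4(const int32_t in[4], uint32_t count, int32_t out[4])
{
    count &= 31;                                     // shiftmask for Int32x4
    for (unsigned i = 0; i < 4; i++)
        out[i] = int32_t(uint32_t(in[i]) << count);  // packedLeftShiftByScalarInt32x4
}

static void urshI32x4(const int32_t in[4], uint32_t count, int32_t out[4])
{
    count &= 31;
    for (unsigned i = 0; i < 4; i++)
        out[i] = int32_t(uint32_t(in[i]) >> count);  // unsigned: zero-fill
}
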
4187 : void
4188 0 : CodeGeneratorX86Shared::visitSimdSelect(LSimdSelect* ins)
4189 : {
4190 0 : FloatRegister mask = ToFloatRegister(ins->mask());
4191 0 : FloatRegister onTrue = ToFloatRegister(ins->lhs());
4192 0 : FloatRegister onFalse = ToFloatRegister(ins->rhs());
4193 0 : FloatRegister output = ToFloatRegister(ins->output());
4194 0 : FloatRegister temp = ToFloatRegister(ins->temp());
4195 :
4196 0 : if (onTrue != output)
4197 0 : masm.vmovaps(onTrue, output);
4198 0 : if (mask != temp)
4199 0 : masm.vmovaps(mask, temp);
4200 :
4201 0 : MSimdSelect* mir = ins->mir();
4202 0 : unsigned lanes = SimdTypeToLength(mir->type());
4203 :
4204 0 : if (AssemblerX86Shared::HasAVX() && lanes == 4) {
4205 : // TBD: with AVX, use vpblendvb for the lanes > 4 cases as well.
4206 0 : masm.vblendvps(mask, onTrue, onFalse, output);
4207 0 : return;
4208 : }
4209 :
4210 : // SSE4.1 has plain blendvps which can do this, but it is awkward
4211 : // to use because it requires the mask to be in xmm0.
4212 :
4213 0 : masm.bitwiseAndSimd128(Operand(temp), output);
4214 0 : masm.bitwiseAndNotSimd128(Operand(onFalse), temp);
4215 0 : masm.bitwiseOrSimd128(Operand(temp), output);
4216 : }
4217 :
4218 : void
4219 0 : CodeGeneratorX86Shared::visitCompareExchangeTypedArrayElement(LCompareExchangeTypedArrayElement* lir)
4220 : {
4221 0 : Register elements = ToRegister(lir->elements());
4222 0 : AnyRegister output = ToAnyRegister(lir->output());
4223 0 : Register temp = lir->temp()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp());
4224 :
4225 0 : Register oldval = ToRegister(lir->oldval());
4226 0 : Register newval = ToRegister(lir->newval());
4227 :
4228 0 : Scalar::Type arrayType = lir->mir()->arrayType();
4229 0 : int width = Scalar::byteSize(arrayType);
4230 :
4231 0 : if (lir->index()->isConstant()) {
4232 0 : Address dest(elements, ToInt32(lir->index()) * width);
4233 0 : masm.compareExchangeToTypedIntArray(arrayType, dest, oldval, newval, temp, output);
4234 : } else {
4235 0 : BaseIndex dest(elements, ToRegister(lir->index()), ScaleFromElemWidth(width));
4236 0 : masm.compareExchangeToTypedIntArray(arrayType, dest, oldval, newval, temp, output);
4237 : }
4238 0 : }
4239 :
4240 : void
4241 0 : CodeGeneratorX86Shared::visitAtomicExchangeTypedArrayElement(LAtomicExchangeTypedArrayElement* lir)
4242 : {
4243 0 : Register elements = ToRegister(lir->elements());
4244 0 : AnyRegister output = ToAnyRegister(lir->output());
4245 0 : Register temp = lir->temp()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp());
4246 :
4247 0 : Register value = ToRegister(lir->value());
4248 :
4249 0 : Scalar::Type arrayType = lir->mir()->arrayType();
4250 0 : int width = Scalar::byteSize(arrayType);
4251 :
4252 0 : if (lir->index()->isConstant()) {
4253 0 : Address dest(elements, ToInt32(lir->index()) * width);
4254 0 : masm.atomicExchangeToTypedIntArray(arrayType, dest, value, temp, output);
4255 : } else {
4256 0 : BaseIndex dest(elements, ToRegister(lir->index()), ScaleFromElemWidth(width));
4257 0 : masm.atomicExchangeToTypedIntArray(arrayType, dest, value, temp, output);
4258 : }
4259 0 : }
4260 :
4261 : template<typename S, typename T>
4262 : void
4263 0 : CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, const S& value,
4264 : const T& mem, Register temp1, Register temp2, AnyRegister output)
4265 : {
4266 0 : switch (arrayType) {
4267 : case Scalar::Int8:
4268 0 : switch (op) {
4269 : case AtomicFetchAddOp:
4270 0 : masm.atomicFetchAdd8SignExtend(value, mem, temp1, output.gpr());
4271 0 : break;
4272 : case AtomicFetchSubOp:
4273 0 : masm.atomicFetchSub8SignExtend(value, mem, temp1, output.gpr());
4274 0 : break;
4275 : case AtomicFetchAndOp:
4276 0 : masm.atomicFetchAnd8SignExtend(value, mem, temp1, output.gpr());
4277 0 : break;
4278 : case AtomicFetchOrOp:
4279 0 : masm.atomicFetchOr8SignExtend(value, mem, temp1, output.gpr());
4280 0 : break;
4281 : case AtomicFetchXorOp:
4282 0 : masm.atomicFetchXor8SignExtend(value, mem, temp1, output.gpr());
4283 0 : break;
4284 : default:
4285 0 : MOZ_CRASH("Invalid typed array atomic operation");
4286 : }
4287 0 : break;
4288 : case Scalar::Uint8:
4289 0 : switch (op) {
4290 : case AtomicFetchAddOp:
4291 0 : masm.atomicFetchAdd8ZeroExtend(value, mem, temp1, output.gpr());
4292 0 : break;
4293 : case AtomicFetchSubOp:
4294 0 : masm.atomicFetchSub8ZeroExtend(value, mem, temp1, output.gpr());
4295 0 : break;
4296 : case AtomicFetchAndOp:
4297 0 : masm.atomicFetchAnd8ZeroExtend(value, mem, temp1, output.gpr());
4298 0 : break;
4299 : case AtomicFetchOrOp:
4300 0 : masm.atomicFetchOr8ZeroExtend(value, mem, temp1, output.gpr());
4301 0 : break;
4302 : case AtomicFetchXorOp:
4303 0 : masm.atomicFetchXor8ZeroExtend(value, mem, temp1, output.gpr());
4304 0 : break;
4305 : default:
4306 0 : MOZ_CRASH("Invalid typed array atomic operation");
4307 : }
4308 0 : break;
4309 : case Scalar::Int16:
4310 0 : switch (op) {
4311 : case AtomicFetchAddOp:
4312 0 : masm.atomicFetchAdd16SignExtend(value, mem, temp1, output.gpr());
4313 0 : break;
4314 : case AtomicFetchSubOp:
4315 0 : masm.atomicFetchSub16SignExtend(value, mem, temp1, output.gpr());
4316 0 : break;
4317 : case AtomicFetchAndOp:
4318 0 : masm.atomicFetchAnd16SignExtend(value, mem, temp1, output.gpr());
4319 0 : break;
4320 : case AtomicFetchOrOp:
4321 0 : masm.atomicFetchOr16SignExtend(value, mem, temp1, output.gpr());
4322 0 : break;
4323 : case AtomicFetchXorOp:
4324 0 : masm.atomicFetchXor16SignExtend(value, mem, temp1, output.gpr());
4325 0 : break;
4326 : default:
4327 0 : MOZ_CRASH("Invalid typed array atomic operation");
4328 : }
4329 0 : break;
4330 : case Scalar::Uint16:
4331 0 : switch (op) {
4332 : case AtomicFetchAddOp:
4333 0 : masm.atomicFetchAdd16ZeroExtend(value, mem, temp1, output.gpr());
4334 0 : break;
4335 : case AtomicFetchSubOp:
4336 0 : masm.atomicFetchSub16ZeroExtend(value, mem, temp1, output.gpr());
4337 0 : break;
4338 : case AtomicFetchAndOp:
4339 0 : masm.atomicFetchAnd16ZeroExtend(value, mem, temp1, output.gpr());
4340 0 : break;
4341 : case AtomicFetchOrOp:
4342 0 : masm.atomicFetchOr16ZeroExtend(value, mem, temp1, output.gpr());
4343 0 : break;
4344 : case AtomicFetchXorOp:
4345 0 : masm.atomicFetchXor16ZeroExtend(value, mem, temp1, output.gpr());
4346 0 : break;
4347 : default:
4348 0 : MOZ_CRASH("Invalid typed array atomic operation");
4349 : }
4350 0 : break;
4351 : case Scalar::Int32:
4352 0 : switch (op) {
4353 : case AtomicFetchAddOp:
4354 0 : masm.atomicFetchAdd32(value, mem, temp1, output.gpr());
4355 0 : break;
4356 : case AtomicFetchSubOp:
4357 0 : masm.atomicFetchSub32(value, mem, temp1, output.gpr());
4358 0 : break;
4359 : case AtomicFetchAndOp:
4360 0 : masm.atomicFetchAnd32(value, mem, temp1, output.gpr());
4361 0 : break;
4362 : case AtomicFetchOrOp:
4363 0 : masm.atomicFetchOr32(value, mem, temp1, output.gpr());
4364 0 : break;
4365 : case AtomicFetchXorOp:
4366 0 : masm.atomicFetchXor32(value, mem, temp1, output.gpr());
4367 0 : break;
4368 : default:
4369 0 : MOZ_CRASH("Invalid typed array atomic operation");
4370 : }
4371 0 : break;
4372 : case Scalar::Uint32:
4373 : // At the moment, the code in MCallOptimize.cpp requires the output
4374 : // type to be double for uint32 arrays. See bug 1077305.
4375 0 : MOZ_ASSERT(output.isFloat());
4376 0 : switch (op) {
4377 : case AtomicFetchAddOp:
4378 0 : masm.atomicFetchAdd32(value, mem, InvalidReg, temp1);
4379 0 : break;
4380 : case AtomicFetchSubOp:
4381 0 : masm.atomicFetchSub32(value, mem, InvalidReg, temp1);
4382 0 : break;
4383 : case AtomicFetchAndOp:
4384 0 : masm.atomicFetchAnd32(value, mem, temp2, temp1);
4385 0 : break;
4386 : case AtomicFetchOrOp:
4387 0 : masm.atomicFetchOr32(value, mem, temp2, temp1);
4388 0 : break;
4389 : case AtomicFetchXorOp:
4390 0 : masm.atomicFetchXor32(value, mem, temp2, temp1);
4391 0 : break;
4392 : default:
4393 0 : MOZ_CRASH("Invalid typed array atomic operation");
4394 : }
4395 0 : masm.convertUInt32ToDouble(temp1, output.fpu());
4396 0 : break;
4397 : default:
4398 0 : MOZ_CRASH("Invalid typed array type");
4399 : }
4400 0 : }
4401 :
4402 : template void
4403 : CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType,
4404 : const Imm32& value, const Address& mem,
4405 : Register temp1, Register temp2, AnyRegister output);
4406 : template void
4407 : CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType,
4408 : const Imm32& value, const BaseIndex& mem,
4409 : Register temp1, Register temp2, AnyRegister output);
4410 : template void
4411 : CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType,
4412 : const Register& value, const Address& mem,
4413 : Register temp1, Register temp2, AnyRegister output);
4414 : template void
4415 : CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType,
4416 : const Register& value, const BaseIndex& mem,
4417 : Register temp1, Register temp2, AnyRegister output);
4418 :
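// Hedged scalar model of the fetch-style atomics dispatched above: the old
// value is returned and memory is updated in a single atomic step; for Uint32
// the old value is additionally converted to double (see the Uint32 case and
// the bug reference above). std::atomic stands in for the generated
// lock-prefixed code; helper names are illustrative only.
#include <atomic>
#include <cstdint>

static int32_t fetchAdd32(std::atomic<int32_t>& mem, int32_t value)
{
    return mem.fetch_add(value);            // returns the previous value
}

static double fetchAdd32AsUint(std::atomic<uint32_t>& mem, uint32_t value)
{
    return double(mem.fetch_add(value));    // Uint32 results are handed back as doubles
}
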
4419 : // Binary operation for effect, result discarded.
4420 : template<typename S, typename T>
4421 : void
4422 0 : CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, const S& value,
4423 : const T& mem)
4424 : {
4425 0 : switch (arrayType) {
4426 : case Scalar::Int8:
4427 : case Scalar::Uint8:
4428 0 : switch (op) {
4429 : case AtomicFetchAddOp:
4430 0 : masm.atomicAdd8(value, mem);
4431 0 : break;
4432 : case AtomicFetchSubOp:
4433 0 : masm.atomicSub8(value, mem);
4434 0 : break;
4435 : case AtomicFetchAndOp:
4436 0 : masm.atomicAnd8(value, mem);
4437 0 : break;
4438 : case AtomicFetchOrOp:
4439 0 : masm.atomicOr8(value, mem);
4440 0 : break;
4441 : case AtomicFetchXorOp:
4442 0 : masm.atomicXor8(value, mem);
4443 0 : break;
4444 : default:
4445 0 : MOZ_CRASH("Invalid typed array atomic operation");
4446 : }
4447 0 : break;
4448 : case Scalar::Int16:
4449 : case Scalar::Uint16:
4450 0 : switch (op) {
4451 : case AtomicFetchAddOp:
4452 0 : masm.atomicAdd16(value, mem);
4453 0 : break;
4454 : case AtomicFetchSubOp:
4455 0 : masm.atomicSub16(value, mem);
4456 0 : break;
4457 : case AtomicFetchAndOp:
4458 0 : masm.atomicAnd16(value, mem);
4459 0 : break;
4460 : case AtomicFetchOrOp:
4461 0 : masm.atomicOr16(value, mem);
4462 0 : break;
4463 : case AtomicFetchXorOp:
4464 0 : masm.atomicXor16(value, mem);
4465 0 : break;
4466 : default:
4467 0 : MOZ_CRASH("Invalid typed array atomic operation");
4468 : }
4469 0 : break;
4470 : case Scalar::Int32:
4471 : case Scalar::Uint32:
4472 0 : switch (op) {
4473 : case AtomicFetchAddOp:
4474 0 : masm.atomicAdd32(value, mem);
4475 0 : break;
4476 : case AtomicFetchSubOp:
4477 0 : masm.atomicSub32(value, mem);
4478 0 : break;
4479 : case AtomicFetchAndOp:
4480 0 : masm.atomicAnd32(value, mem);
4481 0 : break;
4482 : case AtomicFetchOrOp:
4483 0 : masm.atomicOr32(value, mem);
4484 0 : break;
4485 : case AtomicFetchXorOp:
4486 0 : masm.atomicXor32(value, mem);
4487 0 : break;
4488 : default:
4489 0 : MOZ_CRASH("Invalid typed array atomic operation");
4490 : }
4491 0 : break;
4492 : default:
4493 0 : MOZ_CRASH("Invalid typed array type");
4494 : }
4495 0 : }
4496 :
4497 : template void
4498 : CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType,
4499 : const Imm32& value, const Address& mem);
4500 : template void
4501 : CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType,
4502 : const Imm32& value, const BaseIndex& mem);
4503 : template void
4504 : CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType,
4505 : const Register& value, const Address& mem);
4506 : template void
4507 : CodeGeneratorX86Shared::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType,
4508 : const Register& value, const BaseIndex& mem);
4509 :
4510 :
4511 : template <typename T>
4512 : static inline void
4513 0 : AtomicBinopToTypedArray(CodeGeneratorX86Shared* cg, AtomicOp op,
4514 : Scalar::Type arrayType, const LAllocation* value, const T& mem,
4515 : Register temp1, Register temp2, AnyRegister output)
4516 : {
4517 0 : if (value->isConstant())
4518 0 : cg->atomicBinopToTypedIntArray(op, arrayType, Imm32(ToInt32(value)), mem, temp1, temp2, output);
4519 : else
4520 0 : cg->atomicBinopToTypedIntArray(op, arrayType, ToRegister(value), mem, temp1, temp2, output);
4521 0 : }
4522 :
4523 : void
4524 0 : CodeGeneratorX86Shared::visitAtomicTypedArrayElementBinop(LAtomicTypedArrayElementBinop* lir)
4525 : {
4526 0 : MOZ_ASSERT(lir->mir()->hasUses());
4527 :
4528 0 : AnyRegister output = ToAnyRegister(lir->output());
4529 0 : Register elements = ToRegister(lir->elements());
4530 0 : Register temp1 = lir->temp1()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp1());
4531 0 : Register temp2 = lir->temp2()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp2());
4532 0 : const LAllocation* value = lir->value();
4533 :
4534 0 : Scalar::Type arrayType = lir->mir()->arrayType();
4535 0 : int width = Scalar::byteSize(arrayType);
4536 :
4537 0 : if (lir->index()->isConstant()) {
4538 0 : Address mem(elements, ToInt32(lir->index()) * width);
4539 0 : AtomicBinopToTypedArray(this, lir->mir()->operation(), arrayType, value, mem, temp1, temp2, output);
4540 : } else {
4541 0 : BaseIndex mem(elements, ToRegister(lir->index()), ScaleFromElemWidth(width));
4542 0 : AtomicBinopToTypedArray(this, lir->mir()->operation(), arrayType, value, mem, temp1, temp2, output);
4543 : }
4544 0 : }
4545 :
4546 : template <typename T>
4547 : static inline void
4548 0 : AtomicBinopToTypedArray(CodeGeneratorX86Shared* cg, AtomicOp op,
4549 : Scalar::Type arrayType, const LAllocation* value, const T& mem)
4550 : {
4551 0 : if (value->isConstant())
4552 0 : cg->atomicBinopToTypedIntArray(op, arrayType, Imm32(ToInt32(value)), mem);
4553 : else
4554 0 : cg->atomicBinopToTypedIntArray(op, arrayType, ToRegister(value), mem);
4555 0 : }
4556 :
4557 : void
4558 0 : CodeGeneratorX86Shared::visitAtomicTypedArrayElementBinopForEffect(LAtomicTypedArrayElementBinopForEffect* lir)
4559 : {
4560 0 : MOZ_ASSERT(!lir->mir()->hasUses());
4561 :
4562 0 : Register elements = ToRegister(lir->elements());
4563 0 : const LAllocation* value = lir->value();
4564 0 : Scalar::Type arrayType = lir->mir()->arrayType();
4565 0 : int width = Scalar::byteSize(arrayType);
4566 :
4567 0 : if (lir->index()->isConstant()) {
4568 0 : Address mem(elements, ToInt32(lir->index()) * width);
4569 0 : AtomicBinopToTypedArray(this, lir->mir()->operation(), arrayType, value, mem);
4570 : } else {
4571 0 : BaseIndex mem(elements, ToRegister(lir->index()), ScaleFromElemWidth(width));
4572 0 : AtomicBinopToTypedArray(this, lir->mir()->operation(), arrayType, value, mem);
4573 : }
4574 0 : }
4575 :
4576 : void
4577 0 : CodeGeneratorX86Shared::visitMemoryBarrier(LMemoryBarrier* ins)
4578 : {
4579 0 : if (ins->type() & MembarStoreLoad)
4580 0 : masm.storeLoadFence();
4581 0 : }
4582 :
4583 : void
4584 0 : CodeGeneratorX86Shared::setReturnDoubleRegs(LiveRegisterSet* regs)
4585 : {
4586 0 : MOZ_ASSERT(ReturnFloat32Reg.encoding() == X86Encoding::xmm0);
4587 0 : MOZ_ASSERT(ReturnDoubleReg.encoding() == X86Encoding::xmm0);
4588 0 : MOZ_ASSERT(ReturnSimd128Reg.encoding() == X86Encoding::xmm0);
4589 0 : regs->add(ReturnFloat32Reg);
4590 0 : regs->add(ReturnDoubleReg);
4591 0 : regs->add(ReturnSimd128Reg);
4592 0 : }
4593 :
4594 : void
4595 0 : CodeGeneratorX86Shared::visitOutOfLineWasmTruncateCheck(OutOfLineWasmTruncateCheck* ool)
4596 : {
4597 0 : FloatRegister input = ool->input();
4598 0 : MIRType fromType = ool->fromType();
4599 0 : MIRType toType = ool->toType();
4600 0 : Label* oolRejoin = ool->rejoin();
4601 0 : bool isUnsigned = ool->isUnsigned();
4602 0 : wasm::BytecodeOffset off = ool->bytecodeOffset();
4603 :
4604 0 : if (fromType == MIRType::Float32) {
4605 0 : if (toType == MIRType::Int32)
4606 0 : masm.outOfLineWasmTruncateFloat32ToInt32(input, isUnsigned, off, oolRejoin);
4607 0 : else if (toType == MIRType::Int64)
4608 0 : masm.outOfLineWasmTruncateFloat32ToInt64(input, isUnsigned, off, oolRejoin);
4609 : else
4610 0 : MOZ_CRASH("unexpected type");
4611 0 : } else if (fromType == MIRType::Double) {
4612 0 : if (toType == MIRType::Int32)
4613 0 : masm.outOfLineWasmTruncateDoubleToInt32(input, isUnsigned, off, oolRejoin);
4614 0 : else if (toType == MIRType::Int64)
4615 0 : masm.outOfLineWasmTruncateDoubleToInt64(input, isUnsigned, off, oolRejoin);
4616 : else
4617 0 : MOZ_CRASH("unexpected type");
4618 : } else {
4619 0 : MOZ_CRASH("unexpected type");
4620 : }
4621 0 : }
4622 :
4623 : void
4624 0 : CodeGeneratorX86Shared::canonicalizeIfDeterministic(Scalar::Type type, const LAllocation* value)
4625 : {
4626 : #ifdef JS_MORE_DETERMINISTIC
4627 : switch (type) {
4628 : case Scalar::Float32: {
4629 : FloatRegister in = ToFloatRegister(value);
4630 : masm.canonicalizeFloatIfDeterministic(in);
4631 : break;
4632 : }
4633 : case Scalar::Float64: {
4634 : FloatRegister in = ToFloatRegister(value);
4635 : masm.canonicalizeDoubleIfDeterministic(in);
4636 : break;
4637 : }
4638 : case Scalar::Float32x4: {
4639 : FloatRegister in = ToFloatRegister(value);
4640 : MOZ_ASSERT(in.isSimd128());
4641 : FloatRegister scratch = in != xmm0.asSimd128() ? xmm0 : xmm1;
4642 : masm.push(scratch);
4643 : masm.canonicalizeFloat32x4(in, scratch);
4644 : masm.pop(scratch);
4645 : break;
4646 : }
4647 : default: {
4648 : // Other types don't need canonicalization.
4649 : break;
4650 : }
4651 : }
4652 : #endif // JS_MORE_DETERMINISTIC
4653 0 : }
4654 :
4655 : void
4656 0 : CodeGeneratorX86Shared::visitCopySignF(LCopySignF* lir)
4657 : {
4658 0 : FloatRegister lhs = ToFloatRegister(lir->getOperand(0));
4659 0 : FloatRegister rhs = ToFloatRegister(lir->getOperand(1));
4660 :
4661 0 : FloatRegister out = ToFloatRegister(lir->output());
4662 :
4663 0 : if (lhs == rhs) {
4664 0 : if (lhs != out)
4665 0 : masm.moveFloat32(lhs, out);
4666 0 : return;
4667 : }
4668 :
4669 0 : ScratchFloat32Scope scratch(masm);
4670 :
4671 0 : float clearSignMask = BitwiseCast<float>(INT32_MAX);
4672 0 : masm.loadConstantFloat32(clearSignMask, scratch);
4673 0 : masm.vandps(scratch, lhs, out);
4674 :
4675 0 : float keepSignMask = BitwiseCast<float>(INT32_MIN);
4676 0 : masm.loadConstantFloat32(keepSignMask, scratch);
4677 0 : masm.vandps(rhs, scratch, scratch);
4678 :
4679 0 : masm.vorps(scratch, out, out);
4680 : }
4681 :
4682 : void
4683 0 : CodeGeneratorX86Shared::visitCopySignD(LCopySignD* lir)
4684 : {
4685 0 : FloatRegister lhs = ToFloatRegister(lir->getOperand(0));
4686 0 : FloatRegister rhs = ToFloatRegister(lir->getOperand(1));
4687 :
4688 0 : FloatRegister out = ToFloatRegister(lir->output());
4689 :
4690 0 : if (lhs == rhs) {
4691 0 : if (lhs != out)
4692 0 : masm.moveDouble(lhs, out);
4693 0 : return;
4694 : }
4695 :
4696 0 : ScratchDoubleScope scratch(masm);
4697 :
4698 0 : double clearSignMask = BitwiseCast<double>(INT64_MAX);
4699 0 : masm.loadConstantDouble(clearSignMask, scratch);
4700 0 : masm.vandpd(scratch, lhs, out);
4701 :
4702 0 : double keepSignMask = BitwiseCast<double>(INT64_MIN);
4703 0 : masm.loadConstantDouble(keepSignMask, scratch);
4704 0 : masm.vandpd(rhs, scratch, scratch);
4705 :
4706 0 : masm.vorpd(scratch, out, out);
4707 : }
4708 :
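// Illustrative scalar model of the copysign lowerings above: the magnitude
// bits are taken from the first operand (AND with INT64_MAX / INT32_MAX) and
// the sign bit from the second (AND with INT64_MIN / INT32_MIN), then OR'd
// together. copySignViaMasks is an invented name for this sketch.
#include <cstdint>
#include <cstring>

static double copySignViaMasks(double lhs, double rhs)
{
    uint64_t l, r;
    std::memcpy(&l, &lhs, sizeof l);
    std::memcpy(&r, &rhs, sizeof r);
    uint64_t bits = (l & uint64_t(INT64_MAX))    // clearSignMask: keep the magnitude of lhs
                  | (r & uint64_t(INT64_MIN));   // keepSignMask: keep the sign of rhs
    double out;
    std::memcpy(&out, &bits, sizeof out);
    return out;
}
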
4709 : void
4710 0 : CodeGeneratorX86Shared::visitRotateI64(LRotateI64* lir)
4711 : {
4712 0 : MRotate* mir = lir->mir();
4713 0 : LAllocation* count = lir->count();
4714 :
4715 0 : Register64 input = ToRegister64(lir->input());
4716 0 : Register64 output = ToOutRegister64(lir);
4717 0 : Register temp = ToTempRegisterOrInvalid(lir->temp());
4718 :
4719 0 : MOZ_ASSERT(input == output);
4720 :
4721 0 : if (count->isConstant()) {
4722 0 : int32_t c = int32_t(count->toConstant()->toInt64() & 0x3F);
4723 0 : if (!c)
4724 0 : return;
4725 0 : if (mir->isLeftRotate())
4726 0 : masm.rotateLeft64(Imm32(c), input, output, temp);
4727 : else
4728 0 : masm.rotateRight64(Imm32(c), input, output, temp);
4729 : } else {
4730 0 : if (mir->isLeftRotate())
4731 0 : masm.rotateLeft64(ToRegister(count), input, output, temp);
4732 : else
4733 0 : masm.rotateRight64(ToRegister(count), input, output, temp);
4734 : }
4735 : }
4736 :
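// Illustrative scalar model of the 64-bit rotate above: the count is taken
// modulo 64 (the & 0x3F on the constant path), and a rotate by zero is a
// no-op, which is why that case returns early. rotateLeft64Model is an
// invented name for this sketch.
#include <cstdint>

static uint64_t rotateLeft64Model(uint64_t v, uint32_t count)
{
    count &= 63;
    if (count == 0)
        return v;                                  // matches the early return above
    return (v << count) | (v >> (64 - count));
}
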
4737 : void
4738 0 : CodeGeneratorX86Shared::visitPopcntI64(LPopcntI64* lir)
4739 : {
4740 0 : Register64 input = ToRegister64(lir->getInt64Operand(0));
4741 0 : Register64 output = ToOutRegister64(lir);
4742 0 : Register temp = InvalidReg;
4743 0 : if (!AssemblerX86Shared::HasPOPCNT())
4744 0 : temp = ToRegister(lir->getTemp(0));
4745 :
4746 0 : masm.popcnt64(input, output, temp);
4747 0 : }
4748 :
4749 : } // namespace jit
4750 : } // namespace js