/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 * vim: set ts=8 sts=4 et sw=4 tw=99:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "jit/x86-shared/Lowering-x86-shared.h"

#include "mozilla/MathAlgorithms.h"

#include "jit/MIR.h"

#include "jit/shared/Lowering-shared-inl.h"

using namespace js;
using namespace js::jit;

using mozilla::Abs;
using mozilla::FloorLog2;
using mozilla::Swap;

LTableSwitch*
LIRGeneratorX86Shared::newLTableSwitch(const LAllocation& in, const LDefinition& inputCopy,
                                       MTableSwitch* tableswitch)
{
    return new(alloc()) LTableSwitch(in, inputCopy, temp(), tableswitch);
}

LTableSwitchV*
LIRGeneratorX86Shared::newLTableSwitchV(MTableSwitch* tableswitch)
{
    return new(alloc()) LTableSwitchV(useBox(tableswitch->getOperand(0)),
                                      temp(), tempDouble(), temp(), tableswitch);
}

void
LIRGeneratorX86Shared::visitGuardShape(MGuardShape* ins)
{
    MOZ_ASSERT(ins->object()->type() == MIRType::Object);

    LGuardShape* guard = new(alloc()) LGuardShape(useRegisterAtStart(ins->object()));
    assignSnapshot(guard, ins->bailoutKind());
    add(guard, ins);
    redefine(ins, ins->object());
}

void
LIRGeneratorX86Shared::visitGuardObjectGroup(MGuardObjectGroup* ins)
{
    MOZ_ASSERT(ins->object()->type() == MIRType::Object);

    LGuardObjectGroup* guard = new(alloc()) LGuardObjectGroup(useRegisterAtStart(ins->object()));
    assignSnapshot(guard, ins->bailoutKind());
    add(guard, ins);
    redefine(ins, ins->object());
}

void
LIRGeneratorX86Shared::visitPowHalf(MPowHalf* ins)
{
    MDefinition* input = ins->input();
    MOZ_ASSERT(input->type() == MIRType::Double);
    LPowHalfD* lir = new(alloc()) LPowHalfD(useRegisterAtStart(input));
    define(lir, ins);
}

void
LIRGeneratorX86Shared::lowerForShift(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
                                     MDefinition* lhs, MDefinition* rhs)
{
    ins->setOperand(0, useRegisterAtStart(lhs));

    // The shift amount must be a constant or live in ecx: x86 cannot use
    // any other register as a variable shift count.
    if (rhs->isConstant())
        ins->setOperand(1, useOrConstantAtStart(rhs));
    else
        ins->setOperand(1, lhs != rhs ? useFixed(rhs, ecx) : useFixedAtStart(rhs, ecx));

    defineReuseInput(ins, mir, 0);
}
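
// Illustrative only, not part of the original lowering: a scalar sketch of
// why only the low bits of a 32-bit shift count matter. x86 shift
// instructions mask the count in cl to 5 bits, which is exactly the "& 31"
// that JS shift semantics require.
static inline int32_t
ExampleShiftLeft32(int32_t lhs, int32_t rhs)
{
    // "shl reg, cl" computes reg << (cl & 31); this model matches it.
    return int32_t(uint32_t(lhs) << (rhs & 31));
}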

template<size_t Temps>
void
LIRGeneratorX86Shared::lowerForShiftInt64(LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, Temps>* ins,
                                          MDefinition* mir, MDefinition* lhs, MDefinition* rhs)
{
    ins->setInt64Operand(0, useInt64RegisterAtStart(lhs));
#if defined(JS_NUNBOX32)
    if (mir->isRotate())
        ins->setTemp(0, temp());
#endif

    static_assert(LShiftI64::Rhs == INT64_PIECES, "Assume Rhs is located at INT64_PIECES.");
    static_assert(LRotateI64::Count == INT64_PIECES, "Assume Count is located at INT64_PIECES.");

    // The shift amount must be a constant or live in ecx: x86 cannot use
    // any other register as a variable shift count.
    if (rhs->isConstant()) {
        ins->setOperand(INT64_PIECES, useOrConstantAtStart(rhs));
    } else {
        // The operands are int64, but we only care about the lower 32 bits of
        // the RHS. On 32-bit, the code below will load that part in ecx and
        // will discard the upper half.
        ensureDefined(rhs);
        LUse use(ecx);
        use.setVirtualRegister(rhs->virtualRegister());
        ins->setOperand(INT64_PIECES, use);
    }

    defineInt64ReuseInput(ins, mir, 0);
}

template void LIRGeneratorX86Shared::lowerForShiftInt64(
    LInstructionHelper<INT64_PIECES, INT64_PIECES+1, 0>* ins, MDefinition* mir,
    MDefinition* lhs, MDefinition* rhs);
template void LIRGeneratorX86Shared::lowerForShiftInt64(
    LInstructionHelper<INT64_PIECES, INT64_PIECES+1, 1>* ins, MDefinition* mir,
    MDefinition* lhs, MDefinition* rhs);
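
// Illustrative only: a scalar model of the 64-bit shift count handling. As
// the comment in lowerForShiftInt64 notes, only the low bits of the count
// matter, so on 32-bit targets it is enough to load the low word of the
// rhs into ecx.
static inline int64_t
ExampleShiftLeft64(int64_t lhs, int64_t rhs)
{
    // A 64-bit shift uses count & 63; the upper half of rhs is ignored.
    return int64_t(uint64_t(lhs) << (uint32_t(rhs) & 63));
}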

void
LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir,
                                   MDefinition* input)
{
    ins->setOperand(0, useRegisterAtStart(input));
    defineReuseInput(ins, mir, 0);
}

void
LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
                                   MDefinition* lhs, MDefinition* rhs)
{
    ins->setOperand(0, useRegisterAtStart(lhs));
    ins->setOperand(1, lhs != rhs ? useOrConstant(rhs) : useOrConstantAtStart(rhs));
    defineReuseInput(ins, mir, 0);
}

template<size_t Temps>
void
LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, Temps>* ins, MDefinition* mir,
                                   MDefinition* lhs, MDefinition* rhs)
{
    // Without AVX, we'll need to use the x86 encodings where one of the
    // inputs must be in the same location as the output.
    if (!Assembler::HasAVX()) {
        ins->setOperand(0, useRegisterAtStart(lhs));
        ins->setOperand(1, lhs != rhs ? use(rhs) : useAtStart(rhs));
        defineReuseInput(ins, mir, 0);
    } else {
        ins->setOperand(0, useRegisterAtStart(lhs));
        ins->setOperand(1, useAtStart(rhs));
        define(ins, mir);
    }
}
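
// For illustration (these exact instructions are an assumption, not taken
// from the codegen): the two-operand SSE form
//
//    addsd xmm0, xmm1      ; xmm0 <- xmm0 + xmm1, lhs is clobbered
//
// destructively overwrites its first input, so lhs must be tied to the
// output with defineReuseInput, while the three-operand AVX form
//
//    vaddsd xmm2, xmm0, xmm1   ; xmm2 <- xmm0 + xmm1
//
// has a separate destination and imposes no such constraint.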

template void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
                                                 MDefinition* lhs, MDefinition* rhs);
template void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, 1>* ins, MDefinition* mir,
                                                 MDefinition* lhs, MDefinition* rhs);

void
LIRGeneratorX86Shared::lowerForCompIx4(LSimdBinaryCompIx4* ins, MSimdBinaryComp* mir,
                                       MDefinition* lhs, MDefinition* rhs)
{
    lowerForALU(ins, mir, lhs, rhs);
}

void
LIRGeneratorX86Shared::lowerForCompFx4(LSimdBinaryCompFx4* ins, MSimdBinaryComp* mir,
                                       MDefinition* lhs, MDefinition* rhs)
{
    // Swap the operands around to fit the instructions that x86 actually has.
    // We do this here, before register allocation, so that we don't need
    // temporaries and copying afterwards.
    switch (mir->operation()) {
      case MSimdBinaryComp::greaterThan:
      case MSimdBinaryComp::greaterThanOrEqual:
        mir->reverse();
        Swap(lhs, rhs);
        break;
      default:
        break;
    }

    lowerForFPU(ins, mir, lhs, rhs);
}

void
LIRGeneratorX86Shared::lowerForBitAndAndBranch(LBitAndAndBranch* baab, MInstruction* mir,
                                               MDefinition* lhs, MDefinition* rhs)
{
    baab->setOperand(0, useRegisterAtStart(lhs));
    baab->setOperand(1, useRegisterOrConstantAtStart(rhs));
    add(baab, mir);
}

void
LIRGeneratorX86Shared::lowerMulI(MMul* mul, MDefinition* lhs, MDefinition* rhs)
{
    // Note: If we need a negative zero check, lhs is used twice.
    LAllocation lhsCopy = mul->canBeNegativeZero() ? use(lhs) : LAllocation();
    LMulI* lir = new(alloc()) LMulI(useRegisterAtStart(lhs), useOrConstant(rhs), lhsCopy);
    if (mul->fallible())
        assignSnapshot(lir, Bailout_DoubleOutput);
    defineReuseInput(lir, mul, 0);
}
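
// Illustrative only: a scalar sketch of the negative-zero hazard mentioned
// above. The product of two int32 values is -0 (a double, not an int32)
// exactly when the integer result is 0 and either operand is negative,
// which is why the lowering keeps a second copy of lhs alive for the check.
static inline bool
ExampleMulWouldBeNegativeZero(int32_t lhs, int32_t rhs)
{
    return (lhs == 0 && rhs < 0) || (rhs == 0 && lhs < 0);
}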

void
LIRGeneratorX86Shared::lowerDivI(MDiv* div)
{
    if (div->isUnsigned()) {
        lowerUDiv(div);
        return;
    }

    // Division instructions are slow. Division by constant denominators can be
    // rewritten to use other instructions.
    if (div->rhs()->isConstant()) {
        int32_t rhs = div->rhs()->toConstant()->toInt32();

        // Division by powers of two can be done by shifting, and division by
        // other numbers can be done by a reciprocal multiplication technique.
        int32_t shift = FloorLog2(Abs(rhs));
        if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) {
            LAllocation lhs = useRegisterAtStart(div->lhs());
            LDivPowTwoI* lir;
            if (!div->canBeNegativeDividend()) {
                // Numerator is unsigned, so does not need adjusting.
                lir = new(alloc()) LDivPowTwoI(lhs, lhs, shift, rhs < 0);
            } else {
                // Numerator is signed and needs adjusting, so an extra
                // lhs copy register is needed.
                lir = new(alloc()) LDivPowTwoI(lhs, useRegister(div->lhs()), shift, rhs < 0);
            }
            if (div->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineReuseInput(lir, div, 0);
            return;
        }
        if (rhs != 0) {
            LDivOrModConstantI* lir;
            lir = new(alloc()) LDivOrModConstantI(useRegister(div->lhs()), rhs, tempFixed(eax));
            if (div->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineFixed(lir, div, LAllocation(AnyRegister(edx)));
            return;
        }
    }

    LDivI* lir = new(alloc()) LDivI(useRegister(div->lhs()), useRegister(div->rhs()),
                                    tempFixed(edx));
    if (div->fallible())
        assignSnapshot(lir, Bailout_DoubleOutput);
    defineFixed(lir, div, LAllocation(AnyRegister(eax)));
}
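
// Illustrative only: a scalar model of the signed power-of-two case above.
// A bare arithmetic shift rounds toward negative infinity, so a negative
// dividend must be biased by (2^shift - 1) first to get the round-toward-
// zero result that division requires; this is the "adjusting" the comments
// refer to.
static inline int32_t
ExampleDivPowTwoSigned(int32_t lhs, int32_t shift)
{
    if (lhs < 0)
        lhs += (int32_t(1) << shift) - 1;
    return lhs >> shift;
}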

void
LIRGeneratorX86Shared::lowerModI(MMod* mod)
{
    if (mod->isUnsigned()) {
        lowerUMod(mod);
        return;
    }

    if (mod->rhs()->isConstant()) {
        int32_t rhs = mod->rhs()->toConstant()->toInt32();
        int32_t shift = FloorLog2(Abs(rhs));
        if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) {
            LModPowTwoI* lir = new(alloc()) LModPowTwoI(useRegisterAtStart(mod->lhs()), shift);
            if (mod->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineReuseInput(lir, mod, 0);
            return;
        }
        if (rhs != 0) {
            LDivOrModConstantI* lir;
            lir = new(alloc()) LDivOrModConstantI(useRegister(mod->lhs()), rhs, tempFixed(edx));
            if (mod->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineFixed(lir, mod, LAllocation(AnyRegister(eax)));
            return;
        }
    }

    LModI* lir = new(alloc()) LModI(useRegister(mod->lhs()),
                                    useRegister(mod->rhs()),
                                    tempFixed(eax));
    if (mod->fallible())
        assignSnapshot(lir, Bailout_DoubleOutput);
    defineFixed(lir, mod, LAllocation(AnyRegister(edx)));
}

void
LIRGeneratorX86Shared::visitWasmSelect(MWasmSelect* ins)
{
    if (ins->type() == MIRType::Int64) {
        auto* lir = new(alloc()) LWasmSelectI64(useInt64RegisterAtStart(ins->trueExpr()),
                                                useInt64(ins->falseExpr()),
                                                useRegister(ins->condExpr()));

        defineInt64ReuseInput(lir, ins, LWasmSelectI64::TrueExprIndex);
        return;
    }

    auto* lir = new(alloc()) LWasmSelect(useRegisterAtStart(ins->trueExpr()),
                                         use(ins->falseExpr()),
                                         useRegister(ins->condExpr()));

    defineReuseInput(lir, ins, LWasmSelect::TrueExprIndex);
}

void
LIRGeneratorX86Shared::visitWasmNeg(MWasmNeg* ins)
{
    switch (ins->type()) {
      case MIRType::Int32:
        defineReuseInput(new(alloc()) LNegI(useRegisterAtStart(ins->input())), ins, 0);
        break;
      case MIRType::Float32:
        defineReuseInput(new(alloc()) LNegF(useRegisterAtStart(ins->input())), ins, 0);
        break;
      case MIRType::Double:
        defineReuseInput(new(alloc()) LNegD(useRegisterAtStart(ins->input())), ins, 0);
        break;
      default:
        MOZ_CRASH();
    }
}

void
LIRGeneratorX86Shared::lowerUDiv(MDiv* div)
{
    if (div->rhs()->isConstant()) {
        uint32_t rhs = div->rhs()->toConstant()->toInt32();
        int32_t shift = FloorLog2(rhs);

        LAllocation lhs = useRegisterAtStart(div->lhs());
        if (rhs != 0 && uint32_t(1) << shift == rhs) {
            LDivPowTwoI* lir = new(alloc()) LDivPowTwoI(lhs, lhs, shift, false);
            if (div->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineReuseInput(lir, div, 0);
        } else {
            LUDivOrModConstant* lir = new(alloc()) LUDivOrModConstant(useRegister(div->lhs()),
                                                                      rhs, tempFixed(eax));
            if (div->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineFixed(lir, div, LAllocation(AnyRegister(edx)));
        }
        return;
    }

    LUDivOrMod* lir = new(alloc()) LUDivOrMod(useRegister(div->lhs()),
                                              useRegister(div->rhs()),
                                              tempFixed(edx));
    if (div->fallible())
        assignSnapshot(lir, Bailout_DoubleOutput);
    defineFixed(lir, div, LAllocation(AnyRegister(eax)));
}
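
// Illustrative only: the reciprocal-multiplication idea behind
// LUDivOrModConstant, shown for the divisor 5. The magic value is
// ceil(2^34 / 5); a widening multiply plus a shift then replaces the much
// slower DIV instruction. The constant/shift pair here is the standard one
// compilers emit for unsigned division by 5, not a value taken from this
// file.
static inline uint32_t
ExampleUDivBy5(uint32_t lhs)
{
    return uint32_t((uint64_t(lhs) * 0xCCCCCCCDu) >> 34);
}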

void
LIRGeneratorX86Shared::lowerUMod(MMod* mod)
{
    if (mod->rhs()->isConstant()) {
        uint32_t rhs = mod->rhs()->toConstant()->toInt32();
        int32_t shift = FloorLog2(rhs);

        if (rhs != 0 && uint32_t(1) << shift == rhs) {
            LModPowTwoI* lir = new(alloc()) LModPowTwoI(useRegisterAtStart(mod->lhs()), shift);
            if (mod->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineReuseInput(lir, mod, 0);
        } else {
            LUDivOrModConstant* lir = new(alloc()) LUDivOrModConstant(useRegister(mod->lhs()),
                                                                      rhs, tempFixed(edx));
            if (mod->fallible())
                assignSnapshot(lir, Bailout_DoubleOutput);
            defineFixed(lir, mod, LAllocation(AnyRegister(eax)));
        }
        return;
    }

    LUDivOrMod* lir = new(alloc()) LUDivOrMod(useRegister(mod->lhs()),
                                              useRegister(mod->rhs()),
                                              tempFixed(eax));
    if (mod->fallible())
        assignSnapshot(lir, Bailout_DoubleOutput);
    defineFixed(lir, mod, LAllocation(AnyRegister(edx)));
}
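
// Illustrative only: the constant-modulus counterpart of the sketch above.
// Once the quotient is available via reciprocal multiplication, the
// remainder falls out with one more multiply and subtract, which is why a
// single LUDivOrModConstant node can serve both operations.
static inline uint32_t
ExampleUModBy5(uint32_t lhs)
{
    uint32_t quotient = uint32_t((uint64_t(lhs) * 0xCCCCCCCDu) >> 34);
    return lhs - quotient * 5;
}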

void
LIRGeneratorX86Shared::lowerUrshD(MUrsh* mir)
{
    MDefinition* lhs = mir->lhs();
    MDefinition* rhs = mir->rhs();

    MOZ_ASSERT(lhs->type() == MIRType::Int32);
    MOZ_ASSERT(rhs->type() == MIRType::Int32);
    MOZ_ASSERT(mir->type() == MIRType::Double);

#ifdef JS_CODEGEN_X64
    MOZ_ASSERT(ecx == rcx);
#endif

    LUse lhsUse = useRegisterAtStart(lhs);
    LAllocation rhsAlloc = rhs->isConstant() ? useOrConstant(rhs) : useFixed(rhs, ecx);

    LUrshD* lir = new(alloc()) LUrshD(lhsUse, rhsAlloc, tempCopy(lhs, 0));
    define(lir, mir);
}
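
// Illustrative only: why an unsigned right shift produces a Double here.
// The int32 input is reinterpreted as uint32, and a value like
// 0x80000000 >>> 0 == 2147483648 does not fit in an int32, so the result
// type must be able to represent the full uint32 range.
static inline double
ExampleUrshToDouble(int32_t lhs, int32_t rhs)
{
    return double(uint32_t(lhs) >> (rhs & 31));
}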

void
LIRGeneratorX86Shared::lowerTruncateDToInt32(MTruncateToInt32* ins)
{
    MDefinition* opd = ins->input();
    MOZ_ASSERT(opd->type() == MIRType::Double);

    LDefinition maybeTemp = Assembler::HasSSE3() ? LDefinition::BogusTemp() : tempDouble();
    define(new(alloc()) LTruncateDToInt32(useRegister(opd), maybeTemp), ins);
}

void
LIRGeneratorX86Shared::lowerTruncateFToInt32(MTruncateToInt32* ins)
{
    MDefinition* opd = ins->input();
    MOZ_ASSERT(opd->type() == MIRType::Float32);

    LDefinition maybeTemp = Assembler::HasSSE3() ? LDefinition::BogusTemp() : tempFloat32();
    define(new(alloc()) LTruncateFToInt32(useRegister(opd), maybeTemp), ins);
}

void
LIRGeneratorX86Shared::lowerCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement* ins,
                                                             bool useI386ByteRegisters)
{
    MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
    MOZ_ASSERT(ins->arrayType() != Scalar::Float64);

    MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
    MOZ_ASSERT(ins->index()->type() == MIRType::Int32);

    const LUse elements = useRegister(ins->elements());
    const LAllocation index = useRegisterOrConstant(ins->index());

    // If the target is a floating register then we need a temp at the
    // lower level; that temp must be eax.
    //
    // Otherwise the target (if used) is an integer register, which
    // must be eax. If the target is not used the machine code will
    // still clobber eax, so just pretend it's used.
    //
    // oldval must be in a register.
    //
    // newval must be in a register. If the source is a byte array
    // then newval must be a register that has a byte size: on x86
    // this must be ebx, ecx, or edx (eax is taken for the output).
    //
    // Bug #1077036 describes some further optimization opportunities.

    bool fixedOutput = false;
    LDefinition tempDef = LDefinition::BogusTemp();
    LAllocation newval;
    if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
        tempDef = tempFixed(eax);
        newval = useRegister(ins->newval());
    } else {
        fixedOutput = true;
        if (useI386ByteRegisters && ins->isByteArray())
            newval = useFixed(ins->newval(), ebx);
        else
            newval = useRegister(ins->newval());
    }

    const LAllocation oldval = useRegister(ins->oldval());

    LCompareExchangeTypedArrayElement* lir =
        new(alloc()) LCompareExchangeTypedArrayElement(elements, index, oldval, newval, tempDef);

    if (fixedOutput)
        defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
    else
        define(lir, ins);
}
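
// Illustrative only: a scalar model of the operation being lowered, using
// std::atomic rather than the jit's codegen (<atomic> is pulled in here
// just for these sketches). Like CMPXCHG, which always leaves the previous
// memory value in eax/al, this returns the old value whether or not the
// swap happened.
#include <atomic>

static inline uint8_t
ExampleCompareExchange(std::atomic<uint8_t>* cell, uint8_t oldval, uint8_t newval)
{
    // On failure, compare_exchange_strong writes the current cell value
    // back into oldval, so oldval holds the previous value either way.
    cell->compare_exchange_strong(oldval, newval);
    return oldval;
}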

void
LIRGeneratorX86Shared::lowerAtomicExchangeTypedArrayElement(MAtomicExchangeTypedArrayElement* ins,
                                                            bool useI386ByteRegisters)
{
    MOZ_ASSERT(ins->arrayType() <= Scalar::Uint32);

    MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
    MOZ_ASSERT(ins->index()->type() == MIRType::Int32);

    const LUse elements = useRegister(ins->elements());
    const LAllocation index = useRegisterOrConstant(ins->index());
    const LAllocation value = useRegister(ins->value());

    // The underlying instruction is XCHG, which can operate on any
    // register.
    //
    // If the target is a floating register (for Uint32) then we need
    // a temp into which to exchange.
    //
    // If the source is a byte array then we need a register that has
    // a byte size; in this case -- on x86 only -- pin the output to
    // an appropriate register and use that as a temp in the back-end.

    LDefinition tempDef = LDefinition::BogusTemp();
    if (ins->arrayType() == Scalar::Uint32) {
        // This restriction is bug 1077305.
        MOZ_ASSERT(ins->type() == MIRType::Double);
        tempDef = temp();
    }

    LAtomicExchangeTypedArrayElement* lir =
        new(alloc()) LAtomicExchangeTypedArrayElement(elements, index, value, tempDef);

    if (useI386ByteRegisters && ins->isByteArray())
        defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
    else
        define(lir, ins);
}
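
// Illustrative only, reusing std::atomic from the sketch above: exchange
// maps directly onto XCHG, which is implicitly locked when it has a memory
// operand, so no LOCK prefix or retry loop is needed.
static inline uint32_t
ExampleAtomicExchange(std::atomic<uint32_t>* cell, uint32_t value)
{
    return cell->exchange(value);
}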

void
LIRGeneratorX86Shared::lowerAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop* ins,
                                                         bool useI386ByteRegisters)
{
    MOZ_ASSERT(ins->arrayType() != Scalar::Uint8Clamped);
    MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
    MOZ_ASSERT(ins->arrayType() != Scalar::Float64);

    MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
    MOZ_ASSERT(ins->index()->type() == MIRType::Int32);

    const LUse elements = useRegister(ins->elements());
    const LAllocation index = useRegisterOrConstant(ins->index());

    // Case 1: the result of the operation is not used.
    //
    // We'll emit a single instruction: LOCK ADD, LOCK SUB, LOCK AND,
    // LOCK OR, or LOCK XOR. We can do this even for the Uint32 case.

    if (!ins->hasUses()) {
        LAllocation value;
        if (useI386ByteRegisters && ins->isByteArray() && !ins->value()->isConstant())
            value = useFixed(ins->value(), ebx);
        else
            value = useRegisterOrConstant(ins->value());

        LAtomicTypedArrayElementBinopForEffect* lir =
            new(alloc()) LAtomicTypedArrayElementBinopForEffect(elements, index, value);

        add(lir, ins);
        return;
    }

    // Case 2: the result of the operation is used.
    //
    // For ADD and SUB we'll use XADD:
    //
    //    movl       src, output
    //    lock xaddl output, mem
    //
    // For the 8-bit variants XADD needs a byte register for the output.
    //
    // For AND/OR/XOR we need to use a CMPXCHG loop:
    //
    //    movl         *mem, eax
    // L: mov          eax, temp
    //    andl         src, temp
    //    lock cmpxchg temp, mem  ; reads eax also
    //    jnz          L
    //    ; result in eax
    //
    // Note the placement of L: cmpxchg will update eax with *mem if
    // *mem does not have the expected value, so reloading it at the
    // top of the loop would be redundant.
    //
    // If the array is not a uint32 array then:
    //  - eax should be the output (one result of the cmpxchg)
    //  - there is a temp, which must have a byte register if
    //    the array has 1-byte elements
    //
    // If the array is a uint32 array then:
    //  - eax is the first temp
    //  - we also need a second temp
    //
    // There are optimization opportunities:
    //  - better register allocation in the x86 8-bit case, Bug #1077036.

    bool bitOp = !(ins->operation() == AtomicFetchAddOp || ins->operation() == AtomicFetchSubOp);
    bool fixedOutput = true;
    bool reuseInput = false;
    LDefinition tempDef1 = LDefinition::BogusTemp();
    LDefinition tempDef2 = LDefinition::BogusTemp();
    LAllocation value;

    if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
        value = useRegisterOrConstant(ins->value());
        fixedOutput = false;
        if (bitOp) {
            tempDef1 = tempFixed(eax);
            tempDef2 = temp();
        } else {
            tempDef1 = temp();
        }
    } else if (useI386ByteRegisters && ins->isByteArray()) {
        if (ins->value()->isConstant())
            value = useRegisterOrConstant(ins->value());
        else
            value = useFixed(ins->value(), ebx);
        if (bitOp)
            tempDef1 = tempFixed(ecx);
    } else if (bitOp) {
        value = useRegisterOrConstant(ins->value());
        tempDef1 = temp();
    } else if (ins->value()->isConstant()) {
        fixedOutput = false;
        value = useRegisterOrConstant(ins->value());
    } else {
        fixedOutput = false;
        reuseInput = true;
        value = useRegisterAtStart(ins->value());
    }

    LAtomicTypedArrayElementBinop* lir =
        new(alloc()) LAtomicTypedArrayElementBinop(elements, index, value, tempDef1, tempDef2);

    if (fixedOutput)
        defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
    else if (reuseInput)
        defineReuseInput(lir, ins, LAtomicTypedArrayElementBinop::valueOp);
    else
        define(lir, ins);
}
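
// Illustrative only, again with std::atomic: a scalar model of the CMPXCHG
// loop described above for the AND/OR/XOR case. compare_exchange_weak
// reloads 'old' on failure exactly the way CMPXCHG refreshes eax, so the
// loop does not need to reload the cell at the top.
static inline uint32_t
ExampleAtomicFetchAnd(std::atomic<uint32_t>* cell, uint32_t value)
{
    uint32_t old = cell->load();
    while (!cell->compare_exchange_weak(old, old & value)) {
        // 'old' now holds the value another thread raced in; retry.
    }
    return old;
}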

void
LIRGeneratorX86Shared::visitSimdInsertElement(MSimdInsertElement* ins)
{
    MOZ_ASSERT(IsSimdType(ins->type()));

    LUse vec = useRegisterAtStart(ins->vector());
    LUse val = useRegister(ins->value());
    switch (ins->type()) {
      case MIRType::Int8x16:
      case MIRType::Bool8x16:
        // When SSE 4.1 is not available, we need to go via the stack.
        // This requires the value being inserted to be in a byte register:
        // %eax, %ebx, %ecx, or %edx.
        // Pick %ebx since other instructions use %eax or %ecx hard-wired.
#if defined(JS_CODEGEN_X86)
        if (!AssemblerX86Shared::HasSSE41())
            val = useFixed(ins->value(), ebx);
#endif
        defineReuseInput(new(alloc()) LSimdInsertElementI(vec, val), ins, 0);
        break;
      case MIRType::Int16x8:
      case MIRType::Int32x4:
      case MIRType::Bool16x8:
      case MIRType::Bool32x4:
        defineReuseInput(new(alloc()) LSimdInsertElementI(vec, val), ins, 0);
        break;
      case MIRType::Float32x4:
        defineReuseInput(new(alloc()) LSimdInsertElementF(vec, val), ins, 0);
        break;
      default:
        MOZ_CRASH("Unknown SIMD kind when generating constant");
    }
}

void
LIRGeneratorX86Shared::visitSimdExtractElement(MSimdExtractElement* ins)
{
    MOZ_ASSERT(IsSimdType(ins->input()->type()));
    MOZ_ASSERT(!IsSimdType(ins->type()));

    switch (ins->input()->type()) {
      case MIRType::Int8x16:
      case MIRType::Int16x8:
      case MIRType::Int32x4: {
        MOZ_ASSERT(ins->signedness() != SimdSign::NotApplicable);
        LUse use = useRegisterAtStart(ins->input());
        if (ins->type() == MIRType::Double) {
            // Extract a Uint32 lane into a double.
            MOZ_ASSERT(ins->signedness() == SimdSign::Unsigned);
            define(new (alloc()) LSimdExtractElementU2D(use, temp()), ins);
        } else {
            auto* lir = new (alloc()) LSimdExtractElementI(use);
#if defined(JS_CODEGEN_X86)
            // On x86 (32-bit), we may need to use movsbl or movzbl instructions
            // to sign or zero extend the extracted lane to 32 bits. The 8-bit
            // versions of these instructions require a source register that is
            // %al, %bl, %cl, or %dl.
            // Fix it to %ebx since we can't express that constraint better.
            if (ins->input()->type() == MIRType::Int8x16) {
                defineFixed(lir, ins, LAllocation(AnyRegister(ebx)));
                return;
            }
#endif
            define(lir, ins);
        }
        break;
      }
      case MIRType::Float32x4: {
        MOZ_ASSERT(ins->signedness() == SimdSign::NotApplicable);
        LUse use = useRegisterAtStart(ins->input());
        define(new(alloc()) LSimdExtractElementF(use), ins);
        break;
      }
      case MIRType::Bool8x16:
      case MIRType::Bool16x8:
      case MIRType::Bool32x4: {
        MOZ_ASSERT(ins->signedness() == SimdSign::NotApplicable);
        LUse use = useRegisterAtStart(ins->input());
        define(new(alloc()) LSimdExtractElementB(use), ins);
        break;
      }
      default:
        MOZ_CRASH("Unknown SIMD kind when extracting element");
    }
}

void
LIRGeneratorX86Shared::visitSimdBinaryArith(MSimdBinaryArith* ins)
{
    MOZ_ASSERT(IsSimdType(ins->lhs()->type()));
    MOZ_ASSERT(IsSimdType(ins->rhs()->type()));
    MOZ_ASSERT(IsSimdType(ins->type()));

    MDefinition* lhs = ins->lhs();
    MDefinition* rhs = ins->rhs();

    if (ins->isCommutative())
        ReorderCommutative(&lhs, &rhs, ins);

    switch (ins->type()) {
      case MIRType::Int8x16: {
        LSimdBinaryArithIx16* lir = new (alloc()) LSimdBinaryArithIx16();
        lir->setTemp(0, LDefinition::BogusTemp());
        lowerForFPU(lir, ins, lhs, rhs);
        return;
      }

      case MIRType::Int16x8: {
        LSimdBinaryArithIx8* lir = new (alloc()) LSimdBinaryArithIx8();
        lir->setTemp(0, LDefinition::BogusTemp());
        lowerForFPU(lir, ins, lhs, rhs);
        return;
      }

      case MIRType::Int32x4: {
        LSimdBinaryArithIx4* lir = new (alloc()) LSimdBinaryArithIx4();
        bool needsTemp =
            ins->operation() == MSimdBinaryArith::Op_mul && !MacroAssembler::HasSSE41();
        lir->setTemp(0, needsTemp ? temp(LDefinition::SIMD128INT) : LDefinition::BogusTemp());
        lowerForFPU(lir, ins, lhs, rhs);
        return;
      }

      case MIRType::Float32x4: {
        LSimdBinaryArithFx4* lir = new (alloc()) LSimdBinaryArithFx4();

        bool needsTemp = ins->operation() == MSimdBinaryArith::Op_max ||
                         ins->operation() == MSimdBinaryArith::Op_minNum ||
                         ins->operation() == MSimdBinaryArith::Op_maxNum;
        lir->setTemp(0,
                     needsTemp ? temp(LDefinition::SIMD128FLOAT) : LDefinition::BogusTemp());
        lowerForFPU(lir, ins, lhs, rhs);
        return;
      }

      default:
        MOZ_CRASH("unknown simd type on binary arith operation");
    }
}

void
LIRGeneratorX86Shared::visitSimdBinarySaturating(MSimdBinarySaturating* ins)
{
    MOZ_ASSERT(IsSimdType(ins->lhs()->type()));
    MOZ_ASSERT(IsSimdType(ins->rhs()->type()));
    MOZ_ASSERT(IsSimdType(ins->type()));

    MDefinition* lhs = ins->lhs();
    MDefinition* rhs = ins->rhs();

    if (ins->isCommutative())
        ReorderCommutative(&lhs, &rhs, ins);

    LSimdBinarySaturating* lir = new (alloc()) LSimdBinarySaturating();
    lowerForFPU(lir, ins, lhs, rhs);
}
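
// Illustrative only: the per-lane behavior being lowered here. Saturating
// SIMD instructions (e.g. paddsb for signed 8-bit lanes) clamp at the type
// bounds instead of wrapping the way ordinary integer addition does.
static inline int8_t
ExampleSaturatingAddI8(int8_t a, int8_t b)
{
    int32_t sum = int32_t(a) + int32_t(b);
    if (sum > 127)
        return 127;
    if (sum < -128)
        return -128;
    return int8_t(sum);
}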

void
LIRGeneratorX86Shared::visitSimdSelect(MSimdSelect* ins)
{
    MOZ_ASSERT(IsSimdType(ins->type()));

    LSimdSelect* lins = new(alloc()) LSimdSelect;
    MDefinition* r0 = ins->getOperand(0);
    MDefinition* r1 = ins->getOperand(1);
    MDefinition* r2 = ins->getOperand(2);

    lins->setOperand(0, useRegister(r0));
    lins->setOperand(1, useRegister(r1));
    lins->setOperand(2, useRegister(r2));
    lins->setTemp(0, temp(LDefinition::SIMD128FLOAT));

    define(lins, ins);
}

void
LIRGeneratorX86Shared::visitSimdSplat(MSimdSplat* ins)
{
    LAllocation x = useRegisterAtStart(ins->getOperand(0));

    switch (ins->type()) {
      case MIRType::Int8x16:
        define(new (alloc()) LSimdSplatX16(x), ins);
        break;
      case MIRType::Int16x8:
        define(new (alloc()) LSimdSplatX8(x), ins);
        break;
      case MIRType::Int32x4:
      case MIRType::Float32x4:
      case MIRType::Bool8x16:
      case MIRType::Bool16x8:
      case MIRType::Bool32x4:
        // Use the SplatX4 instruction for all boolean splats. Since the input
        // value is a 32-bit int that is either 0 or -1, the X4 splat gives
        // the right result for all boolean geometries.
        // For floats, (Non-AVX) codegen actually wants the input and the output
        // to be in the same register, but we can't currently use
        // defineReuseInput because they have different types (scalar vs
        // vector), so a spill slot for one may not be suitable for the other.
        define(new (alloc()) LSimdSplatX4(x), ins);
        break;
      default:
        MOZ_CRASH("Unknown SIMD kind");
    }
}

void
LIRGeneratorX86Shared::visitSimdValueX4(MSimdValueX4* ins)
{
    switch (ins->type()) {
      case MIRType::Float32x4: {
        // Ideally, x would be used at start and reused for the output, however
        // register allocation currently doesn't permit us to tie together two
        // virtual registers with different types.
        LAllocation x = useRegister(ins->getOperand(0));
        LAllocation y = useRegister(ins->getOperand(1));
        LAllocation z = useRegister(ins->getOperand(2));
        LAllocation w = useRegister(ins->getOperand(3));
        LDefinition t = temp(LDefinition::SIMD128FLOAT);
        define(new (alloc()) LSimdValueFloat32x4(x, y, z, w, t), ins);
        break;
      }
      case MIRType::Bool32x4:
      case MIRType::Int32x4: {
        // No defineReuseInput => useAtStart for everyone.
        LAllocation x = useRegisterAtStart(ins->getOperand(0));
        LAllocation y = useRegisterAtStart(ins->getOperand(1));
        LAllocation z = useRegisterAtStart(ins->getOperand(2));
        LAllocation w = useRegisterAtStart(ins->getOperand(3));
        define(new(alloc()) LSimdValueInt32x4(x, y, z, w), ins);
        break;
      }
      default:
        MOZ_CRASH("Unknown SIMD kind");
    }
}

void
LIRGeneratorX86Shared::visitSimdSwizzle(MSimdSwizzle* ins)
{
    MOZ_ASSERT(IsSimdType(ins->input()->type()));
    MOZ_ASSERT(IsSimdType(ins->type()));

    if (IsIntegerSimdType(ins->input()->type())) {
        LUse use = useRegisterAtStart(ins->input());
        LSimdSwizzleI* lir = new (alloc()) LSimdSwizzleI(use);
        define(lir, ins);
        // We need a GPR temp register for pre-SSSE3 codegen (no vpshufb).
        if (Assembler::HasSSSE3()) {
            lir->setTemp(0, LDefinition::BogusTemp());
        } else {
            // The temp must be a GPR usable with 8-bit loads and stores.
#if defined(JS_CODEGEN_X86)
            lir->setTemp(0, tempFixed(ebx));
#else
            lir->setTemp(0, temp());
#endif
        }
    } else if (ins->input()->type() == MIRType::Float32x4) {
        LUse use = useRegisterAtStart(ins->input());
        LSimdSwizzleF* lir = new (alloc()) LSimdSwizzleF(use);
        define(lir, ins);
        lir->setTemp(0, LDefinition::BogusTemp());
    } else {
        MOZ_CRASH("Unknown SIMD kind when getting lane");
    }
}

void
LIRGeneratorX86Shared::visitSimdShuffle(MSimdShuffle* ins)
{
    MOZ_ASSERT(IsSimdType(ins->lhs()->type()));
    MOZ_ASSERT(IsSimdType(ins->rhs()->type()));
    MOZ_ASSERT(IsSimdType(ins->type()));
    if (ins->type() == MIRType::Int32x4 || ins->type() == MIRType::Float32x4) {
        bool zFromLHS = ins->lane(2) < 4;
        bool wFromLHS = ins->lane(3) < 4;
        uint32_t lanesFromLHS = (ins->lane(0) < 4) + (ins->lane(1) < 4) + zFromLHS + wFromLHS;

        LSimdShuffleX4* lir = new (alloc()) LSimdShuffleX4();
        lowerForFPU(lir, ins, ins->lhs(), ins->rhs());

        // See codegen for details on the requirements.
        LDefinition temp =
            (lanesFromLHS == 3) ? tempCopy(ins->rhs(), 1) : LDefinition::BogusTemp();
        lir->setTemp(0, temp);
    } else {
        MOZ_ASSERT(ins->type() == MIRType::Int8x16 || ins->type() == MIRType::Int16x8);
        LSimdShuffle* lir = new (alloc()) LSimdShuffle();
        lir->setOperand(0, useRegister(ins->lhs()));
        lir->setOperand(1, useRegister(ins->rhs()));
        define(lir, ins);
        // We need a GPR temp register for pre-SSSE3 codegen, and an SSE temp
        // when using pshufb.
        if (Assembler::HasSSSE3()) {
            lir->setTemp(0, temp(LDefinition::SIMD128INT));
        } else {
            // The temp must be a GPR usable with 8-bit loads and stores.
#if defined(JS_CODEGEN_X86)
            lir->setTemp(0, tempFixed(ebx));
#else
            lir->setTemp(0, temp());
#endif
        }
    }
}

void
LIRGeneratorX86Shared::visitSimdGeneralShuffle(MSimdGeneralShuffle* ins)
{
    MOZ_ASSERT(IsSimdType(ins->type()));

    LSimdGeneralShuffleBase* lir;
    if (IsIntegerSimdType(ins->type())) {
#if defined(JS_CODEGEN_X86)
        // The temp register must be usable with 8-bit load and store
        // instructions, so one of %eax, %ebx, %ecx, or %edx.
        LDefinition t;
        if (ins->type() == MIRType::Int8x16)
            t = tempFixed(ebx);
        else
            t = temp();
#else
        LDefinition t = temp();
#endif
        lir = new (alloc()) LSimdGeneralShuffleI(t);
    } else if (ins->type() == MIRType::Float32x4) {
        lir = new (alloc()) LSimdGeneralShuffleF(temp());
    } else {
        MOZ_CRASH("Unknown SIMD kind when doing a shuffle");
    }

    if (!lir->init(alloc(), ins->numVectors() + ins->numLanes()))
        return;

    for (unsigned i = 0; i < ins->numVectors(); i++) {
        MOZ_ASSERT(IsSimdType(ins->vector(i)->type()));
        lir->setOperand(i, useRegister(ins->vector(i)));
    }

    for (unsigned i = 0; i < ins->numLanes(); i++) {
        MOZ_ASSERT(ins->lane(i)->type() == MIRType::Int32);
        // Note that there can be up to 16 lane arguments, so we can't assume
        // that they all get an allocated register.
        lir->setOperand(i + ins->numVectors(), use(ins->lane(i)));
    }

    assignSnapshot(lir, Bailout_BoundsCheck);
    define(lir, ins);
}

void
LIRGeneratorX86Shared::visitCopySign(MCopySign* ins)
{
    MDefinition* lhs = ins->lhs();
    MDefinition* rhs = ins->rhs();

    MOZ_ASSERT(IsFloatingPointType(lhs->type()));
    MOZ_ASSERT(lhs->type() == rhs->type());
    MOZ_ASSERT(lhs->type() == ins->type());

    LInstructionHelper<1, 2, 2>* lir;
    if (lhs->type() == MIRType::Double)
        lir = new(alloc()) LCopySignD();
    else
        lir = new(alloc()) LCopySignF();

    // As in lowerForFPU, but we want rhs to be in a FP register too.
    lir->setOperand(0, useRegisterAtStart(lhs));
    lir->setOperand(1, lhs != rhs ? useRegister(rhs) : useRegisterAtStart(rhs));
    if (!Assembler::HasAVX())
        defineReuseInput(lir, ins, 0);
    else
        define(lir, ins);
}
|