/src/mozilla-central/js/src/jit/x86-shared/Lowering-x86-shared.cpp
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 * vim: set ts=8 sts=4 et sw=4 tw=99:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "jit/x86-shared/Lowering-x86-shared.h"

#include "mozilla/MathAlgorithms.h"

#include "jit/Lowering.h"
#include "jit/MIR.h"

#include "jit/shared/Lowering-shared-inl.h"

using namespace js;
using namespace js::jit;

using mozilla::Abs;
using mozilla::FloorLog2;
using mozilla::Swap;

LTableSwitch*
LIRGeneratorX86Shared::newLTableSwitch(const LAllocation& in, const LDefinition& inputCopy,
                                       MTableSwitch* tableswitch)
{
    return new(alloc()) LTableSwitch(in, inputCopy, temp(), tableswitch);
}

LTableSwitchV*
LIRGeneratorX86Shared::newLTableSwitchV(MTableSwitch* tableswitch)
{
    return new(alloc()) LTableSwitchV(useBox(tableswitch->getOperand(0)),
                                      temp(), tempDouble(), temp(), tableswitch);
}

void
LIRGenerator::visitPowHalf(MPowHalf* ins)
{
    MDefinition* input = ins->input();
    MOZ_ASSERT(input->type() == MIRType::Double);
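    // pow(x, 0.5) is not plain sqrt(x): the spec requires pow(-Infinity, 0.5)
    // to be +Infinity, where sqrt would produce NaN, hence a dedicated LIR op.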
    LPowHalfD* lir = new(alloc()) LPowHalfD(useRegisterAtStart(input));
    define(lir, ins);
}

void
LIRGeneratorX86Shared::lowerForShift(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
                                     MDefinition* lhs, MDefinition* rhs)
{
    ins->setOperand(0, useRegisterAtStart(lhs));

    // The shift count must either be a constant or live in ecx:
    // x86 cannot shift by any other register.
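    // (Variable-count shift encodings such as SHL r/m32, CL hard-wire the
    // count to cl, the low byte of ecx/rcx.)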
    if (rhs->isConstant()) {
        ins->setOperand(1, useOrConstantAtStart(rhs));
    } else {
        ins->setOperand(1, lhs != rhs ? useFixed(rhs, ecx) : useFixedAtStart(rhs, ecx));
    }

    defineReuseInput(ins, mir, 0);
}

template<size_t Temps>
void
LIRGeneratorX86Shared::lowerForShiftInt64(LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, Temps>* ins,
                                          MDefinition* mir, MDefinition* lhs, MDefinition* rhs)
{
    ins->setInt64Operand(0, useInt64RegisterAtStart(lhs));
#if defined(JS_NUNBOX32)
    if (mir->isRotate()) {
        ins->setTemp(0, temp());
    }
#endif

    static_assert(LShiftI64::Rhs == INT64_PIECES, "Assume Rhs is located at INT64_PIECES.");
    static_assert(LRotateI64::Count == INT64_PIECES, "Assume Count is located at INT64_PIECES.");

    // The shift count must either be a constant or live in ecx:
    // x86 cannot shift by any other register.
    if (rhs->isConstant()) {
        ins->setOperand(INT64_PIECES, useOrConstantAtStart(rhs));
    } else {
        // The operands are int64, but we only care about the lower 32 bits of
        // the RHS. On 32-bit, the code below will load that part in ecx and
        // will discard the upper half.
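        // Build the fixed-ecx use by hand: wiring the virtual register
        // directly sidesteps the int64 use helpers, which would otherwise
        // tie up the full register pair on 32-bit targets.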
        ensureDefined(rhs);
        LUse use(ecx);
        use.setVirtualRegister(rhs->virtualRegister());
        ins->setOperand(INT64_PIECES, use);
    }

    defineInt64ReuseInput(ins, mir, 0);
}

template void LIRGeneratorX86Shared::lowerForShiftInt64(
    LInstructionHelper<INT64_PIECES, INT64_PIECES+1, 0>* ins, MDefinition* mir,
    MDefinition* lhs, MDefinition* rhs);
template void LIRGeneratorX86Shared::lowerForShiftInt64(
    LInstructionHelper<INT64_PIECES, INT64_PIECES+1, 1>* ins, MDefinition* mir,
    MDefinition* lhs, MDefinition* rhs);

void
LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir,
                                   MDefinition* input)
{
    ins->setOperand(0, useRegisterAtStart(input));
    defineReuseInput(ins, mir, 0);
}

void
LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
                                   MDefinition* lhs, MDefinition* rhs)
{
    ins->setOperand(0, useRegisterAtStart(lhs));
    ins->setOperand(1, lhs != rhs ? useOrConstant(rhs) : useOrConstantAtStart(rhs));
    defineReuseInput(ins, mir, 0);
}

template<size_t Temps>
void
LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, Temps>* ins, MDefinition* mir,
                                   MDefinition* lhs, MDefinition* rhs)
{
    // Without AVX, we'll need to use the x86 encodings where one of the
    // inputs must be in the same location as the output.
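    // (SSE addsd xmm0, xmm1 overwrites xmm0 in place; the AVX form
    // vaddsd xmm0, xmm1, xmm2 has a separate destination operand.)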
    if (!Assembler::HasAVX()) {
        ins->setOperand(0, useRegisterAtStart(lhs));
        ins->setOperand(1, lhs != rhs ? use(rhs) : useAtStart(rhs));
        defineReuseInput(ins, mir, 0);
    } else {
        ins->setOperand(0, useRegisterAtStart(lhs));
        ins->setOperand(1, useAtStart(rhs));
        define(ins, mir);
    }
}

template void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
                                                 MDefinition* lhs, MDefinition* rhs);
template void LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, 1>* ins, MDefinition* mir,
                                                 MDefinition* lhs, MDefinition* rhs);

void
LIRGeneratorX86Shared::lowerForBitAndAndBranch(LBitAndAndBranch* baab, MInstruction* mir,
                                               MDefinition* lhs, MDefinition* rhs)
{
    baab->setOperand(0, useRegisterAtStart(lhs));
    baab->setOperand(1, useRegisterOrConstantAtStart(rhs));
    add(baab, mir);
}

void
LIRGeneratorX86Shared::lowerMulI(MMul* mul, MDefinition* lhs, MDefinition* rhs)
{
    // Note: If we need a negative zero check, lhs is used twice.
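    // The extra use preserves the original lhs value: the multiply reuses
    // its first input as the output, so the code generator needs the copy
    // to inspect the operands' signs when the product is zero.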
    LAllocation lhsCopy = mul->canBeNegativeZero() ? use(lhs) : LAllocation();
    LMulI* lir = new(alloc()) LMulI(useRegisterAtStart(lhs), useOrConstant(rhs), lhsCopy);
    if (mul->fallible()) {
        assignSnapshot(lir, Bailout_DoubleOutput);
    }
    defineReuseInput(lir, mul, 0);
}

void
LIRGeneratorX86Shared::lowerDivI(MDiv* div)
{
    if (div->isUnsigned()) {
        lowerUDiv(div);
        return;
    }

    // Division instructions are slow. Division by constant denominators can be
    // rewritten to use other instructions.
    if (div->rhs()->isConstant()) {
        int32_t rhs = div->rhs()->toConstant()->toInt32();

        // Division by powers of two can be done by shifting, and division by
        // other numbers can be done by a reciprocal multiplication technique.
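        // (For example, an unsigned x / 3 is the high part of a widening
        // multiply: uint32_t((uint64_t(x) * 0xAAAAAAAB) >> 33).)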
        int32_t shift = FloorLog2(Abs(rhs));
        if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) {
            LAllocation lhs = useRegisterAtStart(div->lhs());
            LDivPowTwoI* lir;
            if (!div->canBeNegativeDividend()) {
                // Numerator is unsigned, so does not need adjusting.
                lir = new(alloc()) LDivPowTwoI(lhs, lhs, shift, rhs < 0);
            } else {
                // Numerator is signed, and needs adjusting, and an extra
                // lhs copy register is needed.
                lir = new(alloc()) LDivPowTwoI(lhs, useRegister(div->lhs()), shift, rhs < 0);
            }
            if (div->fallible()) {
                assignSnapshot(lir, Bailout_DoubleOutput);
            }
            defineReuseInput(lir, div, 0);
            return;
        }
        if (rhs != 0) {
            LDivOrModConstantI* lir;
            lir = new(alloc()) LDivOrModConstantI(useRegister(div->lhs()), rhs, tempFixed(eax));
            if (div->fallible()) {
                assignSnapshot(lir, Bailout_DoubleOutput);
            }
            defineFixed(lir, div, LAllocation(AnyRegister(edx)));
            return;
        }
    }

    LDivI* lir = new(alloc()) LDivI(useRegister(div->lhs()), useRegister(div->rhs()),
                                    tempFixed(edx));
    if (div->fallible()) {
        assignSnapshot(lir, Bailout_DoubleOutput);
    }
    defineFixed(lir, div, LAllocation(AnyRegister(eax)));
}

void
LIRGeneratorX86Shared::lowerModI(MMod* mod)
{
    if (mod->isUnsigned()) {
        lowerUMod(mod);
        return;
    }

    if (mod->rhs()->isConstant()) {
        int32_t rhs = mod->rhs()->toConstant()->toInt32();
        int32_t shift = FloorLog2(Abs(rhs));
        if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) {
            LModPowTwoI* lir = new(alloc()) LModPowTwoI(useRegisterAtStart(mod->lhs()), shift);
            if (mod->fallible()) {
                assignSnapshot(lir, Bailout_DoubleOutput);
            }
            defineReuseInput(lir, mod, 0);
            return;
        }
        if (rhs != 0) {
            LDivOrModConstantI* lir;
            lir = new(alloc()) LDivOrModConstantI(useRegister(mod->lhs()), rhs, tempFixed(edx));
            if (mod->fallible()) {
                assignSnapshot(lir, Bailout_DoubleOutput);
            }
            defineFixed(lir, mod, LAllocation(AnyRegister(eax)));
            return;
        }
    }

    LModI* lir = new(alloc()) LModI(useRegister(mod->lhs()),
                                    useRegister(mod->rhs()),
                                    tempFixed(eax));
    if (mod->fallible()) {
        assignSnapshot(lir, Bailout_DoubleOutput);
    }
    defineFixed(lir, mod, LAllocation(AnyRegister(edx)));
}

void
LIRGenerator::visitWasmSelect(MWasmSelect* ins)
{
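    // This lowers to a test of condExpr followed by conditional moves: the
    // output reuses trueExpr, and falseExpr only needs to be readable, since
    // cmov accepts a memory operand -- hence use() rather than useRegister()
    // in the 32-bit case below.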
    if (ins->type() == MIRType::Int64) {
        auto* lir = new(alloc()) LWasmSelectI64(useInt64RegisterAtStart(ins->trueExpr()),
                                                useInt64(ins->falseExpr()),
                                                useRegister(ins->condExpr()));

        defineInt64ReuseInput(lir, ins, LWasmSelectI64::TrueExprIndex);
        return;
    }

    auto* lir = new(alloc()) LWasmSelect(useRegisterAtStart(ins->trueExpr()),
                                         use(ins->falseExpr()),
                                         useRegister(ins->condExpr()));

    defineReuseInput(lir, ins, LWasmSelect::TrueExprIndex);
}

void
LIRGenerator::visitWasmNeg(MWasmNeg* ins)
{
    switch (ins->type()) {
      case MIRType::Int32:
        defineReuseInput(new(alloc()) LNegI(useRegisterAtStart(ins->input())), ins, 0);
        break;
      case MIRType::Float32:
        defineReuseInput(new(alloc()) LNegF(useRegisterAtStart(ins->input())), ins, 0);
        break;
      case MIRType::Double:
        defineReuseInput(new(alloc()) LNegD(useRegisterAtStart(ins->input())), ins, 0);
        break;
      default:
        MOZ_CRASH();
    }
}

void
LIRGeneratorX86Shared::lowerUDiv(MDiv* div)
{
    if (div->rhs()->isConstant()) {
        uint32_t rhs = div->rhs()->toConstant()->toInt32();
        int32_t shift = FloorLog2(rhs);

        LAllocation lhs = useRegisterAtStart(div->lhs());
        if (rhs != 0 && uint32_t(1) << shift == rhs) {
            LDivPowTwoI* lir = new(alloc()) LDivPowTwoI(lhs, lhs, shift, false);
            if (div->fallible()) {
                assignSnapshot(lir, Bailout_DoubleOutput);
            }
            defineReuseInput(lir, div, 0);
        } else {
            LUDivOrModConstant* lir = new(alloc()) LUDivOrModConstant(useRegister(div->lhs()),
                                                                      rhs, tempFixed(eax));
            if (div->fallible()) {
                assignSnapshot(lir, Bailout_DoubleOutput);
            }
            defineFixed(lir, div, LAllocation(AnyRegister(edx)));
        }
        return;
    }

    LUDivOrMod* lir = new(alloc()) LUDivOrMod(useRegister(div->lhs()),
                                              useRegister(div->rhs()),
                                              tempFixed(edx));
    if (div->fallible()) {
        assignSnapshot(lir, Bailout_DoubleOutput);
    }
    defineFixed(lir, div, LAllocation(AnyRegister(eax)));
}

void
LIRGeneratorX86Shared::lowerUMod(MMod* mod)
{
    if (mod->rhs()->isConstant()) {
        uint32_t rhs = mod->rhs()->toConstant()->toInt32();
        int32_t shift = FloorLog2(rhs);

        if (rhs != 0 && uint32_t(1) << shift == rhs) {
            LModPowTwoI* lir = new(alloc()) LModPowTwoI(useRegisterAtStart(mod->lhs()), shift);
            if (mod->fallible()) {
                assignSnapshot(lir, Bailout_DoubleOutput);
            }
            defineReuseInput(lir, mod, 0);
        } else {
            LUDivOrModConstant* lir = new(alloc()) LUDivOrModConstant(useRegister(mod->lhs()),
                                                                      rhs, tempFixed(edx));
            if (mod->fallible()) {
                assignSnapshot(lir, Bailout_DoubleOutput);
            }
            defineFixed(lir, mod, LAllocation(AnyRegister(eax)));
        }
        return;
    }

    LUDivOrMod* lir = new(alloc()) LUDivOrMod(useRegister(mod->lhs()),
                                              useRegister(mod->rhs()),
                                              tempFixed(eax));
    if (mod->fallible()) {
        assignSnapshot(lir, Bailout_DoubleOutput);
    }
    defineFixed(lir, mod, LAllocation(AnyRegister(edx)));
}

void
LIRGeneratorX86Shared::lowerUrshD(MUrsh* mir)
{
    MDefinition* lhs = mir->lhs();
    MDefinition* rhs = mir->rhs();

    MOZ_ASSERT(lhs->type() == MIRType::Int32);
    MOZ_ASSERT(rhs->type() == MIRType::Int32);
    MOZ_ASSERT(mir->type() == MIRType::Double);

#ifdef JS_CODEGEN_X64
    MOZ_ASSERT(ecx == rcx);
#endif
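
    // The assert above documents that ecx aliases rcx on x64, so the single
    // fixed-ecx constraint below serves both targets.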
    LUse lhsUse = useRegisterAtStart(lhs);
    LAllocation rhsAlloc = rhs->isConstant() ? useOrConstant(rhs) : useFixed(rhs, ecx);

    LUrshD* lir = new(alloc()) LUrshD(lhsUse, rhsAlloc, tempCopy(lhs, 0));
    define(lir, mir);
}

void
LIRGeneratorX86Shared::lowerTruncateDToInt32(MTruncateToInt32* ins)
{
    MDefinition* opd = ins->input();
    MOZ_ASSERT(opd->type() == MIRType::Double);

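    // The temp is only needed by the non-SSE3 slow path; with SSE3 the code
    // generator can presumably truncate in place (via FISTTP) instead.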
    LDefinition maybeTemp = Assembler::HasSSE3() ? LDefinition::BogusTemp() : tempDouble();
    define(new(alloc()) LTruncateDToInt32(useRegister(opd), maybeTemp), ins);
}

void
LIRGeneratorX86Shared::lowerTruncateFToInt32(MTruncateToInt32* ins)
{
    MDefinition* opd = ins->input();
    MOZ_ASSERT(opd->type() == MIRType::Float32);

    LDefinition maybeTemp = Assembler::HasSSE3() ? LDefinition::BogusTemp() : tempFloat32();
    define(new(alloc()) LTruncateFToInt32(useRegister(opd), maybeTemp), ins);
}

void
LIRGeneratorX86Shared::lowerCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement* ins,
                                                             bool useI386ByteRegisters)
{
    MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
    MOZ_ASSERT(ins->arrayType() != Scalar::Float64);

    MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
    MOZ_ASSERT(ins->index()->type() == MIRType::Int32);

    const LUse elements = useRegister(ins->elements());
    const LAllocation index = useRegisterOrConstant(ins->index());

    // If the target is a floating-point register then we need a temp at
    // the lower level; that temp must be eax.
    //
    // Otherwise the target (if used) is an integer register, which
    // must be eax. If the target is not used the machine code will
    // still clobber eax, so just pretend it's used.
    //
    // oldval must be in a register.
    //
    // newval must be in a register. If the source is a byte array
    // then newval must be a register that has a byte size: on x86
    // this must be ebx, ecx, or edx (eax is taken for the output).
    //
    // Bug #1077036 describes some further optimization opportunities.
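    //
    // (CMPXCHG implicitly compares against eax and leaves the value loaded
    // from memory in eax, which is why the output is pinned there.)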

    bool fixedOutput = false;
    LDefinition tempDef = LDefinition::BogusTemp();
    LAllocation newval;
    if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
        tempDef = tempFixed(eax);
        newval = useRegister(ins->newval());
    } else {
        fixedOutput = true;
        if (useI386ByteRegisters && ins->isByteArray()) {
            newval = useFixed(ins->newval(), ebx);
        } else {
            newval = useRegister(ins->newval());
        }
    }

    const LAllocation oldval = useRegister(ins->oldval());

    LCompareExchangeTypedArrayElement* lir =
        new(alloc()) LCompareExchangeTypedArrayElement(elements, index, oldval, newval, tempDef);

    if (fixedOutput) {
        defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
    } else {
        define(lir, ins);
    }
}

void
LIRGeneratorX86Shared::lowerAtomicExchangeTypedArrayElement(MAtomicExchangeTypedArrayElement* ins,
                                                            bool useI386ByteRegisters)
{
    MOZ_ASSERT(ins->arrayType() <= Scalar::Uint32);

    MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
    MOZ_ASSERT(ins->index()->type() == MIRType::Int32);

    const LUse elements = useRegister(ins->elements());
    const LAllocation index = useRegisterOrConstant(ins->index());
    const LAllocation value = useRegister(ins->value());

    // The underlying instruction is XCHG, which can operate on any
    // register.
    //
    // If the target is a floating-point register (for Uint32) then we
    // need a temp into which to exchange.
    //
    // If the source is a byte array then we need a register that has
    // a byte size; in this case -- on x86 only -- pin the output to
    // an appropriate register and use that as a temp in the back-end.

    LDefinition tempDef = LDefinition::BogusTemp();
    if (ins->arrayType() == Scalar::Uint32) {
        // This restriction is bug 1077305.
        MOZ_ASSERT(ins->type() == MIRType::Double);
        tempDef = temp();
    }

    LAtomicExchangeTypedArrayElement* lir =
        new(alloc()) LAtomicExchangeTypedArrayElement(elements, index, value, tempDef);

    if (useI386ByteRegisters && ins->isByteArray()) {
        defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
    } else {
        define(lir, ins);
    }
}

void
LIRGeneratorX86Shared::lowerAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop* ins,
                                                         bool useI386ByteRegisters)
{
    MOZ_ASSERT(ins->arrayType() != Scalar::Uint8Clamped);
    MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
    MOZ_ASSERT(ins->arrayType() != Scalar::Float64);

    MOZ_ASSERT(ins->elements()->type() == MIRType::Elements);
    MOZ_ASSERT(ins->index()->type() == MIRType::Int32);

    const LUse elements = useRegister(ins->elements());
    const LAllocation index = useRegisterOrConstant(ins->index());

    // Case 1: the result of the operation is not used.
    //
    // We'll emit a single instruction: LOCK ADD, LOCK SUB, LOCK AND,
    // LOCK OR, or LOCK XOR. We can do this even for the Uint32 case.

    if (!ins->hasUses()) {
        LAllocation value;
        if (useI386ByteRegisters && ins->isByteArray() && !ins->value()->isConstant()) {
            value = useFixed(ins->value(), ebx);
        } else {
            value = useRegisterOrConstant(ins->value());
        }

        LAtomicTypedArrayElementBinopForEffect* lir =
            new(alloc()) LAtomicTypedArrayElementBinopForEffect(elements, index, value);

        add(lir, ins);
        return;
    }

    // Case 2: the result of the operation is used.
    //
    // For ADD and SUB we'll use XADD:
    //
    //    movl       src, output
    //    lock xaddl output, mem
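    //
    // (XADD exchanges its two operands and then adds, so after it executes
    // the output register holds the value that was previously in memory.)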
    //
    // For the 8-bit variants XADD needs a byte register for the output.
    //
    // For AND/OR/XOR we need to use a CMPXCHG loop:
    //
    //    movl          *mem, eax
    // L: mov           eax, temp
    //    andl          src, temp
    //    lock cmpxchg  temp, mem  ; reads eax also
    //    jnz           L
    //    ; result in eax
    //
    // Note the placement of L: cmpxchg will update eax with *mem if
    // *mem does not have the expected value, so reloading it at the
    // top of the loop would be redundant.
    //
    // If the array is not a uint32 array then:
    //  - eax should be the output (one result of the cmpxchg)
    //  - there is a temp, which must have a byte register if
    //    the array has 1-byte elements
    //
    // If the array is a uint32 array then:
    //  - eax is the first temp
    //  - we also need a second temp
    //
    // There are optimization opportunities:
    //  - better register allocation in the x86 8-bit case, Bug #1077036.

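    // bitOp is true for AND/OR/XOR, i.e. the operations that need the
    // CMPXCHG loop rather than XADD.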
    bool bitOp = !(ins->operation() == AtomicFetchAddOp || ins->operation() == AtomicFetchSubOp);
    bool fixedOutput = true;
    bool reuseInput = false;
    LDefinition tempDef1 = LDefinition::BogusTemp();
    LDefinition tempDef2 = LDefinition::BogusTemp();
    LAllocation value;

    if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
        value = useRegisterOrConstant(ins->value());
        fixedOutput = false;
        if (bitOp) {
            tempDef1 = tempFixed(eax);
            tempDef2 = temp();
        } else {
            tempDef1 = temp();
        }
    } else if (useI386ByteRegisters && ins->isByteArray()) {
        if (ins->value()->isConstant()) {
            value = useRegisterOrConstant(ins->value());
        } else {
            value = useFixed(ins->value(), ebx);
        }
        if (bitOp) {
            tempDef1 = tempFixed(ecx);
        }
    } else if (bitOp) {
        value = useRegisterOrConstant(ins->value());
        tempDef1 = temp();
    } else if (ins->value()->isConstant()) {
        fixedOutput = false;
        value = useRegisterOrConstant(ins->value());
    } else {
        fixedOutput = false;
        reuseInput = true;
        value = useRegisterAtStart(ins->value());
    }

    LAtomicTypedArrayElementBinop* lir =
        new(alloc()) LAtomicTypedArrayElementBinop(elements, index, value, tempDef1, tempDef2);

    if (fixedOutput) {
        defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
    } else if (reuseInput) {
        defineReuseInput(lir, ins, LAtomicTypedArrayElementBinop::valueOp);
    } else {
        define(lir, ins);
    }
}

void
LIRGenerator::visitCopySign(MCopySign* ins)
{
    MDefinition* lhs = ins->lhs();
    MDefinition* rhs = ins->rhs();

    MOZ_ASSERT(IsFloatingPointType(lhs->type()));
    MOZ_ASSERT(lhs->type() == rhs->type());
    MOZ_ASSERT(lhs->type() == ins->type());

    LInstructionHelper<1, 2, 2>* lir;
    if (lhs->type() == MIRType::Double) {
        lir = new(alloc()) LCopySignD();
    } else {
        lir = new(alloc()) LCopySignF();
    }

    // As in lowerForFPU, but rhs must be in an FP register too.
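    // (The two temps declared on LCopySignD/LCopySignF are presumably
    // scratch space for the sign-bit masking: clear lhs's sign bit, isolate
    // rhs's, then combine the results.)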
    lir->setOperand(0, useRegisterAtStart(lhs));
    lir->setOperand(1, lhs != rhs ? useRegister(rhs) : useRegisterAtStart(rhs));
    if (!Assembler::HasAVX()) {
        defineReuseInput(lir, ins, 0);
    } else {
        define(lir, ins);
    }
}