Line data Source code
1 : // Copyright 2013 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #include "src/compiler/backend/code-generator.h"
6 :
7 : #include <limits>
8 :
9 : #include "src/base/overflowing-math.h"
10 : #include "src/compiler/backend/code-generator-impl.h"
11 : #include "src/compiler/backend/gap-resolver.h"
12 : #include "src/compiler/node-matchers.h"
13 : #include "src/compiler/osr.h"
14 : #include "src/heap/heap-inl.h" // crbug.com/v8/8499
15 : #include "src/macro-assembler.h"
16 : #include "src/objects/smi.h"
17 : #include "src/optimized-compilation-info.h"
18 : #include "src/wasm/wasm-code-manager.h"
19 : #include "src/wasm/wasm-objects.h"
20 : #include "src/x64/assembler-x64.h"
21 :
22 : namespace v8 {
23 : namespace internal {
24 : namespace compiler {
25 :
26 : #define __ tasm()->
27 :
28 : // Adds X64 specific methods for decoding operands.
// Adds X64 specific methods for decoding operands.
class X64OperandConverter : public InstructionOperandConverter {
 public:
  X64OperandConverter(CodeGenerator* gen, Instruction* instr)
      : InstructionOperandConverter(gen, instr) {}

  // Returns instruction input |index| as an assembler Immediate.
  Immediate InputImmediate(size_t index) {
    return ToImmediate(instr_->InputAt(index));
  }

  // Returns instruction input |index| (a stack slot) as a memory Operand,
  // displaced by |extra| bytes.
  Operand InputOperand(size_t index, int extra = 0) {
    return ToOperand(instr_->InputAt(index), extra);
  }

  // Returns the instruction output (a stack slot) as a memory Operand.
  Operand OutputOperand() { return ToOperand(instr_->Output()); }

  // Converts a constant operand into an Immediate. Only the zero float64
  // constant can be encoded as an immediate (it becomes the int32 0); wasm
  // references keep their relocation mode so they can be patched later.
  Immediate ToImmediate(InstructionOperand* operand) {
    Constant constant = ToConstant(operand);
    if (constant.type() == Constant::kFloat64) {
      DCHECK_EQ(0, constant.ToFloat64().AsUint64());
      return Immediate(0);
    }
    if (RelocInfo::IsWasmReference(constant.rmode())) {
      return Immediate(constant.ToInt32(), constant.rmode());
    }
    return Immediate(constant.ToInt32());
  }

  // Converts a (FP or GP) stack-slot operand into a frame-relative Operand.
  Operand ToOperand(InstructionOperand* op, int extra = 0) {
    DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
    return SlotToOperand(AllocatedOperand::cast(op)->index(), extra);
  }

  // Maps a frame slot index to an Operand based at either rsp or rbp,
  // depending on the current frame access mode.
  Operand SlotToOperand(int slot_index, int extra = 0) {
    FrameOffset offset = frame_access_state()->GetFrameOffset(slot_index);
    return Operand(offset.from_stack_pointer() ? rsp : rbp,
                   offset.offset() + extra);
  }

  // Returns *offset and post-increments it; used to consume instruction
  // inputs one by one while decoding an addressing mode.
  static size_t NextOffset(size_t* offset) {
    size_t i = *offset;
    (*offset)++;
    return i;
  }

  // Derives the scale factor from the distance between |mode| and |one|,
  // the first member of its addressing-mode group (scale 1/2/4/8).
  static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) {
    STATIC_ASSERT(0 == static_cast<int>(times_1));
    STATIC_ASSERT(1 == static_cast<int>(times_2));
    STATIC_ASSERT(2 == static_cast<int>(times_4));
    STATIC_ASSERT(3 == static_cast<int>(times_8));
    int scale = static_cast<int>(mode - one);
    DCHECK(scale >= 0 && scale < 4);
    return static_cast<ScaleFactor>(scale);
  }

  // Decodes the instruction's addressing mode, consuming the instruction
  // inputs that encode it starting at *offset (which is advanced past them).
  // Mode mnemonics: M = memory, R = base register, 1/2/4/8 = index scale,
  // I = immediate displacement, Root = kRootRegister-relative.
  Operand MemoryOperand(size_t* offset) {
    AddressingMode mode = AddressingModeField::decode(instr_->opcode());
    switch (mode) {
      case kMode_MR: {
        Register base = InputRegister(NextOffset(offset));
        int32_t disp = 0;
        return Operand(base, disp);
      }
      case kMode_MRI: {
        Register base = InputRegister(NextOffset(offset));
        int32_t disp = InputInt32(NextOffset(offset));
        return Operand(base, disp);
      }
      case kMode_MR1:
      case kMode_MR2:
      case kMode_MR4:
      case kMode_MR8: {
        Register base = InputRegister(NextOffset(offset));
        Register index = InputRegister(NextOffset(offset));
        ScaleFactor scale = ScaleFor(kMode_MR1, mode);
        int32_t disp = 0;
        return Operand(base, index, scale, disp);
      }
      case kMode_MR1I:
      case kMode_MR2I:
      case kMode_MR4I:
      case kMode_MR8I: {
        Register base = InputRegister(NextOffset(offset));
        Register index = InputRegister(NextOffset(offset));
        ScaleFactor scale = ScaleFor(kMode_MR1I, mode);
        int32_t disp = InputInt32(NextOffset(offset));
        return Operand(base, index, scale, disp);
      }
      case kMode_M1: {
        Register base = InputRegister(NextOffset(offset));
        int32_t disp = 0;
        return Operand(base, disp);
      }
      case kMode_M2:
        UNREACHABLE();  // Should use kModeMR with more compact encoding instead
        return Operand(no_reg, 0);
      case kMode_M4:
      case kMode_M8: {
        // Index-only (no base register) scaled addressing.
        Register index = InputRegister(NextOffset(offset));
        ScaleFactor scale = ScaleFor(kMode_M1, mode);
        int32_t disp = 0;
        return Operand(index, scale, disp);
      }
      case kMode_M1I:
      case kMode_M2I:
      case kMode_M4I:
      case kMode_M8I: {
        Register index = InputRegister(NextOffset(offset));
        ScaleFactor scale = ScaleFor(kMode_M1I, mode);
        int32_t disp = InputInt32(NextOffset(offset));
        return Operand(index, scale, disp);
      }
      case kMode_Root: {
        // Displacement relative to the isolate root register.
        Register base = kRootRegister;
        int32_t disp = InputInt32(NextOffset(offset));
        return Operand(base, disp);
      }
      case kMode_None:
        UNREACHABLE();
    }
    UNREACHABLE();
  }

  // Convenience overload: decode starting at input |first_input|.
  Operand MemoryOperand(size_t first_input = 0) {
    return MemoryOperand(&first_input);
  }
};
155 :
156 : namespace {
157 :
158 : bool HasImmediateInput(Instruction* instr, size_t index) {
159 20369263 : return instr->InputAt(index)->IsImmediate();
160 : }
161 :
// Out-of-line code that materializes a quiet float32 NaN in |result| by
// computing 0.0f / 0.0f (IEEE 754: zero divided by zero yields a NaN).
class OutOfLineLoadFloat32NaN final : public OutOfLineCode {
 public:
  OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result)
      : OutOfLineCode(gen), result_(result) {}

  void Generate() final {
    // Zero the register, then divide it by itself: 0/0 -> NaN.
    __ Xorps(result_, result_);
    __ Divss(result_, result_);
  }

 private:
  XMMRegister const result_;
};
175 :
// Out-of-line code that materializes a quiet float64 NaN in |result| by
// computing 0.0 / 0.0 (IEEE 754: zero divided by zero yields a NaN).
class OutOfLineLoadFloat64NaN final : public OutOfLineCode {
 public:
  OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result)
      : OutOfLineCode(gen), result_(result) {}

  void Generate() final {
    // Zero the register, then divide it by itself: 0/0 -> NaN.
    __ Xorpd(result_, result_);
    __ Divsd(result_, result_);
  }

 private:
  XMMRegister const result_;
};
189 :
// Out-of-line slow path that truncates the double in |input| to an int32 in
// |result| by calling the DoubleToI stub (either the wasm runtime stub or
// the builtin, depending on |stub_mode|). The argument and the result are
// exchanged through a freshly reserved double-sized stack slot.
class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
 public:
  OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
                             XMMRegister input, StubCallMode stub_mode,
                             UnwindingInfoWriter* unwinding_info_writer)
      : OutOfLineCode(gen),
        result_(result),
        input_(input),
        stub_mode_(stub_mode),
        unwinding_info_writer_(unwinding_info_writer),
        isolate_(gen->isolate()),
        zone_(gen->zone()) {}

  void Generate() final {
    // Reserve a stack slot and spill the double argument into it; the stack
    // adjustment is mirrored into the unwinding info.
    __ subq(rsp, Immediate(kDoubleSize));
    unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                      kDoubleSize);
    __ Movsd(MemOperand(rsp, 0), input_);
    if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
      // A direct call to a wasm runtime stub defined in this module.
      // Just encode the stub index. This will be patched when the code
      // is added to the native module and copied into wasm code space.
      __ near_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
    } else {
      __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET);
    }
    // The stub leaves the 32-bit result in the same stack slot; load it and
    // release the slot again.
    __ movl(result_, MemOperand(rsp, 0));
    __ addq(rsp, Immediate(kDoubleSize));
    unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                      -kDoubleSize);
  }

 private:
  Register const result_;
  XMMRegister const input_;
  StubCallMode stub_mode_;
  UnwindingInfoWriter* const unwinding_info_writer_;
  Isolate* isolate_;
  Zone* zone_;
};
230 :
// Out-of-line portion of the write barrier for a store of |value| into the
// slot |operand| of |object|: filters out stores that cannot require a
// barrier, then calls the RecordWrite stub with the address of the slot.
class OutOfLineRecordWrite final : public OutOfLineCode {
 public:
  OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand,
                       Register value, Register scratch0, Register scratch1,
                       RecordWriteMode mode, StubCallMode stub_mode)
      : OutOfLineCode(gen),
        object_(object),
        operand_(operand),
        value_(value),
        scratch0_(scratch0),
        scratch1_(scratch1),
        mode_(mode),
        stub_mode_(stub_mode),
        zone_(gen->zone()) {}

  void Generate() final {
    if (mode_ > RecordWriteMode::kValueIsPointer) {
      // The value is not statically known to be a heap object; a smi never
      // needs a barrier.
      __ JumpIfSmi(value_, exit());
    }
    // Skip the barrier when the value's page has no interesting pointers.
    __ CheckPageFlag(value_, scratch0_,
                     MemoryChunk::kPointersToHereAreInterestingMask, zero,
                     exit());
    // The stub receives the address of the slot, not the stored value.
    __ leaq(scratch1_, operand_);

    RememberedSetAction const remembered_set_action =
        mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
                                             : OMIT_REMEMBERED_SET;
    SaveFPRegsMode const save_fp_mode =
        frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;

    if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
      // A direct call to a wasm runtime stub defined in this module.
      // Just encode the stub index. This will be patched when the code
      // is added to the native module and copied into wasm code space.
      __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
                             save_fp_mode, wasm::WasmCode::kWasmRecordWrite);
    } else {
      __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
                             save_fp_mode);
    }
  }

 private:
  Register const object_;
  Operand const operand_;
  Register const value_;
  Register const scratch0_;
  Register const scratch1_;
  RecordWriteMode const mode_;
  StubCallMode const stub_mode_;
  Zone* zone_;
};
283 :
// Out-of-line code that raises a wasm trap. The trap id is encoded as the
// last input of the instruction.
class WasmOutOfLineTrap : public OutOfLineCode {
 public:
  WasmOutOfLineTrap(CodeGenerator* gen, Instruction* instr)
      : OutOfLineCode(gen), gen_(gen), instr_(instr) {}

  void Generate() override {
    X64OperandConverter i(gen_, instr_);
    // Decode the trap id from the instruction's final input.
    TrapId trap_id =
        static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
    GenerateWithTrapId(trap_id);
  }

 protected:
  CodeGenerator* gen_;

  void GenerateWithTrapId(TrapId trap_id) { GenerateCallToTrap(trap_id); }

 private:
  void GenerateCallToTrap(TrapId trap_id) {
    if (!gen_->wasm_runtime_exception_support()) {
      // We cannot test calls to the runtime in cctest/test-run-wasm.
      // Therefore we emit a call to C here instead of a call to the runtime.
      __ PrepareCallCFunction(0);
      __ CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(),
                       0);
      __ LeaveFrame(StackFrame::WASM_COMPILED);
      auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
      size_t pop_size =
          call_descriptor->StackParameterCount() * kSystemPointerSize;
      // Use rcx as a scratch register, we return anyways immediately.
      __ Ret(static_cast<int>(pop_size), rcx);
    } else {
      gen_->AssembleSourcePosition(instr_);
      // A direct call to a wasm runtime stub defined in this module.
      // Just encode the stub index. This will be patched when the code
      // is added to the native module and copied into wasm code space.
      __ near_call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
      // Record a (trivially empty) safepoint for the stub call.
      ReferenceMap* reference_map =
          new (gen_->zone()) ReferenceMap(gen_->zone());
      gen_->RecordSafepoint(reference_map, Safepoint::kSimple,
                            Safepoint::kNoLazyDeopt);
      // The trap stub does not return.
      __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
    }
  }

  Instruction* instr_;
};
331 :
// Landing pad for memory accesses guarded by the trap handler: records the
// faulting instruction's pc and raises a memory-out-of-bounds trap.
class WasmProtectedInstructionTrap final : public WasmOutOfLineTrap {
 public:
  WasmProtectedInstructionTrap(CodeGenerator* gen, int pc, Instruction* instr)
      : WasmOutOfLineTrap(gen, instr), pc_(pc) {}

  void Generate() final {
    // Associate the protected instruction's pc with this landing pad.
    gen_->AddProtectedInstructionLanding(pc_, __ pc_offset());
    GenerateWithTrapId(TrapId::kTrapMemOutOfBounds);
  }

 private:
  int pc_;
};
345 :
346 11355741 : void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen,
347 : InstructionCode opcode, Instruction* instr,
348 : X64OperandConverter& i, int pc) {
349 : const MemoryAccessMode access_mode =
350 11355741 : static_cast<MemoryAccessMode>(MiscField::decode(opcode));
351 11355741 : if (access_mode == kMemoryAccessProtected) {
352 : new (zone) WasmProtectedInstructionTrap(codegen, pc, instr);
353 : }
354 11355630 : }
355 :
356 10697424 : void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
357 : InstructionCode opcode, Instruction* instr,
358 : X64OperandConverter& i) {
359 : const MemoryAccessMode access_mode =
360 10697424 : static_cast<MemoryAccessMode>(MiscField::decode(opcode));
361 10697424 : if (access_mode == kMemoryAccessPoisoned) {
362 0 : Register value = i.OutputRegister();
363 0 : codegen->tasm()->andq(value, kSpeculationPoisonRegister);
364 : }
365 10697424 : }
366 :
367 : } // namespace
368 :
// Emits a one-operand instruction whose operand is the instruction output,
// which may live in a register or in a stack slot.
#define ASSEMBLE_UNOP(asm_instr)         \
  do {                                   \
    if (instr->Output()->IsRegister()) { \
      __ asm_instr(i.OutputRegister());  \
    } else {                             \
      __ asm_instr(i.OutputOperand());   \
    }                                    \
  } while (false)

// Emits a two-operand ALU instruction, selecting among the memory,
// immediate and register forms for the right-hand side.
#define ASSEMBLE_BINOP(asm_instr)                                     \
  do {                                                                \
    if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
      size_t index = 1;                                               \
      Operand right = i.MemoryOperand(&index);                        \
      __ asm_instr(i.InputRegister(0), right);                        \
    } else {                                                          \
      if (HasImmediateInput(instr, 1)) {                              \
        if (instr->InputAt(0)->IsRegister()) {                        \
          __ asm_instr(i.InputRegister(0), i.InputImmediate(1));      \
        } else {                                                      \
          __ asm_instr(i.InputOperand(0), i.InputImmediate(1));       \
        }                                                             \
      } else {                                                        \
        if (instr->InputAt(1)->IsRegister()) {                        \
          __ asm_instr(i.InputRegister(0), i.InputRegister(1));       \
        } else {                                                      \
          __ asm_instr(i.InputRegister(0), i.InputOperand(1));        \
        }                                                             \
      }                                                               \
    }                                                                 \
  } while (false)

// Emits a comparison; unlike ASSEMBLE_BINOP the *left* operand may come
// from memory, with the right operand then being input |index|.
#define ASSEMBLE_COMPARE(asm_instr)                                   \
  do {                                                                \
    if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
      size_t index = 0;                                               \
      Operand left = i.MemoryOperand(&index);                         \
      if (HasImmediateInput(instr, index)) {                          \
        __ asm_instr(left, i.InputImmediate(index));                  \
      } else {                                                        \
        __ asm_instr(left, i.InputRegister(index));                   \
      }                                                               \
    } else {                                                          \
      if (HasImmediateInput(instr, 1)) {                              \
        if (instr->InputAt(0)->IsRegister()) {                        \
          __ asm_instr(i.InputRegister(0), i.InputImmediate(1));      \
        } else {                                                      \
          __ asm_instr(i.InputOperand(0), i.InputImmediate(1));       \
        }                                                             \
      } else {                                                        \
        if (instr->InputAt(1)->IsRegister()) {                        \
          __ asm_instr(i.InputRegister(0), i.InputRegister(1));       \
        } else {                                                      \
          __ asm_instr(i.InputRegister(0), i.InputOperand(1));        \
        }                                                             \
      }                                                               \
    }                                                                 \
  } while (false)

// Emits a multiply: three-operand form for immediates, two-operand form
// (accumulating into the output register) otherwise.
#define ASSEMBLE_MULT(asm_instr)                              \
  do {                                                        \
    if (HasImmediateInput(instr, 1)) {                        \
      if (instr->InputAt(0)->IsRegister()) {                  \
        __ asm_instr(i.OutputRegister(), i.InputRegister(0),  \
                     i.InputImmediate(1));                    \
      } else {                                                \
        __ asm_instr(i.OutputRegister(), i.InputOperand(0),   \
                     i.InputImmediate(1));                    \
      }                                                       \
    } else {                                                  \
      if (instr->InputAt(1)->IsRegister()) {                  \
        __ asm_instr(i.OutputRegister(), i.InputRegister(1)); \
      } else {                                                \
        __ asm_instr(i.OutputRegister(), i.InputOperand(1));  \
      }                                                       \
    }                                                         \
  } while (false)

// Emits a shift by either an immediate count (of the given bit |width|) or
// by cl, targeting a register or stack-slot output.
#define ASSEMBLE_SHIFT(asm_instr, width)                                   \
  do {                                                                     \
    if (HasImmediateInput(instr, 1)) {                                     \
      if (instr->Output()->IsRegister()) {                                 \
        __ asm_instr(i.OutputRegister(), Immediate(i.InputInt##width(1))); \
      } else {                                                             \
        __ asm_instr(i.OutputOperand(), Immediate(i.InputInt##width(1)));  \
      }                                                                    \
    } else {                                                               \
      if (instr->Output()->IsRegister()) {                                 \
        __ asm_instr##_cl(i.OutputRegister());                             \
      } else {                                                             \
        __ asm_instr##_cl(i.OutputOperand());                              \
      }                                                                    \
    }                                                                      \
  } while (false)

// Emits a sign/zero-extending move from memory, register or stack slot.
#define ASSEMBLE_MOVX(asm_instr)                            \
  do {                                                      \
    if (instr->addressing_mode() != kMode_None) {           \
      __ asm_instr(i.OutputRegister(), i.MemoryOperand());  \
    } else if (instr->InputAt(0)->IsRegister()) {           \
      __ asm_instr(i.OutputRegister(), i.InputRegister(0)); \
    } else {                                                \
      __ asm_instr(i.OutputRegister(), i.InputOperand(0));  \
    }                                                       \
  } while (false)

// Emits a two-operand SSE instruction (in-place on input 0).
#define ASSEMBLE_SSE_BINOP(asm_instr)                                   \
  do {                                                                  \
    if (instr->InputAt(1)->IsFPRegister()) {                            \
      __ asm_instr(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); \
    } else {                                                            \
      __ asm_instr(i.InputDoubleRegister(0), i.InputOperand(1));        \
    }                                                                   \
  } while (false)

// Emits a one-source SSE instruction into the output register.
#define ASSEMBLE_SSE_UNOP(asm_instr)                                     \
  do {                                                                   \
    if (instr->InputAt(0)->IsFPRegister()) {                             \
      __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0));  \
    } else {                                                             \
      __ asm_instr(i.OutputDoubleRegister(), i.InputOperand(0));         \
    }                                                                    \
  } while (false)

// Emits a three-operand (non-destructive) AVX instruction; requires AVX.
#define ASSEMBLE_AVX_BINOP(asm_instr)                                  \
  do {                                                                 \
    CpuFeatureScope avx_scope(tasm(), AVX);                            \
    if (instr->InputAt(1)->IsFPRegister()) {                           \
      __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
                   i.InputDoubleRegister(1));                          \
    } else {                                                           \
      __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
                   i.InputOperand(1));                                 \
    }                                                                  \
  } while (false)

// Calls the two-argument ieee754 C helper for the given operation.
#define ASSEMBLE_IEEE754_BINOP(name)                                     \
  do {                                                                   \
    __ PrepareCallCFunction(2);                                          \
    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \
  } while (false)

// Calls the one-argument ieee754 C helper for the given operation.
#define ASSEMBLE_IEEE754_UNOP(name)                                      \
  do {                                                                   \
    __ PrepareCallCFunction(1);                                          \
    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 1); \
  } while (false)

// Emits a 32-bit atomic read-modify-write loop: load, apply the operation
// into a temp, then lock-cmpxchg and retry until no other writer intervened.
#define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
  do {                                                          \
    Label binop;                                                \
    __ bind(&binop);                                            \
    __ mov_inst(rax, i.MemoryOperand(1));                       \
    __ movl(i.TempRegister(0), rax);                            \
    __ bin_inst(i.TempRegister(0), i.InputRegister(0));         \
    __ lock();                                                  \
    __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));     \
    __ j(not_equal, &binop);                                    \
  } while (false)

// 64-bit variant of ASSEMBLE_ATOMIC_BINOP (movq for the temp copy).
#define ASSEMBLE_ATOMIC64_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
  do {                                                            \
    Label binop;                                                  \
    __ bind(&binop);                                              \
    __ mov_inst(rax, i.MemoryOperand(1));                         \
    __ movq(i.TempRegister(0), rax);                              \
    __ bin_inst(i.TempRegister(0), i.InputRegister(0));           \
    __ lock();                                                    \
    __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));       \
    __ j(not_equal, &binop);                                      \
  } while (false)

// Emits a SIMD instruction whose source may be a register or memory.
#define ASSEMBLE_SIMD_INSTR(opcode, dst_operand, index)      \
  do {                                                       \
    if (instr->InputAt(index)->IsSimd128Register()) {        \
      __ opcode(dst_operand, i.InputSimd128Register(index)); \
    } else {                                                 \
      __ opcode(dst_operand, i.InputOperand(index));         \
    }                                                        \
  } while (false)

// Like ASSEMBLE_SIMD_INSTR but with a trailing immediate operand.
#define ASSEMBLE_SIMD_IMM_INSTR(opcode, dst_operand, index, imm)  \
  do {                                                            \
    if (instr->InputAt(index)->IsSimd128Register()) {             \
      __ opcode(dst_operand, i.InputSimd128Register(index), imm); \
    } else {                                                      \
      __ opcode(dst_operand, i.InputOperand(index), imm);         \
    }                                                             \
  } while (false)

// Emits a punpck-style shuffle; with two inputs the second is the source,
// with one input the destination doubles as the source.
#define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode)             \
  do {                                                   \
    XMMRegister dst = i.OutputSimd128Register();         \
    DCHECK_EQ(dst, i.InputSimd128Register(0));           \
    byte input_index = instr->InputCount() == 2 ? 1 : 0; \
    ASSEMBLE_SIMD_INSTR(opcode, dst, input_index);       \
  } while (false)

// Emits an immediate-controlled shuffle that requires the given SSE level.
#define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, SSELevel, imm)               \
  do {                                                                 \
    CpuFeatureScope sse_scope(tasm(), SSELevel);                       \
    DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));   \
    __ opcode(i.OutputSimd128Register(), i.InputSimd128Register(1), imm); \
  } while (false)
573 :
// Tears down the standard frame: restores rsp from rbp and pops the saved
// frame pointer, notifying the unwinding info writer first.
void CodeGenerator::AssembleDeconstructFrame() {
  unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
  __ movq(rsp, rbp);
  __ popq(rbp);
}
579 :
// Prepares frame access for a tail call: if this code has a frame, restore
// the caller's frame pointer (saved at [rbp]), then switch slot addressing
// to be rsp-relative.
void CodeGenerator::AssemblePrepareTailCall() {
  if (frame_access_state()->has_frame()) {
    __ movq(rbp, MemOperand(rbp, 0));
  }
  frame_access_state()->SetFrameAccessToSP();
}
586 :
// Drops an arguments adaptor frame, if the caller created one, so that a
// tail call proceeds as if invoked directly with the right argument count.
// |scratch1|..|scratch3| must not alias |args_reg|.
void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
                                                     Register scratch1,
                                                     Register scratch2,
                                                     Register scratch3) {
  DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
  Label done;

  // Check if current frame is an arguments adaptor frame.
  __ cmpq(Operand(rbp, CommonFrameConstants::kContextOrFrameTypeOffset),
          Immediate(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
  __ j(not_equal, &done, Label::kNear);

  // Load arguments count from current arguments adaptor frame (note, it
  // does not include receiver).
  Register caller_args_count_reg = scratch1;
  __ SmiUntag(caller_args_count_reg,
              Operand(rbp, ArgumentsAdaptorFrameConstants::kLengthOffset));

  ParameterCount callee_args_count(args_reg);
  __ PrepareForTailCall(callee_args_count, caller_args_count_reg, scratch2,
                        scratch3);
  __ bind(&done);
}
610 :
611 : namespace {
612 :
613 267608 : void AdjustStackPointerForTailCall(Assembler* assembler,
614 : FrameAccessState* state,
615 : int new_slot_above_sp,
616 : bool allow_shrinkage = true) {
617 : int current_sp_offset = state->GetSPToFPSlotCount() +
618 267608 : StandardFrameConstants::kFixedSlotCountAboveFp;
619 267608 : int stack_slot_delta = new_slot_above_sp - current_sp_offset;
620 267608 : if (stack_slot_delta > 0) {
621 1584 : assembler->subq(rsp, Immediate(stack_slot_delta * kSystemPointerSize));
622 : state->IncreaseSPDelta(stack_slot_delta);
623 266816 : } else if (allow_shrinkage && stack_slot_delta < 0) {
624 140336 : assembler->addq(rsp, Immediate(-stack_slot_delta * kSystemPointerSize));
625 : state->IncreaseSPDelta(stack_slot_delta);
626 : }
627 267608 : }
628 :
629 2336 : void SetupShuffleMaskOnStack(TurboAssembler* assembler, uint32_t* mask) {
630 2336 : int64_t shuffle_mask = (mask[2]) | (static_cast<uint64_t>(mask[3]) << 32);
631 2336 : assembler->movq(kScratchRegister, shuffle_mask);
632 2336 : assembler->Push(kScratchRegister);
633 2336 : shuffle_mask = (mask[0]) | (static_cast<uint64_t>(mask[1]) << 32);
634 2336 : assembler->movq(kScratchRegister, shuffle_mask);
635 2336 : assembler->Push(kScratchRegister);
636 2336 : }
637 :
638 : } // namespace
639 :
// Prepares the stack for a tail call before gap moves are resolved. Moves
// whose destinations form a contiguous run ending right below
// |first_unused_stack_slot| are converted into pushes; then rsp is adjusted
// (growing only — shrinking happens in AssembleTailCallAfterGap).
void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
                                              int first_unused_stack_slot) {
  CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
  ZoneVector<MoveOperands*> pushes(zone());
  GetPushCompatibleMoves(instr, flags, &pushes);

  // Only use pushes when the last one lands exactly at the first unused
  // slot, i.e. the pushes tile the target area completely.
  if (!pushes.empty() &&
      (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
       first_unused_stack_slot)) {
    X64OperandConverter g(this, instr);
    for (auto move : pushes) {
      LocationOperand destination_location(
          LocationOperand::cast(move->destination()));
      InstructionOperand source(move->source());
      // Bring rsp to just above this push's destination slot.
      AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                    destination_location.index());
      if (source.IsStackSlot()) {
        LocationOperand source_location(LocationOperand::cast(source));
        __ Push(g.SlotToOperand(source_location.index()));
      } else if (source.IsRegister()) {
        LocationOperand source_location(LocationOperand::cast(source));
        __ Push(source_location.GetRegister());
      } else if (source.IsImmediate()) {
        __ Push(Immediate(ImmediateOperand::cast(source).inline_value()));
      } else {
        // Pushes of non-scalar data types is not supported.
        UNIMPLEMENTED();
      }
      // Each push grows the frame by one slot; the move is now done.
      frame_access_state()->IncreaseSPDelta(1);
      move->Eliminate();
    }
  }
  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                first_unused_stack_slot, false);
}
675 :
// Final stack adjustment for a tail call, after the gap moves have been
// emitted; at this point the stack is also allowed to shrink.
void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
                                             int first_unused_stack_slot) {
  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                first_unused_stack_slot);
}
681 :
// Check that {kJavaScriptCallCodeStartRegister} is correct.
void CodeGenerator::AssembleCodeStartRegisterCheck() {
  // Recompute the code start address into rbx and compare with the incoming
  // register; aborts (debug builds) on mismatch.
  __ ComputeCodeStartAddress(rbx);
  __ cmpq(rbx, kJavaScriptCallCodeStartRegister);
  __ Assert(equal, AbortReason::kWrongFunctionCodeStart);
}
688 :
// Check if the code object is marked for deoptimization. If it is, then it
// jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need
// to:
//    1. read from memory the word that contains that bit, which can be found in
//       the flags in the referenced {CodeDataContainer} object;
//    2. test kMarkedForDeoptimizationBit in those flags; and
//    3. if it is not zero then it jumps to the builtin.
void CodeGenerator::BailoutIfDeoptimized() {
  // kJavaScriptCallCodeStartRegister points past the Code header, hence the
  // negative header adjustment to reach the CodeDataContainer field.
  int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
  __ LoadTaggedPointerField(rbx,
                            Operand(kJavaScriptCallCodeStartRegister, offset));
  __ testl(FieldOperand(rbx, CodeDataContainer::kKindSpecificFlagsOffset),
           Immediate(1 << Code::kMarkedForDeoptimizationBit));
  // Conditionally tail-call the lazy-deoptimization builtin.
  __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
          RelocInfo::CODE_TARGET, not_zero);
}
705 :
void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
  // Set a mask which has all bits set in the normal case, but has all
  // bits cleared if we are speculatively executing the wrong PC.
  __ ComputeCodeStartAddress(rbx);
  // Start with an all-zero poison; it only becomes ~0 when the PCs match.
  __ xorq(kSpeculationPoisonRegister, kSpeculationPoisonRegister);
  __ cmpq(kJavaScriptCallCodeStartRegister, rbx);
  __ movq(rbx, Immediate(-1));
  __ cmovq(equal, kSpeculationPoisonRegister, rbx);
}
715 :
// Masks the JS function register, context register and stack pointer with
// the speculation poison, so misspeculated code cannot use their contents.
void CodeGenerator::AssembleRegisterArgumentPoisoning() {
  __ andq(kJSFunctionRegister, kSpeculationPoisonRegister);
  __ andq(kContextRegister, kSpeculationPoisonRegister);
  __ andq(rsp, kSpeculationPoisonRegister);
}
721 :
722 : // Assembles an instruction after register allocation, producing machine code.
723 63172867 : CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
724 84106799 : Instruction* instr) {
725 : X64OperandConverter i(this, instr);
726 : InstructionCode opcode = instr->opcode();
727 63172867 : ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
728 63172867 : switch (arch_opcode) {
729 : case kArchCallCodeObject: {
730 4711970 : if (HasImmediateInput(instr, 0)) {
731 4702936 : Handle<Code> code = i.InputCode(0);
732 4702936 : __ Call(code, RelocInfo::CODE_TARGET);
733 : } else {
734 9048 : Register reg = i.InputRegister(0);
735 : DCHECK_IMPLIES(
736 : HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
737 : reg == kJavaScriptCallCodeStartRegister);
738 9048 : __ LoadCodeObjectEntry(reg, reg);
739 9048 : if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
740 0 : __ RetpolineCall(reg);
741 : } else {
742 9048 : __ call(reg);
743 : }
744 : }
745 4711986 : RecordCallPosition(instr);
746 : frame_access_state()->ClearSPDelta();
747 : break;
748 : }
749 : case kArchCallBuiltinPointer: {
750 : DCHECK(!HasImmediateInput(instr, 0));
751 2488 : Register builtin_pointer = i.InputRegister(0);
752 2488 : __ CallBuiltinPointer(builtin_pointer);
753 2488 : RecordCallPosition(instr);
754 : frame_access_state()->ClearSPDelta();
755 : break;
756 : }
757 : case kArchCallWasmFunction: {
758 956186 : if (HasImmediateInput(instr, 0)) {
759 80790 : Constant constant = i.ToConstant(instr->InputAt(0));
760 80806 : Address wasm_code = static_cast<Address>(constant.ToInt64());
761 80806 : if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
762 80796 : __ near_call(wasm_code, constant.rmode());
763 : } else {
764 0 : if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
765 0 : __ RetpolineCall(wasm_code, constant.rmode());
766 : } else {
767 0 : __ Call(wasm_code, constant.rmode());
768 : }
769 : }
770 : } else {
771 875396 : Register reg = i.InputRegister(0);
772 875396 : if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
773 0 : __ RetpolineCall(reg);
774 : } else {
775 875396 : __ call(reg);
776 : }
777 : }
778 956199 : RecordCallPosition(instr);
779 : frame_access_state()->ClearSPDelta();
780 : break;
781 : }
782 : case kArchTailCallCodeObjectFromJSFunction:
783 : case kArchTailCallCodeObject: {
784 34568 : if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) {
785 : AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
786 : i.TempRegister(0), i.TempRegister(1),
787 1344 : i.TempRegister(2));
788 : }
789 34568 : if (HasImmediateInput(instr, 0)) {
790 29728 : Handle<Code> code = i.InputCode(0);
791 29728 : __ Jump(code, RelocInfo::CODE_TARGET);
792 : } else {
793 9680 : Register reg = i.InputRegister(0);
794 : DCHECK_IMPLIES(
795 : HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
796 : reg == kJavaScriptCallCodeStartRegister);
797 4840 : __ LoadCodeObjectEntry(reg, reg);
798 4840 : if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
799 0 : __ RetpolineJump(reg);
800 : } else {
801 4840 : __ jmp(reg);
802 : }
803 : }
804 : unwinding_info_writer_.MarkBlockWillExit();
805 : frame_access_state()->ClearSPDelta();
806 34568 : frame_access_state()->SetFrameAccessToDefault();
807 34568 : break;
808 : }
809 : case kArchTailCallWasm: {
810 0 : if (HasImmediateInput(instr, 0)) {
811 0 : Constant constant = i.ToConstant(instr->InputAt(0));
812 0 : Address wasm_code = static_cast<Address>(constant.ToInt64());
813 0 : if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
814 0 : __ near_jmp(wasm_code, constant.rmode());
815 : } else {
816 : __ Move(kScratchRegister, wasm_code, constant.rmode());
817 0 : __ jmp(kScratchRegister);
818 : }
819 : } else {
820 0 : Register reg = i.InputRegister(0);
821 0 : if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
822 0 : __ RetpolineJump(reg);
823 : } else {
824 0 : __ jmp(reg);
825 : }
826 : }
827 : unwinding_info_writer_.MarkBlockWillExit();
828 : frame_access_state()->ClearSPDelta();
829 0 : frame_access_state()->SetFrameAccessToDefault();
830 0 : break;
831 : }
832 : case kArchTailCallAddress: {
833 84728 : CHECK(!HasImmediateInput(instr, 0));
834 84728 : Register reg = i.InputRegister(0);
835 : DCHECK_IMPLIES(
836 : HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
837 : reg == kJavaScriptCallCodeStartRegister);
838 84728 : if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
839 0 : __ RetpolineJump(reg);
840 : } else {
841 84728 : __ jmp(reg);
842 : }
843 : unwinding_info_writer_.MarkBlockWillExit();
844 : frame_access_state()->ClearSPDelta();
845 84728 : frame_access_state()->SetFrameAccessToDefault();
846 : break;
847 : }
848 : case kArchCallJSFunction: {
849 : Register func = i.InputRegister(0);
850 23793 : if (FLAG_debug_code) {
851 : // Check the function's context matches the context argument.
852 8 : __ cmp_tagged(rsi, FieldOperand(func, JSFunction::kContextOffset));
853 8 : __ Assert(equal, AbortReason::kWrongFunctionContext);
854 : }
855 : static_assert(kJavaScriptCallCodeStartRegister == rcx, "ABI mismatch");
856 : __ LoadTaggedPointerField(rcx,
857 23793 : FieldOperand(func, JSFunction::kCodeOffset));
858 23793 : __ CallCodeObject(rcx);
859 : frame_access_state()->ClearSPDelta();
860 23793 : RecordCallPosition(instr);
861 : break;
862 : }
863 : case kArchPrepareCallCFunction: {
864 : // Frame alignment requires using FP-relative frame addressing.
865 : frame_access_state()->SetFrameAccessToFP();
866 26201 : int const num_parameters = MiscField::decode(instr->opcode());
867 26201 : __ PrepareCallCFunction(num_parameters);
868 26201 : break;
869 : }
870 : case kArchSaveCallerRegisters: {
871 : fp_mode_ =
872 676 : static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
873 : DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
874 : // kReturnRegister0 should have been saved before entering the stub.
875 676 : int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
876 : DCHECK(IsAligned(bytes, kSystemPointerSize));
877 : DCHECK_EQ(0, frame_access_state()->sp_delta());
878 676 : frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
879 : DCHECK(!caller_registers_saved_);
880 676 : caller_registers_saved_ = true;
881 676 : break;
882 : }
883 : case kArchRestoreCallerRegisters: {
884 : DCHECK(fp_mode_ ==
885 : static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
886 : DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
887 : // Don't overwrite the returned value.
888 676 : int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
889 676 : frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
890 : DCHECK_EQ(0, frame_access_state()->sp_delta());
891 : DCHECK(caller_registers_saved_);
892 676 : caller_registers_saved_ = false;
893 676 : break;
894 : }
895 : case kArchPrepareTailCall:
896 119296 : AssemblePrepareTailCall();
897 119296 : break;
898 : case kArchCallCFunction: {
899 : int const num_parameters = MiscField::decode(instr->opcode());
900 26201 : if (HasImmediateInput(instr, 0)) {
901 25061 : ExternalReference ref = i.InputExternalReference(0);
902 25061 : __ CallCFunction(ref, num_parameters);
903 : } else {
904 1140 : Register func = i.InputRegister(0);
905 1140 : __ CallCFunction(func, num_parameters);
906 : }
907 26201 : frame_access_state()->SetFrameAccessToDefault();
908 : // Ideally, we should decrement SP delta to match the change of stack
909 : // pointer in CallCFunction. However, for certain architectures (e.g.
910 : // ARM), there may be more strict alignment requirement, causing old SP
911 : // to be saved on the stack. In those cases, we can not calculate the SP
912 : // delta statically.
913 : frame_access_state()->ClearSPDelta();
914 26201 : if (caller_registers_saved_) {
915 : // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
916 : // Here, we assume the sequence to be:
917 : // kArchSaveCallerRegisters;
918 : // kArchCallCFunction;
919 : // kArchRestoreCallerRegisters;
920 : int bytes =
921 676 : __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
922 676 : frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
923 : }
924 : // TODO(tebbi): Do we need an lfence here?
925 : break;
926 : }
927 : case kArchJmp:
928 4930087 : AssembleArchJump(i.InputRpo(0));
929 4930082 : break;
930 : case kArchBinarySearchSwitch:
931 34722 : AssembleArchBinarySearchSwitch(instr);
932 34722 : break;
933 : case kArchLookupSwitch:
934 0 : AssembleArchLookupSwitch(instr);
935 0 : break;
936 : case kArchTableSwitch:
937 314 : AssembleArchTableSwitch(instr);
938 314 : break;
939 : case kArchComment:
940 4 : __ RecordComment(reinterpret_cast<const char*>(i.InputInt64(0)));
941 4 : break;
942 : case kArchDebugAbort:
943 : DCHECK(i.InputRegister(0) == rdx);
944 152 : if (!frame_access_state()->has_frame()) {
945 : // We don't actually want to generate a pile of code for this, so just
946 : // claim there is a stack frame, without generating one.
947 8 : FrameScope scope(tasm(), StackFrame::NONE);
948 : __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
949 8 : RelocInfo::CODE_TARGET);
950 : } else {
951 : __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
952 144 : RelocInfo::CODE_TARGET);
953 : }
954 152 : __ int3();
955 : unwinding_info_writer_.MarkBlockWillExit();
956 : break;
957 : case kArchDebugBreak:
958 358834 : __ int3();
959 358834 : break;
960 : case kArchThrowTerminator:
961 : unwinding_info_writer_.MarkBlockWillExit();
962 : break;
963 : case kArchNop:
964 : // don't emit code for nops.
965 : break;
966 : case kArchDeoptimize: {
967 : int deopt_state_id =
968 45611 : BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
969 : CodeGenResult result =
970 45611 : AssembleDeoptimizerCall(deopt_state_id, current_source_position_);
971 45611 : if (result != kSuccess) return result;
972 : unwinding_info_writer_.MarkBlockWillExit();
973 : break;
974 : }
975 : case kArchRet:
976 2426466 : AssembleReturn(instr->InputAt(0));
977 2426457 : break;
978 : case kArchStackPointer:
979 0 : __ movq(i.OutputRegister(), rsp);
980 : break;
981 : case kArchFramePointer:
982 31038 : __ movq(i.OutputRegister(), rbp);
983 : break;
984 : case kArchParentFramePointer:
985 50404 : if (frame_access_state()->has_frame()) {
986 80652 : __ movq(i.OutputRegister(), Operand(rbp, 0));
987 : } else {
988 23520 : __ movq(i.OutputRegister(), rbp);
989 : }
990 : break;
991 : case kArchTruncateDoubleToI: {
992 : auto result = i.OutputRegister();
993 : auto input = i.InputDoubleRegister(0);
994 : auto ool = new (zone()) OutOfLineTruncateDoubleToI(
995 : this, result, input, DetermineStubCallMode(),
996 104678 : &unwinding_info_writer_);
997 : // We use Cvttsd2siq instead of Cvttsd2si due to performance reasons. The
998 : // use of Cvttsd2siq requires the movl below to avoid sign extension.
999 52336 : __ Cvttsd2siq(result, input);
1000 52340 : __ cmpq(result, Immediate(1));
1001 52340 : __ j(overflow, ool->entry());
1002 52341 : __ bind(ool->exit());
1003 : __ movl(result, result);
1004 : break;
1005 : }
1006 : case kArchStoreWithWriteBarrier: {
1007 : RecordWriteMode mode =
1008 : static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
1009 : Register object = i.InputRegister(0);
1010 325633 : size_t index = 0;
1011 325633 : Operand operand = i.MemoryOperand(&index);
1012 325633 : Register value = i.InputRegister(index);
1013 : Register scratch0 = i.TempRegister(0);
1014 : Register scratch1 = i.TempRegister(1);
1015 : auto ool = new (zone())
1016 : OutOfLineRecordWrite(this, object, operand, value, scratch0, scratch1,
1017 651266 : mode, DetermineStubCallMode());
1018 325633 : __ StoreTaggedField(operand, value);
1019 : __ CheckPageFlag(object, scratch0,
1020 : MemoryChunk::kPointersFromHereAreInterestingMask,
1021 325633 : not_zero, ool->entry());
1022 325633 : __ bind(ool->exit());
1023 : break;
1024 : }
1025 : case kArchWordPoisonOnSpeculation:
1026 : DCHECK_EQ(i.OutputRegister(), i.InputRegister(0));
1027 0 : __ andq(i.InputRegister(0), kSpeculationPoisonRegister);
1028 0 : break;
1029 : case kLFence:
1030 0 : __ lfence();
1031 0 : break;
1032 : case kArchStackSlot: {
1033 : FrameOffset offset =
1034 4918 : frame_access_state()->GetFrameOffset(i.InputInt32(0));
1035 2459 : Register base = offset.from_stack_pointer() ? rsp : rbp;
1036 7377 : __ leaq(i.OutputRegister(), Operand(base, offset.offset()));
1037 : break;
1038 : }
1039 : case kIeee754Float64Acos:
1040 116 : ASSEMBLE_IEEE754_UNOP(acos);
1041 116 : break;
1042 : case kIeee754Float64Acosh:
1043 116 : ASSEMBLE_IEEE754_UNOP(acosh);
1044 116 : break;
1045 : case kIeee754Float64Asin:
1046 116 : ASSEMBLE_IEEE754_UNOP(asin);
1047 116 : break;
1048 : case kIeee754Float64Asinh:
1049 116 : ASSEMBLE_IEEE754_UNOP(asinh);
1050 116 : break;
1051 : case kIeee754Float64Atan:
1052 133 : ASSEMBLE_IEEE754_UNOP(atan);
1053 133 : break;
1054 : case kIeee754Float64Atanh:
1055 116 : ASSEMBLE_IEEE754_UNOP(atanh);
1056 116 : break;
1057 : case kIeee754Float64Atan2:
1058 129 : ASSEMBLE_IEEE754_BINOP(atan2);
1059 129 : break;
1060 : case kIeee754Float64Cbrt:
1061 116 : ASSEMBLE_IEEE754_UNOP(cbrt);
1062 116 : break;
1063 : case kIeee754Float64Cos:
1064 265 : ASSEMBLE_IEEE754_UNOP(cos);
1065 265 : break;
1066 : case kIeee754Float64Cosh:
1067 123 : ASSEMBLE_IEEE754_UNOP(cosh);
1068 123 : break;
1069 : case kIeee754Float64Exp:
1070 148 : ASSEMBLE_IEEE754_UNOP(exp);
1071 148 : break;
1072 : case kIeee754Float64Expm1:
1073 123 : ASSEMBLE_IEEE754_UNOP(expm1);
1074 123 : break;
1075 : case kIeee754Float64Log:
1076 284 : ASSEMBLE_IEEE754_UNOP(log);
1077 284 : break;
1078 : case kIeee754Float64Log1p:
1079 116 : ASSEMBLE_IEEE754_UNOP(log1p);
1080 116 : break;
1081 : case kIeee754Float64Log2:
1082 116 : ASSEMBLE_IEEE754_UNOP(log2);
1083 116 : break;
1084 : case kIeee754Float64Log10:
1085 116 : ASSEMBLE_IEEE754_UNOP(log10);
1086 116 : break;
1087 : case kIeee754Float64Pow:
1088 336 : ASSEMBLE_IEEE754_BINOP(pow);
1089 336 : break;
1090 : case kIeee754Float64Sin:
1091 268 : ASSEMBLE_IEEE754_UNOP(sin);
1092 268 : break;
1093 : case kIeee754Float64Sinh:
1094 123 : ASSEMBLE_IEEE754_UNOP(sinh);
1095 123 : break;
1096 : case kIeee754Float64Tan:
1097 168 : ASSEMBLE_IEEE754_UNOP(tan);
1098 168 : break;
1099 : case kIeee754Float64Tanh:
1100 123 : ASSEMBLE_IEEE754_UNOP(tanh);
1101 123 : break;
1102 : case kX64Add32:
1103 357994 : ASSEMBLE_BINOP(addl);
1104 : break;
1105 : case kX64Add:
1106 348496 : ASSEMBLE_BINOP(addq);
1107 : break;
1108 : case kX64Sub32:
1109 200812 : ASSEMBLE_BINOP(subl);
1110 : break;
1111 : case kX64Sub:
1112 227661 : ASSEMBLE_BINOP(subq);
1113 : break;
1114 : case kX64And32:
1115 851971 : ASSEMBLE_BINOP(andl);
1116 : break;
1117 : case kX64And:
1118 1238677 : ASSEMBLE_BINOP(andq);
1119 : break;
1120 : case kX64Cmp8:
1121 39955 : ASSEMBLE_COMPARE(cmpb);
1122 : break;
1123 : case kX64Cmp16:
1124 1603028 : ASSEMBLE_COMPARE(cmpw);
1125 : break;
1126 : case kX64Cmp32:
1127 4796358 : ASSEMBLE_COMPARE(cmpl);
1128 : break;
1129 : case kX64Cmp:
1130 9237396 : ASSEMBLE_COMPARE(cmpq);
1131 : break;
1132 : case kX64Test8:
1133 331384 : ASSEMBLE_COMPARE(testb);
1134 : break;
1135 : case kX64Test16:
1136 122375 : ASSEMBLE_COMPARE(testw);
1137 : break;
1138 : case kX64Test32:
1139 551256 : ASSEMBLE_COMPARE(testl);
1140 : break;
1141 : case kX64Test:
1142 3230226 : ASSEMBLE_COMPARE(testq);
1143 : break;
1144 : case kX64Imul32:
1145 203677 : ASSEMBLE_MULT(imull);
1146 : break;
1147 : case kX64Imul:
1148 83176 : ASSEMBLE_MULT(imulq);
1149 : break;
1150 : case kX64ImulHigh32:
1151 7522 : if (instr->InputAt(1)->IsRegister()) {
1152 3761 : __ imull(i.InputRegister(1));
1153 : } else {
1154 0 : __ imull(i.InputOperand(1));
1155 : }
1156 : break;
1157 : case kX64UmulHigh32:
1158 2874 : if (instr->InputAt(1)->IsRegister()) {
1159 1437 : __ mull(i.InputRegister(1));
1160 : } else {
1161 0 : __ mull(i.InputOperand(1));
1162 : }
1163 : break;
1164 : case kX64Idiv32:
1165 31991 : __ cdq();
1166 31991 : __ idivl(i.InputRegister(1));
1167 : break;
1168 : case kX64Idiv:
1169 2732 : __ cqo();
1170 2732 : __ idivq(i.InputRegister(1));
1171 : break;
1172 : case kX64Udiv32:
1173 29099 : __ xorl(rdx, rdx);
1174 29099 : __ divl(i.InputRegister(1));
1175 : break;
1176 : case kX64Udiv:
1177 1768 : __ xorq(rdx, rdx);
1178 1768 : __ divq(i.InputRegister(1));
1179 : break;
1180 : case kX64Not:
1181 88 : ASSEMBLE_UNOP(notq);
1182 : break;
1183 : case kX64Not32:
1184 5562 : ASSEMBLE_UNOP(notl);
1185 : break;
1186 : case kX64Neg:
1187 19448 : ASSEMBLE_UNOP(negq);
1188 : break;
1189 : case kX64Neg32:
1190 11860 : ASSEMBLE_UNOP(negl);
1191 : break;
1192 : case kX64Or32:
1193 344353 : ASSEMBLE_BINOP(orl);
1194 : break;
1195 : case kX64Or:
1196 374048 : ASSEMBLE_BINOP(orq);
1197 : break;
1198 : case kX64Xor32:
1199 92295 : ASSEMBLE_BINOP(xorl);
1200 : break;
1201 : case kX64Xor:
1202 1116 : ASSEMBLE_BINOP(xorq);
1203 : break;
1204 : case kX64Shl32:
1205 118111 : ASSEMBLE_SHIFT(shll, 5);
1206 : break;
1207 : case kX64Shl:
1208 1456216 : ASSEMBLE_SHIFT(shlq, 6);
1209 : break;
1210 : case kX64Shr32:
1211 430037 : ASSEMBLE_SHIFT(shrl, 5);
1212 : break;
1213 : case kX64Shr:
1214 1421058 : ASSEMBLE_SHIFT(shrq, 6);
1215 : break;
1216 : case kX64Sar32:
1217 96660 : ASSEMBLE_SHIFT(sarl, 5);
1218 : break;
1219 : case kX64Sar:
1220 749872 : ASSEMBLE_SHIFT(sarq, 6);
1221 : break;
1222 : case kX64Ror32:
1223 110735 : ASSEMBLE_SHIFT(rorl, 5);
1224 : break;
1225 : case kX64Ror:
1226 368 : ASSEMBLE_SHIFT(rorq, 6);
1227 : break;
1228 : case kX64Lzcnt:
1229 72 : if (instr->InputAt(0)->IsRegister()) {
1230 36 : __ Lzcntq(i.OutputRegister(), i.InputRegister(0));
1231 : } else {
1232 0 : __ Lzcntq(i.OutputRegister(), i.InputOperand(0));
1233 : }
1234 : break;
1235 : case kX64Lzcnt32:
1236 892 : if (instr->InputAt(0)->IsRegister()) {
1237 446 : __ Lzcntl(i.OutputRegister(), i.InputRegister(0));
1238 : } else {
1239 0 : __ Lzcntl(i.OutputRegister(), i.InputOperand(0));
1240 : }
1241 : break;
1242 : case kX64Tzcnt:
1243 88 : if (instr->InputAt(0)->IsRegister()) {
1244 44 : __ Tzcntq(i.OutputRegister(), i.InputRegister(0));
1245 : } else {
1246 0 : __ Tzcntq(i.OutputRegister(), i.InputOperand(0));
1247 : }
1248 : break;
1249 : case kX64Tzcnt32:
1250 664 : if (instr->InputAt(0)->IsRegister()) {
1251 332 : __ Tzcntl(i.OutputRegister(), i.InputRegister(0));
1252 : } else {
1253 0 : __ Tzcntl(i.OutputRegister(), i.InputOperand(0));
1254 : }
1255 : break;
1256 : case kX64Popcnt:
1257 88 : if (instr->InputAt(0)->IsRegister()) {
1258 44 : __ Popcntq(i.OutputRegister(), i.InputRegister(0));
1259 : } else {
1260 0 : __ Popcntq(i.OutputRegister(), i.InputOperand(0));
1261 : }
1262 : break;
1263 : case kX64Popcnt32:
1264 128 : if (instr->InputAt(0)->IsRegister()) {
1265 64 : __ Popcntl(i.OutputRegister(), i.InputRegister(0));
1266 : } else {
1267 0 : __ Popcntl(i.OutputRegister(), i.InputOperand(0));
1268 : }
1269 : break;
1270 : case kX64Bswap:
1271 12 : __ bswapq(i.OutputRegister());
1272 12 : break;
1273 : case kX64Bswap32:
1274 44 : __ bswapl(i.OutputRegister());
1275 44 : break;
1276 : case kSSEFloat32Cmp:
1277 0 : ASSEMBLE_SSE_BINOP(Ucomiss);
1278 : break;
1279 : case kSSEFloat32Add:
1280 0 : ASSEMBLE_SSE_BINOP(addss);
1281 : break;
1282 : case kSSEFloat32Sub:
1283 0 : ASSEMBLE_SSE_BINOP(subss);
1284 : break;
1285 : case kSSEFloat32Mul:
1286 0 : ASSEMBLE_SSE_BINOP(mulss);
1287 : break;
1288 : case kSSEFloat32Div:
1289 0 : ASSEMBLE_SSE_BINOP(divss);
1290 : // Don't delete this mov. It may improve performance on some CPUs,
1291 : // when there is a (v)mulss depending on the result.
1292 0 : __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1293 0 : break;
1294 : case kSSEFloat32Abs: {
1295 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1296 0 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1297 0 : __ psrlq(kScratchDoubleReg, 33);
1298 0 : __ andps(i.OutputDoubleRegister(), kScratchDoubleReg);
1299 0 : break;
1300 : }
1301 : case kSSEFloat32Neg: {
1302 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1303 0 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1304 0 : __ psllq(kScratchDoubleReg, 31);
1305 0 : __ xorps(i.OutputDoubleRegister(), kScratchDoubleReg);
1306 0 : break;
1307 : }
1308 : case kSSEFloat32Sqrt:
1309 522 : ASSEMBLE_SSE_UNOP(sqrtss);
1310 : break;
1311 : case kSSEFloat32ToFloat64:
1312 63846 : ASSEMBLE_SSE_UNOP(Cvtss2sd);
1313 : break;
1314 : case kSSEFloat32Round: {
1315 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
1316 : RoundingMode const mode =
1317 : static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1318 : __ Roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1319 : break;
1320 : }
1321 : case kSSEFloat32ToInt32:
1322 696 : if (instr->InputAt(0)->IsFPRegister()) {
1323 348 : __ Cvttss2si(i.OutputRegister(), i.InputDoubleRegister(0));
1324 : } else {
1325 0 : __ Cvttss2si(i.OutputRegister(), i.InputOperand(0));
1326 : }
1327 : break;
1328 : case kSSEFloat32ToUint32: {
1329 112 : if (instr->InputAt(0)->IsFPRegister()) {
1330 56 : __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1331 : } else {
1332 0 : __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
1333 : }
1334 : break;
1335 : }
1336 : case kSSEFloat64Cmp:
1337 3572 : ASSEMBLE_SSE_BINOP(Ucomisd);
1338 : break;
1339 : case kSSEFloat64Add:
1340 966 : ASSEMBLE_SSE_BINOP(addsd);
1341 : break;
1342 : case kSSEFloat64Sub:
1343 666 : ASSEMBLE_SSE_BINOP(subsd);
1344 : break;
1345 : case kSSEFloat64Mul:
1346 120 : ASSEMBLE_SSE_BINOP(mulsd);
1347 : break;
1348 : case kSSEFloat64Div:
1349 78 : ASSEMBLE_SSE_BINOP(divsd);
1350 : // Don't delete this mov. It may improve performance on some CPUs,
1351 : // when there is a (v)mulsd depending on the result.
1352 26 : __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1353 : break;
1354 : case kSSEFloat64Mod: {
1355 1614 : __ subq(rsp, Immediate(kDoubleSize));
1356 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1357 3228 : kDoubleSize);
1358 : // Move values to st(0) and st(1).
1359 4842 : __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(1));
1360 1614 : __ fld_d(Operand(rsp, 0));
1361 4842 : __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
1362 1614 : __ fld_d(Operand(rsp, 0));
1363 : // Loop while fprem isn't done.
1364 1614 : Label mod_loop;
1365 1614 : __ bind(&mod_loop);
1366 : // This instructions traps on all kinds inputs, but we are assuming the
1367 : // floating point control word is set to ignore them all.
1368 1614 : __ fprem();
1369 : // The following 2 instruction implicitly use rax.
1370 1614 : __ fnstsw_ax();
1371 1614 : if (CpuFeatures::IsSupported(SAHF)) {
1372 : CpuFeatureScope sahf_scope(tasm(), SAHF);
1373 1582 : __ sahf();
1374 : } else {
1375 : __ shrl(rax, Immediate(8));
1376 32 : __ andl(rax, Immediate(0xFF));
1377 32 : __ pushq(rax);
1378 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1379 64 : kSystemPointerSize);
1380 32 : __ popfq();
1381 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1382 64 : -kSystemPointerSize);
1383 : }
1384 1614 : __ j(parity_even, &mod_loop);
1385 : // Move output to stack and clean up.
1386 1614 : __ fstp(1);
1387 1614 : __ fstp_d(Operand(rsp, 0));
1388 3228 : __ Movsd(i.OutputDoubleRegister(), Operand(rsp, 0));
1389 1614 : __ addq(rsp, Immediate(kDoubleSize));
1390 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1391 3228 : -kDoubleSize);
1392 : break;
1393 : }
1394 : case kSSEFloat32Max: {
1395 66 : Label compare_swap, done_compare;
1396 132 : if (instr->InputAt(1)->IsFPRegister()) {
1397 : __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1398 : } else {
1399 0 : __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1400 : }
1401 : auto ool =
1402 66 : new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
1403 66 : __ j(parity_even, ool->entry());
1404 66 : __ j(above, &done_compare, Label::kNear);
1405 66 : __ j(below, &compare_swap, Label::kNear);
1406 66 : __ Movmskps(kScratchRegister, i.InputDoubleRegister(0));
1407 : __ testl(kScratchRegister, Immediate(1));
1408 66 : __ j(zero, &done_compare, Label::kNear);
1409 66 : __ bind(&compare_swap);
1410 132 : if (instr->InputAt(1)->IsFPRegister()) {
1411 66 : __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1412 : } else {
1413 0 : __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
1414 : }
1415 66 : __ bind(&done_compare);
1416 66 : __ bind(ool->exit());
1417 : break;
1418 : }
1419 : case kSSEFloat32Min: {
1420 66 : Label compare_swap, done_compare;
1421 132 : if (instr->InputAt(1)->IsFPRegister()) {
1422 : __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1423 : } else {
1424 0 : __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1425 : }
1426 : auto ool =
1427 66 : new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
1428 66 : __ j(parity_even, ool->entry());
1429 66 : __ j(below, &done_compare, Label::kNear);
1430 66 : __ j(above, &compare_swap, Label::kNear);
1431 132 : if (instr->InputAt(1)->IsFPRegister()) {
1432 66 : __ Movmskps(kScratchRegister, i.InputDoubleRegister(1));
1433 : } else {
1434 0 : __ Movss(kScratchDoubleReg, i.InputOperand(1));
1435 : __ Movmskps(kScratchRegister, kScratchDoubleReg);
1436 : }
1437 : __ testl(kScratchRegister, Immediate(1));
1438 66 : __ j(zero, &done_compare, Label::kNear);
1439 66 : __ bind(&compare_swap);
1440 132 : if (instr->InputAt(1)->IsFPRegister()) {
1441 66 : __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1442 : } else {
1443 0 : __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
1444 : }
1445 66 : __ bind(&done_compare);
1446 66 : __ bind(ool->exit());
1447 : break;
1448 : }
1449 : case kSSEFloat64Max: {
1450 250 : Label compare_swap, done_compare;
1451 500 : if (instr->InputAt(1)->IsFPRegister()) {
1452 : __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1453 : } else {
1454 0 : __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1455 : }
1456 : auto ool =
1457 250 : new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
1458 250 : __ j(parity_even, ool->entry());
1459 250 : __ j(above, &done_compare, Label::kNear);
1460 250 : __ j(below, &compare_swap, Label::kNear);
1461 250 : __ Movmskpd(kScratchRegister, i.InputDoubleRegister(0));
1462 : __ testl(kScratchRegister, Immediate(1));
1463 250 : __ j(zero, &done_compare, Label::kNear);
1464 250 : __ bind(&compare_swap);
1465 500 : if (instr->InputAt(1)->IsFPRegister()) {
1466 250 : __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1467 : } else {
1468 0 : __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1469 : }
1470 250 : __ bind(&done_compare);
1471 250 : __ bind(ool->exit());
1472 : break;
1473 : }
1474 : case kSSEFloat64Min: {
1475 335 : Label compare_swap, done_compare;
1476 670 : if (instr->InputAt(1)->IsFPRegister()) {
1477 : __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1478 : } else {
1479 0 : __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1480 : }
1481 : auto ool =
1482 335 : new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
1483 335 : __ j(parity_even, ool->entry());
1484 335 : __ j(below, &done_compare, Label::kNear);
1485 335 : __ j(above, &compare_swap, Label::kNear);
1486 670 : if (instr->InputAt(1)->IsFPRegister()) {
1487 335 : __ Movmskpd(kScratchRegister, i.InputDoubleRegister(1));
1488 : } else {
1489 0 : __ Movsd(kScratchDoubleReg, i.InputOperand(1));
1490 : __ Movmskpd(kScratchRegister, kScratchDoubleReg);
1491 : }
1492 : __ testl(kScratchRegister, Immediate(1));
1493 335 : __ j(zero, &done_compare, Label::kNear);
1494 335 : __ bind(&compare_swap);
1495 670 : if (instr->InputAt(1)->IsFPRegister()) {
1496 335 : __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1497 : } else {
1498 0 : __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1499 : }
1500 335 : __ bind(&done_compare);
1501 335 : __ bind(ool->exit());
1502 : break;
1503 : }
1504 : case kSSEFloat64Abs: {
1505 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1506 6 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1507 6 : __ psrlq(kScratchDoubleReg, 1);
1508 12 : __ andpd(i.OutputDoubleRegister(), kScratchDoubleReg);
1509 6 : break;
1510 : }
1511 : case kSSEFloat64Neg: {
1512 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1513 76 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1514 76 : __ psllq(kScratchDoubleReg, 63);
1515 152 : __ xorpd(i.OutputDoubleRegister(), kScratchDoubleReg);
1516 76 : break;
1517 : }
1518 : case kSSEFloat64Sqrt:
1519 830 : ASSEMBLE_SSE_UNOP(Sqrtsd);
1520 : break;
1521 : case kSSEFloat64Round: {
1522 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
1523 : RoundingMode const mode =
1524 : static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1525 : __ Roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1526 : break;
1527 : }
1528 : case kSSEFloat64ToFloat32:
1529 53814 : ASSEMBLE_SSE_UNOP(Cvtsd2ss);
1530 : break;
1531 : case kSSEFloat64ToInt32:
1532 251060 : if (instr->InputAt(0)->IsFPRegister()) {
1533 105217 : __ Cvttsd2si(i.OutputRegister(), i.InputDoubleRegister(0));
1534 : } else {
1535 40626 : __ Cvttsd2si(i.OutputRegister(), i.InputOperand(0));
1536 : }
1537 : break;
1538 : case kSSEFloat64ToUint32: {
1539 1520 : if (instr->InputAt(0)->IsFPRegister()) {
1540 760 : __ Cvttsd2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1541 : } else {
1542 0 : __ Cvttsd2siq(i.OutputRegister(), i.InputOperand(0));
1543 : }
1544 1520 : if (MiscField::decode(instr->opcode())) {
1545 1400 : __ AssertZeroExtended(i.OutputRegister());
1546 : }
1547 : break;
1548 : }
1549 : case kSSEFloat32ToInt64:
1550 104 : if (instr->InputAt(0)->IsFPRegister()) {
1551 52 : __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1552 : } else {
1553 0 : __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
1554 : }
1555 52 : if (instr->OutputCount() > 1) {
1556 96 : __ Set(i.OutputRegister(1), 1);
1557 48 : Label done;
1558 48 : Label fail;
1559 : __ Move(kScratchDoubleReg, static_cast<float>(INT64_MIN));
1560 96 : if (instr->InputAt(0)->IsFPRegister()) {
1561 48 : __ Ucomiss(kScratchDoubleReg, i.InputDoubleRegister(0));
1562 : } else {
1563 0 : __ Ucomiss(kScratchDoubleReg, i.InputOperand(0));
1564 : }
1565 : // If the input is NaN, then the conversion fails.
1566 48 : __ j(parity_even, &fail);
1567 : // If the input is INT64_MIN, then the conversion succeeds.
1568 48 : __ j(equal, &done);
1569 96 : __ cmpq(i.OutputRegister(0), Immediate(1));
1570 : // If the conversion results in INT64_MIN, but the input was not
1571 : // INT64_MIN, then the conversion fails.
1572 48 : __ j(no_overflow, &done);
1573 48 : __ bind(&fail);
1574 96 : __ Set(i.OutputRegister(1), 0);
1575 48 : __ bind(&done);
1576 : }
1577 : break;
1578 : case kSSEFloat64ToInt64:
1579 1440 : if (instr->InputAt(0)->IsFPRegister()) {
1580 719 : __ Cvttsd2siq(i.OutputRegister(0), i.InputDoubleRegister(0));
1581 : } else {
1582 2 : __ Cvttsd2siq(i.OutputRegister(0), i.InputOperand(0));
1583 : }
1584 722 : if (instr->OutputCount() > 1) {
1585 1214 : __ Set(i.OutputRegister(1), 1);
1586 608 : Label done;
1587 608 : Label fail;
1588 : __ Move(kScratchDoubleReg, static_cast<double>(INT64_MIN));
1589 1216 : if (instr->InputAt(0)->IsFPRegister()) {
1590 608 : __ Ucomisd(kScratchDoubleReg, i.InputDoubleRegister(0));
1591 : } else {
1592 0 : __ Ucomisd(kScratchDoubleReg, i.InputOperand(0));
1593 : }
1594 : // If the input is NaN, then the conversion fails.
1595 608 : __ j(parity_even, &fail);
1596 : // If the input is INT64_MIN, then the conversion succeeds.
1597 608 : __ j(equal, &done);
1598 1216 : __ cmpq(i.OutputRegister(0), Immediate(1));
1599 : // If the conversion results in INT64_MIN, but the input was not
1600 : // INT64_MIN, then the conversion fails.
1601 606 : __ j(no_overflow, &done);
1602 605 : __ bind(&fail);
1603 1212 : __ Set(i.OutputRegister(1), 0);
1604 607 : __ bind(&done);
1605 : }
1606 : break;
1607 : case kSSEFloat32ToUint64: {
1608 52 : Label fail;
1609 100 : if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
1610 104 : if (instr->InputAt(0)->IsFPRegister()) {
1611 104 : __ Cvttss2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
1612 : } else {
1613 0 : __ Cvttss2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
1614 : }
1615 100 : if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
1616 52 : __ bind(&fail);
1617 : break;
1618 : }
1619 : case kSSEFloat64ToUint64: {
1620 3756 : Label fail;
1621 3812 : if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
1622 7512 : if (instr->InputAt(0)->IsFPRegister()) {
1623 7512 : __ Cvttsd2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
1624 : } else {
1625 0 : __ Cvttsd2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
1626 : }
1627 3812 : if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
1628 3756 : __ bind(&fail);
1629 : break;
1630 : }
1631 : case kSSEInt32ToFloat64:
1632 745152 : if (instr->InputAt(0)->IsRegister()) {
1633 368662 : __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1634 : } else {
1635 7828 : __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1636 : }
1637 : break;
1638 : case kSSEInt32ToFloat32:
1639 1920 : if (instr->InputAt(0)->IsRegister()) {
1640 952 : __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1641 : } else {
1642 16 : __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1643 : }
1644 : break;
1645 : case kSSEInt64ToFloat32:
1646 64 : if (instr->InputAt(0)->IsRegister()) {
1647 32 : __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1648 : } else {
1649 0 : __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1650 : }
1651 : break;
1652 : case kSSEInt64ToFloat64:
1653 6394 : if (instr->InputAt(0)->IsRegister()) {
1654 1079 : __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1655 : } else {
1656 4236 : __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1657 : }
1658 : break;
1659 : case kSSEUint64ToFloat32:
1660 64 : if (instr->InputAt(0)->IsRegister()) {
1661 32 : __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1662 : } else {
1663 0 : __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1664 : }
1665 : break;
1666 : case kSSEUint64ToFloat64:
1667 6992 : if (instr->InputAt(0)->IsRegister()) {
1668 2376 : __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1669 : } else {
1670 2240 : __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1671 : }
1672 : break;
1673 : case kSSEUint32ToFloat64:
1674 21492 : if (instr->InputAt(0)->IsRegister()) {
1675 402 : __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1676 : } else {
1677 20688 : __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1678 : }
1679 : break;
1680 : case kSSEUint32ToFloat32:
1681 176 : if (instr->InputAt(0)->IsRegister()) {
1682 88 : __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1683 : } else {
1684 0 : __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1685 : }
1686 : break;
1687 : case kSSEFloat64ExtractLowWord32:
1688 232 : if (instr->InputAt(0)->IsFPStackSlot()) {
1689 0 : __ movl(i.OutputRegister(), i.InputOperand(0));
1690 : } else {
1691 : __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
1692 : }
1693 : break;
1694 : case kSSEFloat64ExtractHighWord32:
1695 186458 : if (instr->InputAt(0)->IsFPStackSlot()) {
1696 117898 : __ movl(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2));
1697 : } else {
1698 34280 : __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1);
1699 : }
1700 : break;
1701 : case kSSEFloat64InsertLowWord32:
1702 8 : if (instr->InputAt(1)->IsRegister()) {
1703 4 : __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 0);
1704 : } else {
1705 0 : __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
1706 : }
1707 : break;
1708 : case kSSEFloat64InsertHighWord32:
1709 232 : if (instr->InputAt(1)->IsRegister()) {
1710 116 : __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 1);
1711 : } else {
1712 0 : __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
1713 : }
1714 : break;
1715 : case kSSEFloat64LoadLowWord32:
1716 224 : if (instr->InputAt(0)->IsRegister()) {
1717 : __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
1718 : } else {
1719 0 : __ Movd(i.OutputDoubleRegister(), i.InputOperand(0));
1720 : }
1721 : break;
1722 : case kAVXFloat32Cmp: {
1723 : CpuFeatureScope avx_scope(tasm(), AVX);
1724 3770 : if (instr->InputAt(1)->IsFPRegister()) {
1725 1867 : __ vucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1726 : } else {
1727 36 : __ vucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1728 : }
1729 : break;
1730 : }
1731 : case kAVXFloat32Add:
1732 5331 : ASSEMBLE_AVX_BINOP(vaddss);
1733 : break;
1734 : case kAVXFloat32Sub:
1735 7683 : ASSEMBLE_AVX_BINOP(vsubss);
1736 : break;
1737 : case kAVXFloat32Mul:
1738 2619 : ASSEMBLE_AVX_BINOP(vmulss);
1739 : break;
1740 : case kAVXFloat32Div:
1741 1059 : ASSEMBLE_AVX_BINOP(vdivss);
1742 : // Don't delete this mov. It may improve performance on some CPUs,
1743 : // when there is a (v)mulss depending on the result.
1744 353 : __ Movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1745 : break;
1746 : case kAVXFloat64Cmp: {
1747 : CpuFeatureScope avx_scope(tasm(), AVX);
1748 512950 : if (instr->InputAt(1)->IsFPRegister()) {
1749 235797 : __ vucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1750 : } else {
1751 41356 : __ vucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1752 : }
1753 : break;
1754 : }
1755 : case kAVXFloat64Add:
1756 239616 : ASSEMBLE_AVX_BINOP(vaddsd);
1757 : break;
1758 : case kAVXFloat64Sub:
1759 44814 : ASSEMBLE_AVX_BINOP(vsubsd);
1760 : break;
1761 : case kAVXFloat64Mul:
1762 37497 : ASSEMBLE_AVX_BINOP(vmulsd);
1763 : break;
1764 : case kAVXFloat64Div:
1765 35697 : ASSEMBLE_AVX_BINOP(vdivsd);
1766 : // Don't delete this mov. It may improve performance on some CPUs,
1767 : // when there is a (v)mulsd depending on the result.
1768 11899 : __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1769 : break;
1770 : case kAVXFloat32Abs: {
1771 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1772 : CpuFeatureScope avx_scope(tasm(), AVX);
1773 66 : __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
1774 : __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 33);
1775 132 : if (instr->InputAt(0)->IsFPRegister()) {
1776 : __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg,
1777 66 : i.InputDoubleRegister(0));
1778 : } else {
1779 : __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg,
1780 0 : i.InputOperand(0));
1781 : }
1782 : break;
1783 : }
1784 : case kAVXFloat32Neg: {
1785 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1786 : CpuFeatureScope avx_scope(tasm(), AVX);
1787 168 : __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
1788 : __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 31);
1789 336 : if (instr->InputAt(0)->IsFPRegister()) {
1790 : __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg,
1791 168 : i.InputDoubleRegister(0));
1792 : } else {
1793 : __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg,
1794 0 : i.InputOperand(0));
1795 : }
1796 : break;
1797 : }
1798 : case kAVXFloat64Abs: {
1799 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1800 : CpuFeatureScope avx_scope(tasm(), AVX);
1801 615 : __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
1802 : __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 1);
1803 1230 : if (instr->InputAt(0)->IsFPRegister()) {
1804 : __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg,
1805 615 : i.InputDoubleRegister(0));
1806 : } else {
1807 : __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg,
1808 0 : i.InputOperand(0));
1809 : }
1810 : break;
1811 : }
1812 : case kAVXFloat64Neg: {
1813 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1814 : CpuFeatureScope avx_scope(tasm(), AVX);
1815 9657 : __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
1816 : __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 63);
1817 19314 : if (instr->InputAt(0)->IsFPRegister()) {
1818 : __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg,
1819 9574 : i.InputDoubleRegister(0));
1820 : } else {
1821 : __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg,
1822 83 : i.InputOperand(0));
1823 : }
1824 : break;
1825 : }
1826 : case kSSEFloat64SilenceNaN:
1827 : __ Xorpd(kScratchDoubleReg, kScratchDoubleReg);
1828 5907 : __ Subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
1829 : break;
1830 : case kX64Movsxbl:
1831 87736 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1832 132215 : ASSEMBLE_MOVX(movsxbl);
1833 87736 : __ AssertZeroExtended(i.OutputRegister());
1834 43868 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1835 43868 : break;
1836 : case kX64Movzxbl:
1837 361834 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1838 545138 : ASSEMBLE_MOVX(movzxbl);
1839 361834 : __ AssertZeroExtended(i.OutputRegister());
1840 180917 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1841 180917 : break;
1842 : case kX64Movsxbq:
1843 27126 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1844 40693 : ASSEMBLE_MOVX(movsxbq);
1845 13563 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1846 13563 : break;
1847 : case kX64Movzxbq:
1848 27592 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1849 41388 : ASSEMBLE_MOVX(movzxbq);
1850 27592 : __ AssertZeroExtended(i.OutputRegister());
1851 13796 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1852 13796 : break;
1853 : case kX64Movb: {
1854 151814 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1855 75907 : size_t index = 0;
1856 75907 : Operand operand = i.MemoryOperand(&index);
1857 151814 : if (HasImmediateInput(instr, index)) {
1858 13284 : __ movb(operand, Immediate(i.InputInt8(index)));
1859 : } else {
1860 138530 : __ movb(operand, i.InputRegister(index));
1861 : }
1862 75907 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1863 : break;
1864 : }
1865 : case kX64Movsxwl:
1866 22886 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1867 34811 : ASSEMBLE_MOVX(movsxwl);
1868 22886 : __ AssertZeroExtended(i.OutputRegister());
1869 11443 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1870 11443 : break;
1871 : case kX64Movzxwl:
1872 318468 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1873 484094 : ASSEMBLE_MOVX(movzxwl);
1874 318468 : __ AssertZeroExtended(i.OutputRegister());
1875 159234 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1876 159234 : break;
1877 : case kX64Movsxwq:
1878 17942 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1879 26917 : ASSEMBLE_MOVX(movsxwq);
1880 : break;
1881 : case kX64Movzxwq:
1882 1344 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1883 2016 : ASSEMBLE_MOVX(movzxwq);
1884 1344 : __ AssertZeroExtended(i.OutputRegister());
1885 672 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1886 672 : break;
1887 : case kX64Movw: {
1888 23686 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1889 11843 : size_t index = 0;
1890 11843 : Operand operand = i.MemoryOperand(&index);
1891 23686 : if (HasImmediateInput(instr, index)) {
1892 1734 : __ movw(operand, Immediate(i.InputInt16(index)));
1893 : } else {
1894 21952 : __ movw(operand, i.InputRegister(index));
1895 : }
1896 11843 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1897 : break;
1898 : }
1899 : case kX64Movl:
1900 4308102 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1901 2153980 : if (instr->HasOutput()) {
1902 611134 : if (instr->addressing_mode() == kMode_None) {
1903 119170 : if (instr->InputAt(0)->IsRegister()) {
1904 111908 : __ movl(i.OutputRegister(), i.InputRegister(0));
1905 : } else {
1906 7262 : __ movl(i.OutputRegister(), i.InputOperand(0));
1907 : }
1908 : } else {
1909 1103108 : __ movl(i.OutputRegister(), i.MemoryOperand());
1910 : }
1911 1222310 : __ AssertZeroExtended(i.OutputRegister());
1912 : } else {
1913 1542846 : size_t index = 0;
1914 1542846 : Operand operand = i.MemoryOperand(&index);
1915 3085882 : if (HasImmediateInput(instr, index)) {
1916 488154 : __ movl(operand, i.InputImmediate(index));
1917 : } else {
1918 2109574 : __ movl(operand, i.InputRegister(index));
1919 : }
1920 : }
1921 2154054 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1922 2154076 : break;
1923 : case kX64Movsxlq:
1924 668606 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1925 1086701 : ASSEMBLE_MOVX(movsxlq);
1926 334303 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1927 334303 : break;
1928 : case kX64MovqDecompressTaggedSigned: {
1929 0 : CHECK(instr->HasOutput());
1930 0 : __ DecompressTaggedSigned(i.OutputRegister(), i.MemoryOperand());
1931 0 : break;
1932 : }
1933 : case kX64MovqDecompressTaggedPointer: {
1934 0 : CHECK(instr->HasOutput());
1935 0 : __ DecompressTaggedPointer(i.OutputRegister(), i.MemoryOperand());
1936 0 : break;
1937 : }
1938 : case kX64MovqDecompressAnyTagged: {
1939 0 : CHECK(instr->HasOutput());
1940 : __ DecompressAnyTagged(i.OutputRegister(), i.MemoryOperand(),
1941 0 : i.TempRegister(0));
1942 0 : break;
1943 : }
1944 : case kX64MovqCompressTagged: {
1945 0 : CHECK(!instr->HasOutput());
1946 0 : size_t index = 0;
1947 0 : Operand operand = i.MemoryOperand(&index);
1948 0 : if (HasImmediateInput(instr, index)) {
1949 0 : __ StoreTaggedField(operand, i.InputImmediate(index));
1950 : } else {
1951 0 : __ StoreTaggedField(operand, i.InputRegister(index));
1952 : }
1953 : break;
1954 : }
1955 : case kX64Movq:
1956 15395520 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1957 7697749 : if (instr->HasOutput()) {
1958 9747362 : __ movq(i.OutputRegister(), i.MemoryOperand());
1959 : } else {
1960 2824154 : size_t index = 0;
1961 2824154 : Operand operand = i.MemoryOperand(&index);
1962 5648310 : if (HasImmediateInput(instr, index)) {
1963 20762 : __ movq(operand, i.InputImmediate(index));
1964 : } else {
1965 5606786 : __ movq(operand, i.InputRegister(index));
1966 : }
1967 : }
1968 7697865 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1969 7697847 : break;
1970 : case kX64Movss:
1971 53200 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1972 26601 : if (instr->HasOutput()) {
1973 32130 : __ movss(i.OutputDoubleRegister(), i.MemoryOperand());
1974 : } else {
1975 10536 : size_t index = 0;
1976 10536 : Operand operand = i.MemoryOperand(&index);
1977 21072 : __ movss(operand, i.InputDoubleRegister(index));
1978 : }
1979 : break;
1980 : case kX64Movsd: {
1981 1228050 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1982 614044 : if (instr->HasOutput()) {
1983 : const MemoryAccessMode access_mode =
1984 : static_cast<MemoryAccessMode>(MiscField::decode(opcode));
1985 423854 : if (access_mode == kMemoryAccessPoisoned) {
1986 : // If we have to poison the loaded value, we load into a general
1987 : // purpose register first, mask it with the poison, and move the
1988 : // value from the general purpose register into the double register.
1989 0 : __ movq(kScratchRegister, i.MemoryOperand());
1990 0 : __ andq(kScratchRegister, kSpeculationPoisonRegister);
1991 0 : __ Movq(i.OutputDoubleRegister(), kScratchRegister);
1992 : } else {
1993 423854 : __ Movsd(i.OutputDoubleRegister(), i.MemoryOperand());
1994 : }
1995 : } else {
1996 190190 : size_t index = 0;
1997 190190 : Operand operand = i.MemoryOperand(&index);
1998 190192 : __ Movsd(operand, i.InputDoubleRegister(index));
1999 : }
2000 : break;
2001 : }
2002 : case kX64Movdqu: {
2003 : CpuFeatureScope sse_scope(tasm(), SSSE3);
2004 17632 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
2005 8816 : if (instr->HasOutput()) {
2006 11584 : __ movdqu(i.OutputSimd128Register(), i.MemoryOperand());
2007 : } else {
2008 3024 : size_t index = 0;
2009 3024 : Operand operand = i.MemoryOperand(&index);
2010 6048 : __ movdqu(operand, i.InputSimd128Register(index));
2011 : }
2012 : break;
2013 : }
2014 : case kX64BitcastFI:
2015 1112 : if (instr->InputAt(0)->IsFPStackSlot()) {
2016 0 : __ movl(i.OutputRegister(), i.InputOperand(0));
2017 : } else {
2018 : __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
2019 : }
2020 : break;
2021 : case kX64BitcastDL:
2022 1048 : if (instr->InputAt(0)->IsFPStackSlot()) {
2023 0 : __ movq(i.OutputRegister(), i.InputOperand(0));
2024 : } else {
2025 : __ Movq(i.OutputRegister(), i.InputDoubleRegister(0));
2026 : }
2027 : break;
2028 : case kX64BitcastIF:
2029 616 : if (instr->InputAt(0)->IsRegister()) {
2030 : __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
2031 : } else {
2032 0 : __ movss(i.OutputDoubleRegister(), i.InputOperand(0));
2033 : }
2034 : break;
2035 : case kX64BitcastLD:
2036 310 : if (instr->InputAt(0)->IsRegister()) {
2037 : __ Movq(i.OutputDoubleRegister(), i.InputRegister(0));
2038 : } else {
2039 0 : __ Movsd(i.OutputDoubleRegister(), i.InputOperand(0));
2040 : }
2041 : break;
2042 : case kX64Lea32: {
2043 : AddressingMode mode = AddressingModeField::decode(instr->opcode());
2044 : // Shorten "leal" to "addl", "subl" or "shll" if the register allocation
2045 : // and addressing mode just happens to work out. The "addl"/"subl" forms
2046 : // in these cases are faster based on measurements.
2047 323724 : if (i.InputRegister(0) == i.OutputRegister()) {
2048 143912 : if (mode == kMode_MRI) {
2049 76789 : int32_t constant_summand = i.InputInt32(1);
2050 : DCHECK_NE(0, constant_summand);
2051 76790 : if (constant_summand > 0) {
2052 110416 : __ addl(i.OutputRegister(), Immediate(constant_summand));
2053 : } else {
2054 : __ subl(i.OutputRegister(),
2055 43164 : Immediate(base::NegateWithWraparound(constant_summand)));
2056 : }
2057 67123 : } else if (mode == kMode_MR1) {
2058 12664 : if (i.InputRegister(1) == i.OutputRegister()) {
2059 966 : __ shll(i.OutputRegister(), Immediate(1));
2060 : } else {
2061 11698 : __ addl(i.OutputRegister(), i.InputRegister(1));
2062 : }
2063 54459 : } else if (mode == kMode_M2) {
2064 0 : __ shll(i.OutputRegister(), Immediate(1));
2065 54459 : } else if (mode == kMode_M4) {
2066 366 : __ shll(i.OutputRegister(), Immediate(2));
2067 54093 : } else if (mode == kMode_M8) {
2068 2703 : __ shll(i.OutputRegister(), Immediate(3));
2069 : } else {
2070 102780 : __ leal(i.OutputRegister(), i.MemoryOperand());
2071 : }
2072 205378 : } else if (mode == kMode_MR1 &&
2073 : i.InputRegister(1) == i.OutputRegister()) {
2074 17027 : __ addl(i.OutputRegister(), i.InputRegister(0));
2075 : } else {
2076 325564 : __ leal(i.OutputRegister(), i.MemoryOperand());
2077 : }
2078 647456 : __ AssertZeroExtended(i.OutputRegister());
2079 323728 : break;
2080 : }
2081 : case kX64Lea: {
2082 : AddressingMode mode = AddressingModeField::decode(instr->opcode());
2083 : // Shorten "leaq" to "addq", "subq" or "shlq" if the register allocation
2084 : // and addressing mode just happens to work out. The "addq"/"subq" forms
2085 : // in these cases are faster based on measurements.
2086 2118719 : if (i.InputRegister(0) == i.OutputRegister()) {
2087 627549 : if (mode == kMode_MRI) {
2088 524100 : int32_t constant_summand = i.InputInt32(1);
2089 524102 : if (constant_summand > 0) {
2090 860264 : __ addq(i.OutputRegister(), Immediate(constant_summand));
2091 93970 : } else if (constant_summand < 0) {
2092 281838 : __ subq(i.OutputRegister(), Immediate(-constant_summand));
2093 : }
2094 103449 : } else if (mode == kMode_MR1) {
2095 51484 : if (i.InputRegister(1) == i.OutputRegister()) {
2096 2717 : __ shlq(i.OutputRegister(), Immediate(1));
2097 : } else {
2098 48767 : __ addq(i.OutputRegister(), i.InputRegister(1));
2099 : }
2100 51965 : } else if (mode == kMode_M2) {
2101 0 : __ shlq(i.OutputRegister(), Immediate(1));
2102 51965 : } else if (mode == kMode_M4) {
2103 336 : __ shlq(i.OutputRegister(), Immediate(2));
2104 51629 : } else if (mode == kMode_M8) {
2105 12344 : __ shlq(i.OutputRegister(), Immediate(3));
2106 : } else {
2107 78570 : __ leaq(i.OutputRegister(), i.MemoryOperand());
2108 : }
2109 1743999 : } else if (mode == kMode_MR1 &&
2110 : i.InputRegister(1) == i.OutputRegister()) {
2111 179045 : __ addq(i.OutputRegister(), i.InputRegister(0));
2112 : } else {
2113 2624260 : __ leaq(i.OutputRegister(), i.MemoryOperand());
2114 : }
2115 : break;
2116 : }
2117 : case kX64Dec32:
2118 0 : __ decl(i.OutputRegister());
2119 : break;
2120 : case kX64Inc32:
2121 0 : __ incl(i.OutputRegister());
2122 : break;
2123 : case kX64Push:
2124 3089391 : if (AddressingModeField::decode(instr->opcode()) != kMode_None) {
2125 19480 : size_t index = 0;
2126 19480 : Operand operand = i.MemoryOperand(&index);
2127 19481 : __ pushq(operand);
2128 : frame_access_state()->IncreaseSPDelta(1);
2129 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2130 38962 : kSystemPointerSize);
2131 3069911 : } else if (HasImmediateInput(instr, 0)) {
2132 159249 : __ pushq(i.InputImmediate(0));
2133 : frame_access_state()->IncreaseSPDelta(1);
2134 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2135 318498 : kSystemPointerSize);
2136 2910663 : } else if (instr->InputAt(0)->IsRegister()) {
2137 2181832 : __ pushq(i.InputRegister(0));
2138 : frame_access_state()->IncreaseSPDelta(1);
2139 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2140 4363680 : kSystemPointerSize);
2141 1449742 : } else if (instr->InputAt(0)->IsFloatRegister() ||
2142 : instr->InputAt(0)->IsDoubleRegister()) {
2143 : // TODO(titzer): use another machine instruction?
2144 14806 : __ subq(rsp, Immediate(kDoubleSize));
2145 : frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize);
2146 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2147 29612 : kDoubleSize);
2148 44418 : __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
2149 714025 : } else if (instr->InputAt(0)->IsSimd128Register()) {
2150 : // TODO(titzer): use another machine instruction?
2151 144 : __ subq(rsp, Immediate(kSimd128Size));
2152 : frame_access_state()->IncreaseSPDelta(kSimd128Size /
2153 : kSystemPointerSize);
2154 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2155 288 : kSimd128Size);
2156 432 : __ Movups(Operand(rsp, 0), i.InputSimd128Register(0));
2157 728554 : } else if (instr->InputAt(0)->IsStackSlot() ||
2158 724666 : instr->InputAt(0)->IsFloatStackSlot() ||
2159 : instr->InputAt(0)->IsDoubleStackSlot()) {
2160 713641 : __ pushq(i.InputOperand(0));
2161 : frame_access_state()->IncreaseSPDelta(1);
2162 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2163 1427282 : kSystemPointerSize);
2164 : } else {
2165 : DCHECK(instr->InputAt(0)->IsSimd128StackSlot());
2166 240 : __ Movups(kScratchDoubleReg, i.InputOperand(0));
2167 : // TODO(titzer): use another machine instruction?
2168 240 : __ subq(rsp, Immediate(kSimd128Size));
2169 : frame_access_state()->IncreaseSPDelta(kSimd128Size /
2170 : kSystemPointerSize);
2171 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2172 480 : kSimd128Size);
2173 480 : __ Movups(Operand(rsp, 0), kScratchDoubleReg);
2174 : }
2175 : break;
2176 : case kX64Poke: {
2177 : int slot = MiscField::decode(instr->opcode());
2178 3392 : if (HasImmediateInput(instr, 0)) {
2179 2264 : __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputImmediate(0));
2180 : } else {
2181 4520 : __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputRegister(0));
2182 : }
2183 : break;
2184 : }
2185 : case kX64Peek: {
2186 5072 : int reverse_slot = i.InputInt32(0);
2187 : int offset =
2188 5072 : FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
2189 5072 : if (instr->OutputAt(0)->IsFPRegister()) {
2190 : LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
2191 2528 : if (op->representation() == MachineRepresentation::kFloat64) {
2192 2528 : __ Movsd(i.OutputDoubleRegister(), Operand(rbp, offset));
2193 : } else {
2194 : DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
2195 2528 : __ Movss(i.OutputFloatRegister(), Operand(rbp, offset));
2196 : }
2197 : } else {
2198 7632 : __ movq(i.OutputRegister(), Operand(rbp, offset));
2199 : }
2200 : break;
2201 : }
2202 : // TODO(gdeepti): Get rid of redundant moves for F32x4Splat/Extract below
2203 : case kX64F32x4Splat: {
2204 140 : XMMRegister dst = i.OutputSimd128Register();
2205 280 : if (instr->InputAt(0)->IsFPRegister()) {
2206 140 : __ movss(dst, i.InputDoubleRegister(0));
2207 : } else {
2208 0 : __ movss(dst, i.InputOperand(0));
2209 : }
2210 140 : __ shufps(dst, dst, 0x0);
2211 : break;
2212 : }
2213 : case kX64F32x4ExtractLane: {
2214 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2215 576 : __ extractps(kScratchRegister, i.InputSimd128Register(0), i.InputInt8(1));
2216 576 : __ movd(i.OutputDoubleRegister(), kScratchRegister);
2217 : break;
2218 : }
2219 : case kX64F32x4ReplaceLane: {
2220 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2221 : // The insertps instruction uses imm8[5:4] to indicate the lane
2222 : // that needs to be replaced.
2223 32 : byte select = i.InputInt8(1) << 4 & 0x30;
2224 64 : __ insertps(i.OutputSimd128Register(), i.InputDoubleRegister(2), select);
2225 : break;
2226 : }
2227 : case kX64F32x4SConvertI32x4: {
2228 4 : __ cvtdq2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2229 4 : break;
2230 : }
2231 : case kX64F32x4UConvertI32x4: {
2232 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2233 : DCHECK_NE(i.OutputSimd128Register(), kScratchDoubleReg);
2234 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2235 : XMMRegister dst = i.OutputSimd128Register();
2236 4 : __ pxor(kScratchDoubleReg, kScratchDoubleReg); // zeros
2237 4 : __ pblendw(kScratchDoubleReg, dst, 0x55); // get lo 16 bits
2238 : __ psubd(dst, kScratchDoubleReg); // get hi 16 bits
2239 4 : __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly
2240 4 : __ psrld(dst, 1); // divide by 2 to get in unsigned range
2241 4 : __ cvtdq2ps(dst, dst); // convert hi exactly
2242 4 : __ addps(dst, dst); // double hi, exactly
2243 4 : __ addps(dst, kScratchDoubleReg); // add hi and lo, may round.
2244 : break;
2245 : }
2246 : case kX64F32x4Abs: {
2247 : XMMRegister dst = i.OutputSimd128Register();
2248 : XMMRegister src = i.InputSimd128Register(0);
2249 4 : if (dst == src) {
2250 4 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2251 4 : __ psrld(kScratchDoubleReg, 1);
2252 8 : __ andps(i.OutputSimd128Register(), kScratchDoubleReg);
2253 : } else {
2254 0 : __ pcmpeqd(dst, dst);
2255 0 : __ psrld(dst, 1);
2256 0 : __ andps(dst, i.InputSimd128Register(0));
2257 : }
2258 : break;
2259 : }
2260 : case kX64F32x4Neg: {
2261 : XMMRegister dst = i.OutputSimd128Register();
2262 : XMMRegister src = i.InputSimd128Register(0);
2263 4 : if (dst == src) {
2264 4 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2265 4 : __ pslld(kScratchDoubleReg, 31);
2266 8 : __ xorps(i.OutputSimd128Register(), kScratchDoubleReg);
2267 : } else {
2268 0 : __ pcmpeqd(dst, dst);
2269 0 : __ pslld(dst, 31);
2270 0 : __ xorps(dst, i.InputSimd128Register(0));
2271 : }
2272 : break;
2273 : }
2274 : case kX64F32x4RecipApprox: {
2275 4 : __ rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2276 4 : break;
2277 : }
2278 : case kX64F32x4RecipSqrtApprox: {
2279 4 : __ rsqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2280 4 : break;
2281 : }
2282 : case kX64F32x4Add: {
2283 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2284 12 : __ addps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2285 12 : break;
2286 : }
2287 : case kX64F32x4AddHoriz: {
2288 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2289 : CpuFeatureScope sse_scope(tasm(), SSE3);
2290 4 : __ haddps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2291 : break;
2292 : }
2293 : case kX64F32x4Sub: {
2294 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2295 4 : __ subps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2296 4 : break;
2297 : }
2298 : case kX64F32x4Mul: {
2299 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2300 4 : __ mulps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2301 4 : break;
2302 : }
2303 : case kX64F32x4Min: {
2304 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2305 4 : __ minps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2306 4 : break;
2307 : }
2308 : case kX64F32x4Max: {
2309 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2310 4 : __ maxps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2311 4 : break;
2312 : }
2313 : case kX64F32x4Eq: {
2314 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2315 4 : __ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x0);
2316 4 : break;
2317 : }
2318 : case kX64F32x4Ne: {
2319 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2320 4 : __ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x4);
2321 4 : break;
2322 : }
2323 : case kX64F32x4Lt: {
2324 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2325 8 : __ cmpltps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2326 : break;
2327 : }
2328 : case kX64F32x4Le: {
2329 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2330 8 : __ cmpleps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2331 : break;
2332 : }
2333 : case kX64I32x4Splat: {
2334 1076 : XMMRegister dst = i.OutputSimd128Register();
2335 1076 : __ movd(dst, i.InputRegister(0));
2336 1076 : __ pshufd(dst, dst, 0x0);
2337 : break;
2338 : }
2339 : case kX64I32x4ExtractLane: {
2340 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2341 7992 : __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
2342 : break;
2343 : }
2344 : case kX64I32x4ReplaceLane: {
2345 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2346 3568 : if (instr->InputAt(2)->IsRegister()) {
2347 : __ Pinsrd(i.OutputSimd128Register(), i.InputRegister(2),
2348 592 : i.InputInt8(1));
2349 : } else {
2350 2976 : __ Pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
2351 : }
2352 : break;
2353 : }
2354 : case kX64I32x4SConvertF32x4: {
2355 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2356 : XMMRegister dst = i.OutputSimd128Register();
2357 : // NAN->0
2358 4 : __ movaps(kScratchDoubleReg, dst);
2359 : __ cmpeqps(kScratchDoubleReg, kScratchDoubleReg);
2360 : __ pand(dst, kScratchDoubleReg);
2361 : // Set top bit if >= 0 (but not -0.0!)
2362 : __ pxor(kScratchDoubleReg, dst);
2363 : // Convert
2364 4 : __ cvttps2dq(dst, dst);
2365 : // Set top bit if >=0 is now < 0
2366 : __ pand(kScratchDoubleReg, dst);
2367 4 : __ psrad(kScratchDoubleReg, 31);
2368 : // Set positive overflow lanes to 0x7FFFFFFF
2369 : __ pxor(dst, kScratchDoubleReg);
2370 : break;
2371 : }
2372 : case kX64I32x4SConvertI16x8Low: {
2373 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2374 4 : __ pmovsxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2375 : break;
2376 : }
2377 : case kX64I32x4SConvertI16x8High: {
2378 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2379 : XMMRegister dst = i.OutputSimd128Register();
2380 4 : __ palignr(dst, i.InputSimd128Register(0), 8);
2381 : __ pmovsxwd(dst, dst);
2382 : break;
2383 : }
2384 : case kX64I32x4Neg: {
2385 : CpuFeatureScope sse_scope(tasm(), SSSE3);
2386 : XMMRegister dst = i.OutputSimd128Register();
2387 : XMMRegister src = i.InputSimd128Register(0);
2388 4 : if (dst == src) {
2389 4 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2390 : __ psignd(dst, kScratchDoubleReg);
2391 : } else {
2392 0 : __ pxor(dst, dst);
2393 : __ psubd(dst, src);
2394 : }
2395 : break;
2396 : }
2397 : case kX64I32x4Shl: {
2398 248 : __ pslld(i.OutputSimd128Register(), i.InputInt8(1));
2399 124 : break;
2400 : }
2401 : case kX64I32x4ShrS: {
2402 248 : __ psrad(i.OutputSimd128Register(), i.InputInt8(1));
2403 124 : break;
2404 : }
2405 : case kX64I32x4Add: {
2406 12 : __ paddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2407 : break;
2408 : }
2409 : case kX64I32x4AddHoriz: {
2410 : CpuFeatureScope sse_scope(tasm(), SSSE3);
2411 4 : __ phaddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2412 : break;
2413 : }
2414 : case kX64I32x4Sub: {
2415 4 : __ psubd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2416 : break;
2417 : }
2418 : case kX64I32x4Mul: {
2419 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2420 4 : __ pmulld(i.OutputSimd128Register(), i.InputSimd128Register(1));
2421 : break;
2422 : }
2423 : case kX64I32x4MinS: {
2424 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2425 4 : __ pminsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2426 : break;
2427 : }
2428 : case kX64I32x4MaxS: {
2429 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2430 4 : __ pmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2431 : break;
2432 : }
2433 : case kX64I32x4Eq: {
2434 12 : __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2435 : break;
2436 : }
2437 : case kX64I32x4Ne: {
2438 16 : __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2439 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2440 16 : __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
2441 : break;
2442 : }
2443 : case kX64I32x4GtS: {
2444 8 : __ pcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2445 : break;
2446 : }
2447 : case kX64I32x4GeS: {
2448 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2449 : XMMRegister dst = i.OutputSimd128Register();
2450 : XMMRegister src = i.InputSimd128Register(1);
2451 8 : __ pminsd(dst, src);
2452 : __ pcmpeqd(dst, src);
2453 : break;
2454 : }
2455 : case kX64I32x4UConvertF32x4: {
2456 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2457 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2458 : XMMRegister dst = i.OutputSimd128Register();
2459 4 : XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
2460 : // NAN->0, negative->0
2461 4 : __ pxor(kScratchDoubleReg, kScratchDoubleReg);
2462 4 : __ maxps(dst, kScratchDoubleReg);
2463 : // scratch: float representation of max_signed
2464 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2465 4 : __ psrld(kScratchDoubleReg, 1); // 0x7fffffff
2466 4 : __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // 0x4f000000
2467 : // tmp: convert (src-max_signed).
2468 : // Positive overflow lanes -> 0x7FFFFFFF
2469 : // Negative lanes -> 0
2470 4 : __ movaps(tmp, dst);
2471 4 : __ subps(tmp, kScratchDoubleReg);
2472 : __ cmpleps(kScratchDoubleReg, tmp);
2473 4 : __ cvttps2dq(tmp, tmp);
2474 : __ pxor(tmp, kScratchDoubleReg);
2475 : __ pxor(kScratchDoubleReg, kScratchDoubleReg);
2476 : __ pmaxsd(tmp, kScratchDoubleReg);
2477 : // convert. Overflow lanes above max_signed will be 0x80000000
2478 4 : __ cvttps2dq(dst, dst);
2479 : // Add (src-max_signed) for overflow lanes.
2480 : __ paddd(dst, tmp);
2481 : break;
2482 : }
2483 : case kX64I32x4UConvertI16x8Low: {
2484 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2485 4 : __ pmovzxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2486 : break;
2487 : }
2488 : case kX64I32x4UConvertI16x8High: {
2489 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2490 : XMMRegister dst = i.OutputSimd128Register();
2491 4 : __ palignr(dst, i.InputSimd128Register(0), 8);
2492 : __ pmovzxwd(dst, dst);
2493 : break;
2494 : }
2495 : case kX64I32x4ShrU: {
2496 248 : __ psrld(i.OutputSimd128Register(), i.InputInt8(1));
2497 124 : break;
2498 : }
2499 : case kX64I32x4MinU: {
2500 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2501 4 : __ pminud(i.OutputSimd128Register(), i.InputSimd128Register(1));
2502 : break;
2503 : }
2504 : case kX64I32x4MaxU: {
2505 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2506 4 : __ pmaxud(i.OutputSimd128Register(), i.InputSimd128Register(1));
2507 : break;
2508 : }
2509 : case kX64I32x4GtU: {
2510 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2511 : XMMRegister dst = i.OutputSimd128Register();
2512 : XMMRegister src = i.InputSimd128Register(1);
2513 8 : __ pmaxud(dst, src);
2514 : __ pcmpeqd(dst, src);
2515 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2516 : __ pxor(dst, kScratchDoubleReg);
2517 : break;
2518 : }
2519 : case kX64I32x4GeU: {
2520 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2521 : XMMRegister dst = i.OutputSimd128Register();
2522 : XMMRegister src = i.InputSimd128Register(1);
2523 8 : __ pminud(dst, src);
2524 : __ pcmpeqd(dst, src);
2525 : break;
2526 : }
2527 : case kX64S128Zero: {
2528 16 : XMMRegister dst = i.OutputSimd128Register();
2529 16 : __ xorps(dst, dst);
2530 : break;
2531 : }
2532 : case kX64I16x8Splat: {
2533 408 : XMMRegister dst = i.OutputSimd128Register();
2534 408 : __ movd(dst, i.InputRegister(0));
2535 408 : __ pshuflw(dst, dst, 0x0);
2536 408 : __ pshufd(dst, dst, 0x0);
2537 : break;
2538 : }
2539 : case kX64I16x8ExtractLane: {
2540 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2541 2656 : Register dst = i.OutputRegister();
2542 5312 : __ pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
2543 2656 : __ movsxwl(dst, dst);
2544 : break;
2545 : }
2546 : case kX64I16x8ReplaceLane: {
2547 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2548 112 : if (instr->InputAt(2)->IsRegister()) {
2549 : __ pinsrw(i.OutputSimd128Register(), i.InputRegister(2),
2550 112 : i.InputInt8(1));
2551 : } else {
2552 0 : __ pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
2553 : }
2554 : break;
2555 : }
2556 : case kX64I16x8SConvertI8x16Low: {
2557 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2558 4 : __ pmovsxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
2559 : break;
2560 : }
2561 : case kX64I16x8SConvertI8x16High: {
2562 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2563 : XMMRegister dst = i.OutputSimd128Register();
2564 4 : __ palignr(dst, i.InputSimd128Register(0), 8);
2565 : __ pmovsxbw(dst, dst);
2566 : break;
2567 : }
2568 : case kX64I16x8Neg: {
2569 : CpuFeatureScope sse_scope(tasm(), SSSE3);
2570 : XMMRegister dst = i.OutputSimd128Register();
2571 : XMMRegister src = i.InputSimd128Register(0);
2572 4 : if (dst == src) {
2573 4 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2574 : __ psignw(dst, kScratchDoubleReg);
2575 : } else {
2576 0 : __ pxor(dst, dst);
2577 : __ psubw(dst, src);
2578 : }
2579 : break;
2580 : }
2581 : case kX64I16x8Shl: {
2582 120 : __ psllw(i.OutputSimd128Register(), i.InputInt8(1));
2583 60 : break;
2584 : }
2585 : case kX64I16x8ShrS: {
2586 120 : __ psraw(i.OutputSimd128Register(), i.InputInt8(1));
2587 60 : break;
2588 : }
2589 : case kX64I16x8SConvertI32x4: {
2590 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2591 4 : __ packssdw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2592 : break;
2593 : }
2594 : case kX64I16x8Add: {
2595 4 : __ paddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2596 : break;
2597 : }
2598 : case kX64I16x8AddSaturateS: {
2599 4 : __ paddsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2600 : break;
2601 : }
2602 : case kX64I16x8AddHoriz: {
2603 : CpuFeatureScope sse_scope(tasm(), SSSE3);
2604 4 : __ phaddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2605 : break;
2606 : }
2607 : case kX64I16x8Sub: {
2608 4 : __ psubw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2609 : break;
2610 : }
2611 : case kX64I16x8SubSaturateS: {
2612 4 : __ psubsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2613 : break;
2614 : }
2615 : case kX64I16x8Mul: {
2616 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2617 4 : __ pmullw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2618 : break;
2619 : }
2620 : case kX64I16x8MinS: {
2621 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2622 4 : __ pminsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2623 : break;
2624 : }
2625 : case kX64I16x8MaxS: {
2626 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2627 4 : __ pmaxsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2628 : break;
2629 : }
2630 : case kX64I16x8Eq: {
2631 12 : __ pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2632 : break;
2633 : }
2634 : case kX64I16x8Ne: {
2635 16 : __ pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2636 : __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2637 16 : __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
2638 : break;
2639 : }
2640 : case kX64I16x8GtS: {
2641 8 : __ pcmpgtw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2642 : break;
2643 : }
2644 : case kX64I16x8GeS: {
2645 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2646 : XMMRegister dst = i.OutputSimd128Register();
2647 : XMMRegister src = i.InputSimd128Register(1);
2648 8 : __ pminsw(dst, src);
2649 : __ pcmpeqw(dst, src);
2650 : break;
2651 : }
2652 : case kX64I16x8UConvertI8x16Low: {
2653 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2654 4 : __ pmovzxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
2655 : break;
2656 : }
2657 : case kX64I16x8UConvertI8x16High: {
2658 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2659 : XMMRegister dst = i.OutputSimd128Register();
2660 4 : __ palignr(dst, i.InputSimd128Register(0), 8);
2661 : __ pmovzxbw(dst, dst);
2662 : break;
2663 : }
2664 : case kX64I16x8ShrU: {
2665 120 : __ psrlw(i.OutputSimd128Register(), i.InputInt8(1));
2666 60 : break;
2667 : }
2668 : case kX64I16x8UConvertI32x4: {
2669 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2670 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2671 : XMMRegister dst = i.OutputSimd128Register();
2672 : // Change negative lanes to 0x7FFFFFFF
2673 4 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2674 4 : __ psrld(kScratchDoubleReg, 1);
2675 : __ pminud(dst, kScratchDoubleReg);
2676 4 : __ pminud(kScratchDoubleReg, i.InputSimd128Register(1));
2677 : __ packusdw(dst, kScratchDoubleReg);
2678 : break;
2679 : }
2680 : case kX64I16x8AddSaturateU: {
2681 4 : __ paddusw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2682 : break;
2683 : }
2684 : case kX64I16x8SubSaturateU: {
2685 4 : __ psubusw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2686 : break;
2687 : }
2688 : case kX64I16x8MinU: {
2689 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2690 4 : __ pminuw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2691 : break;
2692 : }
2693 : case kX64I16x8MaxU: {
2694 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2695 4 : __ pmaxuw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2696 : break;
2697 : }
2698 : case kX64I16x8GtU: {
2699 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2700 : XMMRegister dst = i.OutputSimd128Register();
2701 : XMMRegister src = i.InputSimd128Register(1);
2702 8 : __ pmaxuw(dst, src);
2703 : __ pcmpeqw(dst, src);
2704 : __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2705 : __ pxor(dst, kScratchDoubleReg);
2706 : break;
2707 : }
2708 : case kX64I16x8GeU: {
2709 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2710 : XMMRegister dst = i.OutputSimd128Register();
2711 : XMMRegister src = i.InputSimd128Register(1);
2712 16 : __ pminuw(dst, src);
2713 : __ pcmpeqw(dst, src);
2714 : break;
2715 : }
2716 : case kX64I8x16Splat: {
2717 : CpuFeatureScope sse_scope(tasm(), SSSE3);
2718 : XMMRegister dst = i.OutputSimd128Register();
2719 304 : __ movd(dst, i.InputRegister(0));
2720 304 : __ xorps(kScratchDoubleReg, kScratchDoubleReg);
2721 : __ pshufb(dst, kScratchDoubleReg);
2722 : break;
2723 : }
2724 : case kX64I8x16ExtractLane: {
2725 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2726 3968 : Register dst = i.OutputRegister();
2727 7936 : __ pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
2728 3968 : __ movsxbl(dst, dst);
2729 : break;
2730 : }
2731 : case kX64I8x16ReplaceLane: {
2732 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2733 176 : if (instr->InputAt(2)->IsRegister()) {
2734 : __ pinsrb(i.OutputSimd128Register(), i.InputRegister(2),
2735 176 : i.InputInt8(1));
2736 : } else {
2737 0 : __ pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
2738 : }
2739 : break;
2740 : }
2741 : case kX64I8x16SConvertI16x8: {
2742 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2743 4 : __ packsswb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2744 : break;
2745 : }
2746 : case kX64I8x16Neg: {
2747 : CpuFeatureScope sse_scope(tasm(), SSSE3);
2748 : XMMRegister dst = i.OutputSimd128Register();
2749 : XMMRegister src = i.InputSimd128Register(0);
2750 4 : if (dst == src) {
2751 4 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2752 : __ psignb(dst, kScratchDoubleReg);
2753 : } else {
2754 0 : __ pxor(dst, dst);
2755 : __ psubb(dst, src);
2756 : }
2757 : break;
2758 : }
2759 : case kX64I8x16Shl: {
2760 : XMMRegister dst = i.OutputSimd128Register();
2761 : DCHECK_EQ(dst, i.InputSimd128Register(0));
2762 28 : int8_t shift = i.InputInt8(1) & 0x7;
2763 28 : if (shift < 4) {
2764 : // For small shifts, doubling is faster.
2765 24 : for (int i = 0; i < shift; ++i) {
2766 24 : __ paddb(dst, dst);
2767 : }
2768 : } else {
2769 : // Mask off the unwanted bits before word-shifting.
2770 16 : __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2771 16 : __ psrlw(kScratchDoubleReg, 8 + shift);
2772 : __ packuswb(kScratchDoubleReg, kScratchDoubleReg);
2773 : __ pand(dst, kScratchDoubleReg);
2774 16 : __ psllw(dst, shift);
2775 : }
2776 : break;
2777 : }
2778 : case kX64I8x16ShrS: {
2779 : XMMRegister dst = i.OutputSimd128Register();
2780 : XMMRegister src = i.InputSimd128Register(0);
2781 28 : int8_t shift = i.InputInt8(1) & 0x7;
2782 : // Unpack the bytes into words, do arithmetic shifts, and repack.
2783 28 : __ punpckhbw(kScratchDoubleReg, src);
2784 : __ punpcklbw(dst, src);
2785 28 : __ psraw(kScratchDoubleReg, 8 + shift);
2786 28 : __ psraw(dst, 8 + shift);
2787 : __ packsswb(dst, kScratchDoubleReg);
2788 : break;
2789 : }
2790 : case kX64I8x16Add: {
2791 4 : __ paddb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2792 : break;
2793 : }
2794 : case kX64I8x16AddSaturateS: {
2795 4 : __ paddsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2796 : break;
2797 : }
2798 : case kX64I8x16Sub: {
2799 4 : __ psubb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2800 : break;
2801 : }
2802 : case kX64I8x16SubSaturateS: {
2803 4 : __ psubsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2804 : break;
2805 : }
2806 : case kX64I8x16Mul: {
2807 : XMMRegister dst = i.OutputSimd128Register();
2808 : DCHECK_EQ(dst, i.InputSimd128Register(0));
2809 : XMMRegister right = i.InputSimd128Register(1);
2810 4 : XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
2811 : // I16x8 view of I8x16
2812 : // left = AAaa AAaa ... AAaa AAaa
2813 : // right= BBbb BBbb ... BBbb BBbb
2814 : // t = 00AA 00AA ... 00AA 00AA
2815 : // s = 00BB 00BB ... 00BB 00BB
2816 4 : __ movaps(tmp, dst);
2817 4 : __ movaps(kScratchDoubleReg, right);
2818 4 : __ psrlw(tmp, 8);
2819 4 : __ psrlw(kScratchDoubleReg, 8);
2820 : // dst = left * 256
2821 4 : __ psllw(dst, 8);
2822 : // t = I16x8Mul(t, s)
2823 : // => __PP __PP ... __PP __PP
2824 : __ pmullw(tmp, kScratchDoubleReg);
2825 : // dst = I16x8Mul(left * 256, right)
2826 : // => pp__ pp__ ... pp__ pp__
2827 : __ pmullw(dst, right);
2828 : // t = I16x8Shl(t, 8)
2829 : // => PP00 PP00 ... PP00 PP00
2830 4 : __ psllw(tmp, 8);
2831 : // dst = I16x8Shr(dst, 8)
2832 : // => 00pp 00pp ... 00pp 00pp
2833 4 : __ psrlw(dst, 8);
2834 : // dst = I16x8Or(dst, t)
2835 : // => PPpp PPpp ... PPpp PPpp
2836 : __ por(dst, tmp);
2837 : break;
2838 : }
2839 : case kX64I8x16MinS: {
2840 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2841 4 : __ pminsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2842 : break;
2843 : }
2844 : case kX64I8x16MaxS: {
2845 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2846 4 : __ pmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2847 : break;
2848 : }
2849 : case kX64I8x16Eq: {
2850 12 : __ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2851 : break;
2852 : }
2853 : case kX64I8x16Ne: {
2854 16 : __ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2855 : __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
2856 16 : __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
2857 : break;
2858 : }
2859 : case kX64I8x16GtS: {
2860 8 : __ pcmpgtb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2861 : break;
2862 : }
2863 : case kX64I8x16GeS: {
2864 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2865 : XMMRegister dst = i.OutputSimd128Register();
2866 : XMMRegister src = i.InputSimd128Register(1);
2867 8 : __ pminsb(dst, src);
2868 : __ pcmpeqb(dst, src);
2869 : break;
2870 : }
2871 : case kX64I8x16UConvertI16x8: {
2872 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2873 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2874 : XMMRegister dst = i.OutputSimd128Register();
2875 : // Change negative lanes to 0x7FFF
2876 4 : __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2877 4 : __ psrlw(kScratchDoubleReg, 1);
2878 : __ pminuw(dst, kScratchDoubleReg);
2879 4 : __ pminuw(kScratchDoubleReg, i.InputSimd128Register(1));
2880 : __ packuswb(dst, kScratchDoubleReg);
2881 : break;
2882 : }
2883 : case kX64I8x16ShrU: {
2884 : XMMRegister dst = i.OutputSimd128Register();
2885 : XMMRegister src = i.InputSimd128Register(0);
2886 28 : int8_t shift = i.InputInt8(1) & 0x7;
2887 : // Unpack the bytes into words, do logical shifts, and repack.
2888 28 : __ punpckhbw(kScratchDoubleReg, src);
2889 : __ punpcklbw(dst, src);
2890 28 : __ psrlw(kScratchDoubleReg, 8 + shift);
2891 28 : __ psrlw(dst, 8 + shift);
2892 : __ packuswb(dst, kScratchDoubleReg);
2893 : break;
2894 : }
2895 : case kX64I8x16AddSaturateU: {
2896 4 : __ paddusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2897 : break;
2898 : }
2899 : case kX64I8x16SubSaturateU: {
2900 4 : __ psubusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2901 : break;
2902 : }
2903 : case kX64I8x16MinU: {
2904 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2905 4 : __ pminub(i.OutputSimd128Register(), i.InputSimd128Register(1));
2906 : break;
2907 : }
2908 : case kX64I8x16MaxU: {
2909 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2910 4 : __ pmaxub(i.OutputSimd128Register(), i.InputSimd128Register(1));
2911 : break;
2912 : }
2913 : case kX64I8x16GtU: {
2914 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2915 : XMMRegister dst = i.OutputSimd128Register();
2916 : XMMRegister src = i.InputSimd128Register(1);
2917 8 : __ pmaxub(dst, src);
2918 : __ pcmpeqb(dst, src);
2919 : __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
2920 : __ pxor(dst, kScratchDoubleReg);
2921 : break;
2922 : }
2923 : case kX64I8x16GeU: {
2924 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2925 : XMMRegister dst = i.OutputSimd128Register();
2926 : XMMRegister src = i.InputSimd128Register(1);
2927 0 : __ pminub(dst, src);
2928 : __ pcmpeqb(dst, src);
2929 : break;
2930 : }
2931 : case kX64S128And: {
2932 4 : __ pand(i.OutputSimd128Register(), i.InputSimd128Register(1));
2933 : break;
2934 : }
2935 : case kX64S128Or: {
2936 4 : __ por(i.OutputSimd128Register(), i.InputSimd128Register(1));
2937 : break;
2938 : }
2939 : case kX64S128Xor: {
2940 4 : __ pxor(i.OutputSimd128Register(), i.InputSimd128Register(1));
2941 : break;
2942 : }
2943 : case kX64S128Not: {
2944 : XMMRegister dst = i.OutputSimd128Register();
2945 : XMMRegister src = i.InputSimd128Register(0);
2946 4 : if (dst == src) {
2947 4 : __ movaps(kScratchDoubleReg, dst);
2948 : __ pcmpeqd(dst, dst);
2949 : __ pxor(dst, kScratchDoubleReg);
2950 : } else {
2951 0 : __ pcmpeqd(dst, dst);
2952 : __ pxor(dst, src);
2953 : }
2954 :
2955 : break;
2956 : }
2957 : case kX64S128Select: {
2958 : // Mask used here is stored in dst.
2959 28 : XMMRegister dst = i.OutputSimd128Register();
2960 28 : __ movaps(kScratchDoubleReg, i.InputSimd128Register(1));
2961 56 : __ xorps(kScratchDoubleReg, i.InputSimd128Register(2));
2962 28 : __ andps(dst, kScratchDoubleReg);
2963 56 : __ xorps(dst, i.InputSimd128Register(2));
2964 : break;
2965 : }
2966 : case kX64S8x16Shuffle: {
2967 : XMMRegister dst = i.OutputSimd128Register();
2968 : Register tmp = i.TempRegister(0);
2969 : // Prepare 16 byte aligned buffer for shuffle control mask
2970 1544 : __ movq(tmp, rsp);
2971 1544 : __ andq(rsp, Immediate(-16));
2972 1544 : if (instr->InputCount() == 5) { // only one input operand
2973 752 : uint32_t mask[4] = {};
2974 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2975 4512 : for (int j = 4; j > 0; j--) {
2976 6016 : mask[j - 1] = i.InputUint32(j);
2977 : }
2978 :
2979 752 : SetupShuffleMaskOnStack(tasm(), mask);
2980 1504 : __ pshufb(dst, Operand(rsp, 0));
2981 : } else { // two input operands
2982 : DCHECK_EQ(6, instr->InputCount());
2983 2376 : ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 0);
2984 792 : uint32_t mask[4] = {};
2985 3960 : for (int j = 5; j > 1; j--) {
2986 3168 : uint32_t lanes = i.InputUint32(j);
2987 15840 : for (int k = 0; k < 32; k += 8) {
2988 12672 : uint8_t lane = lanes >> k;
2989 12672 : mask[j - 2] |= (lane < kSimd128Size ? lane : 0x80) << k;
2990 : }
2991 : }
2992 792 : SetupShuffleMaskOnStack(tasm(), mask);
2993 1584 : __ pshufb(kScratchDoubleReg, Operand(rsp, 0));
2994 792 : uint32_t mask1[4] = {};
2995 1584 : if (instr->InputAt(1)->IsSimd128Register()) {
2996 792 : XMMRegister src1 = i.InputSimd128Register(1);
2997 792 : if (src1 != dst) __ movups(dst, src1);
2998 : } else {
2999 0 : __ movups(dst, i.InputOperand(1));
3000 : }
3001 3168 : for (int j = 5; j > 1; j--) {
3002 3168 : uint32_t lanes = i.InputUint32(j);
3003 15840 : for (int k = 0; k < 32; k += 8) {
3004 12672 : uint8_t lane = lanes >> k;
3005 12672 : mask1[j - 2] |= (lane >= kSimd128Size ? (lane & 0x0F) : 0x80) << k;
3006 : }
3007 : }
3008 792 : SetupShuffleMaskOnStack(tasm(), mask1);
3009 1584 : __ pshufb(dst, Operand(rsp, 0));
3010 : __ por(dst, kScratchDoubleReg);
3011 : }
3012 : __ movq(rsp, tmp);
3013 : break;
3014 : }
3015 : case kX64S32x4Swizzle: {
3016 : DCHECK_EQ(2, instr->InputCount());
3017 1216 : ASSEMBLE_SIMD_IMM_INSTR(pshufd, i.OutputSimd128Register(), 0,
3018 : i.InputInt8(1));
3019 : break;
3020 : }
3021 : case kX64S32x4Shuffle: {
3022 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
3023 : DCHECK_EQ(4, instr->InputCount()); // Swizzles should be handled above.
3024 : int8_t shuffle = i.InputInt8(2);
3025 : DCHECK_NE(0xe4, shuffle); // A simple blend should be handled below.
3026 1404 : ASSEMBLE_SIMD_IMM_INSTR(pshufd, kScratchDoubleReg, 1, shuffle);
3027 1404 : ASSEMBLE_SIMD_IMM_INSTR(pshufd, i.OutputSimd128Register(), 0, shuffle);
3028 936 : __ pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputInt8(3));
3029 : break;
3030 : }
3031 : case kX64S16x8Blend: {
3032 112 : ASSEMBLE_SIMD_IMM_SHUFFLE(pblendw, SSE4_1, i.InputInt8(2));
3033 56 : break;
3034 : }
3035 : case kX64S16x8HalfShuffle1: {
3036 248 : XMMRegister dst = i.OutputSimd128Register();
3037 992 : ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, i.InputInt8(1));
3038 248 : __ pshufhw(dst, dst, i.InputInt8(2));
3039 : break;
3040 : }
3041 : case kX64S16x8HalfShuffle2: {
3042 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
3043 176 : XMMRegister dst = i.OutputSimd128Register();
3044 704 : ASSEMBLE_SIMD_IMM_INSTR(pshuflw, kScratchDoubleReg, 1, i.InputInt8(2));
3045 176 : __ pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputInt8(3));
3046 704 : ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, i.InputInt8(2));
3047 176 : __ pshufhw(dst, dst, i.InputInt8(3));
3048 176 : __ pblendw(dst, kScratchDoubleReg, i.InputInt8(4));
3049 : break;
3050 : }
3051 : case kX64S8x16Alignr: {
3052 480 : ASSEMBLE_SIMD_IMM_SHUFFLE(palignr, SSSE3, i.InputInt8(2));
3053 240 : break;
3054 : }
3055 : case kX64S16x8Dup: {
3056 112 : XMMRegister dst = i.OutputSimd128Register();
3057 112 : int8_t lane = i.InputInt8(1) & 0x7;
3058 112 : int8_t lane4 = lane & 0x3;
3059 112 : int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
3060 112 : if (lane < 4) {
3061 336 : ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, half_dup);
3062 112 : __ pshufd(dst, dst, 0);
3063 : } else {
3064 0 : ASSEMBLE_SIMD_IMM_INSTR(pshufhw, dst, 0, half_dup);
3065 0 : __ pshufd(dst, dst, 0xaa);
3066 : }
3067 : break;
3068 : }
3069 : case kX64S8x16Dup: {
3070 : XMMRegister dst = i.OutputSimd128Register();
3071 216 : int8_t lane = i.InputInt8(1) & 0xf;
3072 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3073 216 : if (lane < 8) {
3074 216 : __ punpcklbw(dst, dst);
3075 : } else {
3076 0 : __ punpckhbw(dst, dst);
3077 : }
3078 216 : lane &= 0x7;
3079 216 : int8_t lane4 = lane & 0x3;
3080 216 : int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
3081 216 : if (lane < 4) {
3082 200 : __ pshuflw(dst, dst, half_dup);
3083 200 : __ pshufd(dst, dst, 0);
3084 : } else {
3085 16 : __ pshufhw(dst, dst, half_dup);
3086 16 : __ pshufd(dst, dst, 0xaa);
3087 : }
3088 : break;
3089 : }
3090 : case kX64S64x2UnpackHigh:
3091 0 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhqdq);
3092 : break;
3093 : case kX64S32x4UnpackHigh:
3094 320 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhdq);
3095 : break;
3096 : case kX64S16x8UnpackHigh:
3097 480 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhwd);
3098 : break;
3099 : case kX64S8x16UnpackHigh:
3100 448 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhbw);
3101 : break;
3102 : case kX64S64x2UnpackLow:
3103 64 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklqdq);
3104 : break;
3105 : case kX64S32x4UnpackLow:
3106 416 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckldq);
3107 : break;
3108 : case kX64S16x8UnpackLow:
3109 400 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklwd);
3110 : break;
3111 : case kX64S8x16UnpackLow:
3112 432 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklbw);
3113 : break;
3114 : case kX64S16x8UnzipHigh: {
3115 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
3116 : XMMRegister dst = i.OutputSimd128Register();
3117 : XMMRegister src2 = dst;
3118 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3119 84 : if (instr->InputCount() == 2) {
3120 228 : ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
3121 76 : __ psrld(kScratchDoubleReg, 16);
3122 : src2 = kScratchDoubleReg;
3123 : }
3124 84 : __ psrld(dst, 16);
3125 : __ packusdw(dst, src2);
3126 : break;
3127 : }
3128 : case kX64S16x8UnzipLow: {
3129 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
3130 : XMMRegister dst = i.OutputSimd128Register();
3131 : XMMRegister src2 = dst;
3132 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3133 84 : __ pxor(kScratchDoubleReg, kScratchDoubleReg);
3134 84 : if (instr->InputCount() == 2) {
3135 224 : ASSEMBLE_SIMD_IMM_INSTR(pblendw, kScratchDoubleReg, 1, 0x55);
3136 : src2 = kScratchDoubleReg;
3137 : }
3138 84 : __ pblendw(dst, kScratchDoubleReg, 0xaa);
3139 : __ packusdw(dst, src2);
3140 : break;
3141 : }
3142 : case kX64S8x16UnzipHigh: {
3143 : XMMRegister dst = i.OutputSimd128Register();
3144 : XMMRegister src2 = dst;
3145 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3146 88 : if (instr->InputCount() == 2) {
3147 236 : ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
3148 80 : __ psrlw(kScratchDoubleReg, 8);
3149 : src2 = kScratchDoubleReg;
3150 : }
3151 88 : __ psrlw(dst, 8);
3152 : __ packuswb(dst, src2);
3153 : break;
3154 : }
3155 : case kX64S8x16UnzipLow: {
3156 : XMMRegister dst = i.OutputSimd128Register();
3157 : XMMRegister src2 = dst;
3158 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3159 104 : if (instr->InputCount() == 2) {
3160 288 : ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
3161 96 : __ psllw(kScratchDoubleReg, 8);
3162 96 : __ psrlw(kScratchDoubleReg, 8);
3163 : src2 = kScratchDoubleReg;
3164 : }
3165 104 : __ psllw(dst, 8);
3166 104 : __ psrlw(dst, 8);
3167 : __ packuswb(dst, src2);
3168 : break;
3169 : }
3170 : case kX64S8x16TransposeLow: {
3171 : XMMRegister dst = i.OutputSimd128Register();
3172 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3173 124 : __ psllw(dst, 8);
3174 124 : if (instr->InputCount() == 1) {
3175 8 : __ movups(kScratchDoubleReg, dst);
3176 : } else {
3177 : DCHECK_EQ(2, instr->InputCount());
3178 348 : ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
3179 116 : __ psllw(kScratchDoubleReg, 8);
3180 : }
3181 124 : __ psrlw(dst, 8);
3182 : __ por(dst, kScratchDoubleReg);
3183 : break;
3184 : }
3185 : case kX64S8x16TransposeHigh: {
3186 : XMMRegister dst = i.OutputSimd128Register();
3187 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3188 112 : __ psrlw(dst, 8);
3189 112 : if (instr->InputCount() == 1) {
3190 8 : __ movups(kScratchDoubleReg, dst);
3191 : } else {
3192 : DCHECK_EQ(2, instr->InputCount());
3193 312 : ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
3194 104 : __ psrlw(kScratchDoubleReg, 8);
3195 : }
3196 112 : __ psllw(kScratchDoubleReg, 8);
3197 : __ por(dst, kScratchDoubleReg);
3198 : break;
3199 : }
3200 : case kX64S8x8Reverse:
3201 : case kX64S8x4Reverse:
3202 : case kX64S8x2Reverse: {
3203 : DCHECK_EQ(1, instr->InputCount());
3204 : XMMRegister dst = i.OutputSimd128Register();
3205 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3206 316 : if (arch_opcode != kX64S8x2Reverse) {
3207 : // First shuffle words into position.
3208 224 : int8_t shuffle_mask = arch_opcode == kX64S8x4Reverse ? 0xB1 : 0x1B;
3209 224 : __ pshuflw(dst, dst, shuffle_mask);
3210 224 : __ pshufhw(dst, dst, shuffle_mask);
3211 : }
3212 316 : __ movaps(kScratchDoubleReg, dst);
3213 316 : __ psrlw(kScratchDoubleReg, 8);
3214 316 : __ psllw(dst, 8);
3215 : __ por(dst, kScratchDoubleReg);
3216 : break;
3217 : }
3218 : case kX64S1x4AnyTrue:
3219 : case kX64S1x8AnyTrue:
3220 : case kX64S1x16AnyTrue: {
3221 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
3222 : Register dst = i.OutputRegister();
3223 : XMMRegister src = i.InputSimd128Register(0);
3224 : Register tmp = i.TempRegister(0);
3225 60 : __ xorq(tmp, tmp);
3226 : __ movq(dst, Immediate(1));
3227 : __ ptest(src, src);
3228 60 : __ cmovq(zero, dst, tmp);
3229 : break;
3230 : }
3231 : case kX64S1x4AllTrue:
3232 : case kX64S1x8AllTrue:
3233 : case kX64S1x16AllTrue: {
3234 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
3235 : Register dst = i.OutputRegister();
3236 : XMMRegister src = i.InputSimd128Register(0);
3237 : Register tmp = i.TempRegister(0);
3238 60 : __ movq(tmp, Immediate(1));
3239 : __ xorq(dst, dst);
3240 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3241 : __ pxor(kScratchDoubleReg, src);
3242 : __ ptest(kScratchDoubleReg, kScratchDoubleReg);
3243 60 : __ cmovq(zero, dst, tmp);
3244 : break;
3245 : }
3246 : case kX64StackCheck:
3247 556438 : __ CompareRoot(rsp, RootIndex::kStackLimit);
3248 556443 : break;
3249 : case kWord32AtomicExchangeInt8: {
3250 1974 : __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
3251 1974 : __ movsxbl(i.InputRegister(0), i.InputRegister(0));
3252 987 : break;
3253 : }
3254 : case kWord32AtomicExchangeUint8: {
3255 1296 : __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
3256 648 : __ movzxbl(i.InputRegister(0), i.InputRegister(0));
3257 : break;
3258 : }
3259 : case kWord32AtomicExchangeInt16: {
3260 1388 : __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
3261 1388 : __ movsxwl(i.InputRegister(0), i.InputRegister(0));
3262 694 : break;
3263 : }
3264 : case kWord32AtomicExchangeUint16: {
3265 1896 : __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
3266 948 : __ movzxwl(i.InputRegister(0), i.InputRegister(0));
3267 : break;
3268 : }
3269 : case kWord32AtomicExchangeWord32: {
3270 2400 : __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
3271 : break;
3272 : }
3273 : case kWord32AtomicCompareExchangeInt8: {
3274 112 : __ lock();
3275 224 : __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
3276 112 : __ movsxbl(rax, rax);
3277 112 : break;
3278 : }
3279 : case kWord32AtomicCompareExchangeUint8: {
3280 129 : __ lock();
3281 258 : __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
3282 : __ movzxbl(rax, rax);
3283 : break;
3284 : }
3285 : case kWord32AtomicCompareExchangeInt16: {
3286 112 : __ lock();
3287 224 : __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
3288 112 : __ movsxwl(rax, rax);
3289 112 : break;
3290 : }
3291 : case kWord32AtomicCompareExchangeUint16: {
3292 129 : __ lock();
3293 258 : __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
3294 : __ movzxwl(rax, rax);
3295 : break;
3296 : }
3297 : case kWord32AtomicCompareExchangeWord32: {
3298 257 : __ lock();
3299 257 : __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
3300 : break;
3301 : }
3302 : #define ATOMIC_BINOP_CASE(op, inst) \
3303 : case kWord32Atomic##op##Int8: \
3304 : ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
3305 : __ movsxbl(rax, rax); \
3306 : break; \
3307 : case kWord32Atomic##op##Uint8: \
3308 : ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
3309 : __ movzxbl(rax, rax); \
3310 : break; \
3311 : case kWord32Atomic##op##Int16: \
3312 : ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
3313 : __ movsxwl(rax, rax); \
3314 : break; \
3315 : case kWord32Atomic##op##Uint16: \
3316 : ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
3317 : __ movzxwl(rax, rax); \
3318 : break; \
3319 : case kWord32Atomic##op##Word32: \
3320 : ASSEMBLE_ATOMIC_BINOP(inst, movl, cmpxchgl); \
3321 : break;
3322 11291 : ATOMIC_BINOP_CASE(Add, addl)
3323 11628 : ATOMIC_BINOP_CASE(Sub, subl)
3324 11307 : ATOMIC_BINOP_CASE(And, andl)
3325 11164 : ATOMIC_BINOP_CASE(Or, orl)
3326 12044 : ATOMIC_BINOP_CASE(Xor, xorl)
3327 : #undef ATOMIC_BINOP_CASE
3328 : case kX64Word64AtomicExchangeUint8: {
3329 3624 : __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
3330 1812 : __ movzxbq(i.InputRegister(0), i.InputRegister(0));
3331 : break;
3332 : }
3333 : case kX64Word64AtomicExchangeUint16: {
3334 3322 : __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
3335 1661 : __ movzxwq(i.InputRegister(0), i.InputRegister(0));
3336 : break;
3337 : }
3338 : case kX64Word64AtomicExchangeUint32: {
3339 1704 : __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
3340 : break;
3341 : }
3342 : case kX64Word64AtomicExchangeUint64: {
3343 1768 : __ xchgq(i.InputRegister(0), i.MemoryOperand(1));
3344 : break;
3345 : }
3346 : case kX64Word64AtomicCompareExchangeUint8: {
3347 17 : __ lock();
3348 34 : __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
3349 : __ movzxbq(rax, rax);
3350 : break;
3351 : }
3352 : case kX64Word64AtomicCompareExchangeUint16: {
3353 25 : __ lock();
3354 50 : __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
3355 : __ movzxwq(rax, rax);
3356 : break;
3357 : }
3358 : case kX64Word64AtomicCompareExchangeUint32: {
3359 25 : __ lock();
3360 25 : __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
3361 : break;
3362 : }
3363 : case kX64Word64AtomicCompareExchangeUint64: {
3364 265 : __ lock();
3365 265 : __ cmpxchgq(i.MemoryOperand(2), i.InputRegister(1));
3366 : break;
3367 : }
3368 : #define ATOMIC64_BINOP_CASE(op, inst) \
3369 : case kX64Word64Atomic##op##Uint8: \
3370 : ASSEMBLE_ATOMIC64_BINOP(inst, movb, cmpxchgb); \
3371 : __ movzxbq(rax, rax); \
3372 : break; \
3373 : case kX64Word64Atomic##op##Uint16: \
3374 : ASSEMBLE_ATOMIC64_BINOP(inst, movw, cmpxchgw); \
3375 : __ movzxwq(rax, rax); \
3376 : break; \
3377 : case kX64Word64Atomic##op##Uint32: \
3378 : ASSEMBLE_ATOMIC64_BINOP(inst, movl, cmpxchgl); \
3379 : break; \
3380 : case kX64Word64Atomic##op##Uint64: \
3381 : ASSEMBLE_ATOMIC64_BINOP(inst, movq, cmpxchgq); \
3382 : break;
3383 10494 : ATOMIC64_BINOP_CASE(Add, addq)
3384 9814 : ATOMIC64_BINOP_CASE(Sub, subq)
3385 10786 : ATOMIC64_BINOP_CASE(And, andq)
3386 9146 : ATOMIC64_BINOP_CASE(Or, orq)
3387 11220 : ATOMIC64_BINOP_CASE(Xor, xorq)
3388 : #undef ATOMIC64_BINOP_CASE
3389 : case kWord32AtomicLoadInt8:
3390 : case kWord32AtomicLoadUint8:
3391 : case kWord32AtomicLoadInt16:
3392 : case kWord32AtomicLoadUint16:
3393 : case kWord32AtomicLoadWord32:
3394 : case kWord32AtomicStoreWord8:
3395 : case kWord32AtomicStoreWord16:
3396 : case kWord32AtomicStoreWord32:
3397 : case kX64Word64AtomicLoadUint8:
3398 : case kX64Word64AtomicLoadUint16:
3399 : case kX64Word64AtomicLoadUint32:
3400 : case kX64Word64AtomicLoadUint64:
3401 : case kX64Word64AtomicStoreWord8:
3402 : case kX64Word64AtomicStoreWord16:
3403 : case kX64Word64AtomicStoreWord32:
3404 : case kX64Word64AtomicStoreWord64:
3405 0 : UNREACHABLE(); // Won't be generated by instruction selector.
3406 : break;
3407 : }
3408 : return kSuccess;
3409 : } // NOLINT(readability/fn_size)
3410 :
3411 : #undef ASSEMBLE_UNOP
3412 : #undef ASSEMBLE_BINOP
3413 : #undef ASSEMBLE_COMPARE
3414 : #undef ASSEMBLE_MULT
3415 : #undef ASSEMBLE_SHIFT
3416 : #undef ASSEMBLE_MOVX
3417 : #undef ASSEMBLE_SSE_BINOP
3418 : #undef ASSEMBLE_SSE_UNOP
3419 : #undef ASSEMBLE_AVX_BINOP
3420 : #undef ASSEMBLE_IEEE754_BINOP
3421 : #undef ASSEMBLE_IEEE754_UNOP
3422 : #undef ASSEMBLE_ATOMIC_BINOP
3423 : #undef ASSEMBLE_ATOMIC64_BINOP
3424 : #undef ASSEMBLE_SIMD_INSTR
3425 : #undef ASSEMBLE_SIMD_IMM_INSTR
3426 : #undef ASSEMBLE_SIMD_PUNPCK_SHUFFLE
3427 : #undef ASSEMBLE_SIMD_IMM_SHUFFLE
3428 :
3429 : namespace {
3430 :
3431 6099857 : Condition FlagsConditionToCondition(FlagsCondition condition) {
3432 6099857 : switch (condition) {
3433 : case kUnorderedEqual:
3434 : case kEqual:
3435 : return equal;
3436 : case kUnorderedNotEqual:
3437 : case kNotEqual:
3438 1390788 : return not_equal;
3439 : case kSignedLessThan:
3440 175793 : return less;
3441 : case kSignedGreaterThanOrEqual:
3442 57919 : return greater_equal;
3443 : case kSignedLessThanOrEqual:
3444 68783 : return less_equal;
3445 : case kSignedGreaterThan:
3446 74990 : return greater;
3447 : case kUnsignedLessThan:
3448 292394 : return below;
3449 : case kUnsignedGreaterThanOrEqual:
3450 222758 : return above_equal;
3451 : case kUnsignedLessThanOrEqual:
3452 903168 : return below_equal;
3453 : case kUnsignedGreaterThan:
3454 186290 : return above;
3455 : case kOverflow:
3456 179651 : return overflow;
3457 : case kNotOverflow:
3458 1456 : return no_overflow;
3459 : default:
3460 : break;
3461 : }
3462 0 : UNREACHABLE();
3463 : }
3464 :
3465 : } // namespace
3466 :
3467 : // Assembles branches after this instruction.
3468 5356336 : void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
3469 : Label::Distance flabel_distance =
3470 5356336 : branch->fallthru ? Label::kNear : Label::kFar;
3471 5356336 : Label* tlabel = branch->true_label;
3472 5356336 : Label* flabel = branch->false_label;
3473 5356336 : if (branch->condition == kUnorderedEqual) {
3474 51281 : __ j(parity_even, flabel, flabel_distance);
3475 5305055 : } else if (branch->condition == kUnorderedNotEqual) {
3476 101657 : __ j(parity_even, tlabel);
3477 : }
3478 5356334 : __ j(FlagsConditionToCondition(branch->condition), tlabel);
3479 :
3480 5356333 : if (!branch->fallthru) __ jmp(flabel, flabel_distance);
3481 5356333 : }
3482 :
3483 0 : void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
3484 : Instruction* instr) {
3485 : // TODO(jarin) Handle float comparisons (kUnordered[Not]Equal).
3486 0 : if (condition == kUnorderedEqual || condition == kUnorderedNotEqual) {
3487 0 : return;
3488 : }
3489 :
3490 : condition = NegateFlagsCondition(condition);
3491 0 : __ movl(kScratchRegister, Immediate(0));
3492 : __ cmovq(FlagsConditionToCondition(condition), kSpeculationPoisonRegister,
3493 0 : kScratchRegister);
3494 : }
3495 :
3496 333240 : void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
3497 288 : BranchInfo* branch) {
3498 : Label::Distance flabel_distance =
3499 333240 : branch->fallthru ? Label::kNear : Label::kFar;
3500 333240 : Label* tlabel = branch->true_label;
3501 333240 : Label* flabel = branch->false_label;
3502 333240 : Label nodeopt;
3503 333240 : if (branch->condition == kUnorderedEqual) {
3504 0 : __ j(parity_even, flabel, flabel_distance);
3505 333240 : } else if (branch->condition == kUnorderedNotEqual) {
3506 4105 : __ j(parity_even, tlabel);
3507 : }
3508 333240 : __ j(FlagsConditionToCondition(branch->condition), tlabel);
3509 :
3510 333237 : if (FLAG_deopt_every_n_times > 0) {
3511 : ExternalReference counter =
3512 288 : ExternalReference::stress_deopt_count(isolate());
3513 :
3514 288 : __ pushfq();
3515 288 : __ pushq(rax);
3516 288 : __ load_rax(counter);
3517 : __ decl(rax);
3518 288 : __ j(not_zero, &nodeopt);
3519 :
3520 288 : __ Set(rax, FLAG_deopt_every_n_times);
3521 288 : __ store_rax(counter);
3522 288 : __ popq(rax);
3523 288 : __ popfq();
3524 288 : __ jmp(tlabel);
3525 :
3526 288 : __ bind(&nodeopt);
3527 288 : __ store_rax(counter);
3528 288 : __ popq(rax);
3529 288 : __ popfq();
3530 : }
3531 :
3532 333237 : if (!branch->fallthru) {
3533 0 : __ jmp(flabel, flabel_distance);
3534 : }
3535 333237 : }
3536 :
3537 8012114 : void CodeGenerator::AssembleArchJump(RpoNumber target) {
3538 8012114 : if (!IsNextInAssemblyOrder(target)) __ jmp(GetLabel(target));
3539 5010264 : }
3540 :
3541 33442 : void CodeGenerator::AssembleArchTrap(Instruction* instr,
3542 33442 : FlagsCondition condition) {
3543 : auto ool = new (zone()) WasmOutOfLineTrap(this, instr);
3544 33434 : Label* tlabel = ool->entry();
3545 33434 : Label end;
3546 33434 : if (condition == kUnorderedEqual) {
3547 0 : __ j(parity_even, &end);
3548 33434 : } else if (condition == kUnorderedNotEqual) {
3549 304 : __ j(parity_even, tlabel);
3550 : }
3551 33434 : __ j(FlagsConditionToCondition(condition), tlabel);
3552 33445 : __ bind(&end);
3553 33448 : }
3554 :
3555 : // Assembles boolean materializations after this instruction.
3556 753712 : void CodeGenerator::AssembleArchBoolean(Instruction* instr,
3557 : FlagsCondition condition) {
3558 : X64OperandConverter i(this, instr);
3559 376856 : Label done;
3560 :
3561 : // Materialize a full 64-bit 1 or 0 value. The result register is always the
3562 : // last output of the instruction.
3563 376856 : Label check;
3564 : DCHECK_NE(0u, instr->OutputCount());
3565 376856 : Register reg = i.OutputRegister(instr->OutputCount() - 1);
3566 376856 : if (condition == kUnorderedEqual) {
3567 2923 : __ j(parity_odd, &check, Label::kNear);
3568 : __ movl(reg, Immediate(0));
3569 2925 : __ jmp(&done, Label::kNear);
3570 373933 : } else if (condition == kUnorderedNotEqual) {
3571 2582 : __ j(parity_odd, &check, Label::kNear);
3572 : __ movl(reg, Immediate(1));
3573 2581 : __ jmp(&done, Label::kNear);
3574 : }
3575 376857 : __ bind(&check);
3576 376859 : __ setcc(FlagsConditionToCondition(condition), reg);
3577 : __ movzxbl(reg, reg);
3578 376857 : __ bind(&done);
3579 376859 : }
3580 :
3581 473934 : void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
3582 : X64OperandConverter i(this, instr);
3583 34721 : Register input = i.InputRegister(0);
3584 : std::vector<std::pair<int32_t, Label*>> cases;
3585 473934 : for (size_t index = 2; index < instr->InputCount(); index += 2) {
3586 606737 : cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
3587 : }
3588 : AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
3589 69444 : cases.data() + cases.size());
3590 34722 : }
3591 :
3592 0 : void CodeGenerator::AssembleArchLookupSwitch(Instruction* instr) {
3593 : X64OperandConverter i(this, instr);
3594 0 : Register input = i.InputRegister(0);
3595 0 : for (size_t index = 2; index < instr->InputCount(); index += 2) {
3596 0 : __ cmpl(input, Immediate(i.InputInt32(index + 0)));
3597 0 : __ j(equal, GetLabel(i.InputRpo(index + 1)));
3598 : }
3599 0 : AssembleArchJump(i.InputRpo(1));
3600 0 : }
3601 :
3602 201992 : void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
3603 : X64OperandConverter i(this, instr);
3604 314 : Register input = i.InputRegister(0);
3605 314 : int32_t const case_count = static_cast<int32_t>(instr->InputCount() - 2);
3606 314 : Label** cases = zone()->NewArray<Label*>(case_count);
3607 201364 : for (int32_t index = 0; index < case_count; ++index) {
3608 402100 : cases[index] = GetLabel(i.InputRpo(index + 2));
3609 : }
3610 314 : Label* const table = AddJumpTable(cases, case_count);
3611 314 : __ cmpl(input, Immediate(case_count));
3612 628 : __ j(above_equal, GetLabel(i.InputRpo(1)));
3613 628 : __ leaq(kScratchRegister, Operand(table));
3614 314 : __ jmp(Operand(kScratchRegister, input, times_8, 0));
3615 314 : }
3616 :
namespace {

// Byte size of a 128-bit XMM register spill slot.
// Note: `static` is redundant inside an anonymous namespace, and constexpr
// makes the constant usable in compile-time contexts.
constexpr int kQuadWordSize = 16;

}  // namespace
3622 :
3623 2141387 : void CodeGenerator::FinishFrame(Frame* frame) {
3624 4282774 : auto call_descriptor = linkage()->GetIncomingDescriptor();
3625 :
3626 : const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
3627 2141387 : if (saves_fp != 0) {
3628 : frame->AlignSavedCalleeRegisterSlots();
3629 0 : if (saves_fp != 0) { // Save callee-saved XMM registers.
3630 : const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
3631 : frame->AllocateSavedCalleeRegisterSlots(
3632 0 : saves_fp_count * (kQuadWordSize / kSystemPointerSize));
3633 : }
3634 : }
3635 : const RegList saves = call_descriptor->CalleeSavedRegisters();
3636 2141387 : if (saves != 0) { // Save callee-saved registers.
3637 : int count = 0;
3638 14467344 : for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
3639 14467344 : if (((1 << i) & saves)) {
3640 4521045 : ++count;
3641 : }
3642 : }
3643 : frame->AllocateSavedCalleeRegisterSlots(count);
3644 : }
3645 2141387 : }
3646 :
3647 14679283 : void CodeGenerator::AssembleConstructFrame() {
3648 4913318 : auto call_descriptor = linkage()->GetIncomingDescriptor();
3649 2151203 : if (frame_access_state()->has_frame()) {
3650 2151192 : int pc_base = __ pc_offset();
3651 :
3652 2151192 : if (call_descriptor->IsCFunctionCall()) {
3653 904209 : __ pushq(rbp);
3654 : __ movq(rbp, rsp);
3655 1246983 : } else if (call_descriptor->IsJSFunctionCall()) {
3656 636035 : __ Prologue();
3657 636037 : if (call_descriptor->PushArgumentCount()) {
3658 38776 : __ pushq(kJavaScriptCallArgCountRegister);
3659 : }
3660 : } else {
3661 610948 : __ StubPrologue(info()->GetOutputStackFrameType());
3662 610966 : if (call_descriptor->IsWasmFunctionCall()) {
3663 499642 : __ pushq(kWasmInstanceRegister);
3664 111324 : } else if (call_descriptor->IsWasmImportWrapper()) {
3665 : // WASM import wrappers are passed a tuple in the place of the instance.
3666 : // Unpack the tuple into the instance and the target callable.
3667 : // This must be done here in the codegen because it cannot be expressed
3668 : // properly in the graph.
3669 : __ LoadTaggedPointerField(
3670 : kJSFunctionRegister,
3671 6342 : FieldOperand(kWasmInstanceRegister, Tuple2::kValue2Offset));
3672 : __ LoadTaggedPointerField(
3673 : kWasmInstanceRegister,
3674 6342 : FieldOperand(kWasmInstanceRegister, Tuple2::kValue1Offset));
3675 6342 : __ pushq(kWasmInstanceRegister);
3676 : }
3677 : }
3678 :
3679 2151207 : unwinding_info_writer_.MarkFrameConstructed(pc_base);
3680 : }
3681 2151214 : int required_slots = frame()->GetTotalFrameSlotCount() -
3682 2151214 : call_descriptor->CalculateFixedFrameSize();
3683 :
3684 2151266 : if (info()->is_osr()) {
3685 : // TurboFan OSR-compiled functions cannot be entered directly.
3686 4991 : __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
3687 :
3688 : // Unoptimized code jumps directly to this entrypoint while the unoptimized
3689 : // frame is still on the stack. Optimized code uses OSR values directly from
3690 : // the unoptimized frame. Thus, all that needs to be done is to allocate the
3691 : // remaining stack slots.
3692 4991 : if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
3693 9982 : osr_pc_offset_ = __ pc_offset();
3694 4991 : required_slots -= static_cast<int>(osr_helper()->UnoptimizedFrameSlots());
3695 4991 : ResetSpeculationPoison();
3696 : }
3697 :
3698 : const RegList saves = call_descriptor->CalleeSavedRegisters();
3699 : const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
3700 :
3701 2151160 : if (required_slots > 0) {
3702 : DCHECK(frame_access_state()->has_frame());
3703 1656136 : if (info()->IsWasm() && required_slots > 128) {
3704 : // For WebAssembly functions with big frames we have to do the stack
3705 : // overflow check before we construct the frame. Otherwise we may not
3706 : // have enough space on the stack to call the runtime for the stack
3707 : // overflow.
3708 8 : Label done;
3709 :
3710 : // If the frame is bigger than the stack, we throw the stack overflow
3711 : // exception unconditionally. Thereby we can avoid the integer overflow
3712 : // check in the condition code.
3713 8 : if (required_slots * kSystemPointerSize < FLAG_stack_size * 1024) {
3714 : __ movq(kScratchRegister,
3715 : FieldOperand(kWasmInstanceRegister,
3716 8 : WasmInstanceObject::kRealStackLimitAddressOffset));
3717 16 : __ movq(kScratchRegister, Operand(kScratchRegister, 0));
3718 : __ addq(kScratchRegister,
3719 8 : Immediate(required_slots * kSystemPointerSize));
3720 8 : __ cmpq(rsp, kScratchRegister);
3721 8 : __ j(above_equal, &done);
3722 : }
3723 : __ LoadTaggedPointerField(
3724 : rcx, FieldOperand(kWasmInstanceRegister,
3725 8 : WasmInstanceObject::kCEntryStubOffset));
3726 8 : __ Move(rsi, Smi::zero());
3727 8 : __ CallRuntimeWithCEntry(Runtime::kThrowWasmStackOverflow, rcx);
3728 : ReferenceMap* reference_map = new (zone()) ReferenceMap(zone());
3729 : RecordSafepoint(reference_map, Safepoint::kSimple,
3730 8 : Safepoint::kNoLazyDeopt);
3731 8 : __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
3732 8 : __ bind(&done);
3733 : }
3734 :
3735 : // Skip callee-saved and return slots, which are created below.
3736 1656136 : required_slots -= base::bits::CountPopulation(saves);
3737 : required_slots -= base::bits::CountPopulation(saves_fp) *
3738 1656136 : (kQuadWordSize / kSystemPointerSize);
3739 1656136 : required_slots -= frame()->GetReturnSlotCount();
3740 1656136 : if (required_slots > 0) {
3741 2959230 : __ subq(rsp, Immediate(required_slots * kSystemPointerSize));
3742 : }
3743 : }
3744 :
3745 2151169 : if (saves_fp != 0) { // Save callee-saved XMM registers.
3746 : const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
3747 0 : const int stack_size = saves_fp_count * kQuadWordSize;
3748 : // Adjust the stack pointer.
3749 0 : __ subq(rsp, Immediate(stack_size));
3750 : // Store the registers on the stack.
3751 : int slot_idx = 0;
3752 0 : for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
3753 0 : if (!((1 << i) & saves_fp)) continue;
3754 : __ movdqu(Operand(rsp, kQuadWordSize * slot_idx),
3755 0 : XMMRegister::from_code(i));
3756 0 : slot_idx++;
3757 : }
3758 : }
3759 :
3760 2151169 : if (saves != 0) { // Save callee-saved registers.
3761 14467344 : for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
3762 14467344 : if (!((1 << i) & saves)) continue;
3763 4521045 : __ pushq(Register::from_code(i));
3764 : }
3765 : }
3766 :
3767 : // Allocate return slots (located after callee-saved).
3768 2151169 : if (frame()->GetReturnSlotCount() > 0) {
3769 1344 : __ subq(rsp, Immediate(frame()->GetReturnSlotCount() * kSystemPointerSize));
3770 : }
3771 2151169 : }
3772 :
3773 4852778 : void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
3774 9705556 : auto call_descriptor = linkage()->GetIncomingDescriptor();
3775 :
3776 : // Restore registers.
3777 : const RegList saves = call_descriptor->CalleeSavedRegisters();
3778 2426389 : if (saves != 0) {
3779 913577 : const int returns = frame()->GetReturnSlotCount();
3780 913577 : if (returns != 0) {
3781 1312 : __ addq(rsp, Immediate(returns * kSystemPointerSize));
3782 : }
3783 14617232 : for (int i = 0; i < Register::kNumRegisters; i++) {
3784 14617232 : if (!((1 << i) & saves)) continue;
3785 4567885 : __ popq(Register::from_code(i));
3786 : }
3787 : }
3788 : const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
3789 2426389 : if (saves_fp != 0) {
3790 : const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
3791 0 : const int stack_size = saves_fp_count * kQuadWordSize;
3792 : // Load the registers from the stack.
3793 : int slot_idx = 0;
3794 0 : for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
3795 0 : if (!((1 << i) & saves_fp)) continue;
3796 : __ movdqu(XMMRegister::from_code(i),
3797 0 : Operand(rsp, kQuadWordSize * slot_idx));
3798 0 : slot_idx++;
3799 : }
3800 : // Adjust the stack pointer.
3801 0 : __ addq(rsp, Immediate(stack_size));
3802 : }
3803 :
3804 : unwinding_info_writer_.MarkBlockWillExit();
3805 :
3806 : // Might need rcx for scratch if pop_size is too big or if there is a variable
3807 : // pop count.
3808 : DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rcx.bit());
3809 : DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rdx.bit());
3810 2426389 : size_t pop_size = call_descriptor->StackParameterCount() * kSystemPointerSize;
3811 : X64OperandConverter g(this, nullptr);
3812 2426389 : if (call_descriptor->IsCFunctionCall()) {
3813 913577 : AssembleDeconstructFrame();
3814 1512812 : } else if (frame_access_state()->has_frame()) {
3815 2886818 : if (pop->IsImmediate() && g.ToConstant(pop).ToInt32() == 0) {
3816 : // Canonicalize JSFunction return sites for now.
3817 1417352 : if (return_label_.is_bound()) {
3818 280721 : __ jmp(&return_label_);
3819 2426587 : return;
3820 : } else {
3821 1136631 : __ bind(&return_label_);
3822 1136635 : AssembleDeconstructFrame();
3823 : }
3824 : } else {
3825 52108 : AssembleDeconstructFrame();
3826 : }
3827 : }
3828 :
3829 2145873 : if (pop->IsImmediate()) {
3830 4187561 : pop_size += g.ToConstant(pop).ToInt32() * kSystemPointerSize;
3831 2093772 : CHECK_LT(pop_size, static_cast<size_t>(std::numeric_limits<int>::max()));
3832 2093772 : __ Ret(static_cast<int>(pop_size), rcx);
3833 : } else {
3834 : Register pop_reg = g.ToRegister(pop);
3835 52084 : Register scratch_reg = pop_reg == rcx ? rdx : rcx;
3836 52084 : __ popq(scratch_reg);
3837 104168 : __ leaq(rsp, Operand(rsp, pop_reg, times_8, static_cast<int>(pop_size)));
3838 52084 : __ jmp(scratch_reg);
3839 : }
3840 : }
3841 :
3842 2141249 : void CodeGenerator::FinishCode() { tasm()->PatchConstPool(); }
3843 :
3844 35844153 : void CodeGenerator::AssembleMove(InstructionOperand* source,
3845 : InstructionOperand* destination) {
3846 : X64OperandConverter g(this, nullptr);
3847 : // Helper function to write the given constant to the dst register.
3848 18204275 : auto MoveConstantToRegister = [&](Register dst, Constant src) {
3849 18204275 : switch (src.type()) {
3850 : case Constant::kInt32: {
3851 3528041 : if (RelocInfo::IsWasmReference(src.rmode())) {
3852 0 : __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));
3853 : } else {
3854 3528041 : int32_t value = src.ToInt32();
3855 3528041 : if (value == 0) {
3856 1007196 : __ xorl(dst, dst);
3857 : } else {
3858 2520845 : __ movl(dst, Immediate(value));
3859 : }
3860 : }
3861 : break;
3862 : }
3863 : case Constant::kInt64:
3864 2184653 : if (RelocInfo::IsWasmReference(src.rmode())) {
3865 0 : __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));
3866 : } else {
3867 2184653 : __ Set(dst, src.ToInt64());
3868 : }
3869 : break;
3870 : case Constant::kFloat32:
3871 672 : __ MoveNumber(dst, src.ToFloat32());
3872 336 : break;
3873 : case Constant::kFloat64:
3874 1775883 : __ MoveNumber(dst, src.ToFloat64().value());
3875 1775884 : break;
3876 : case Constant::kExternalReference:
3877 4703286 : __ Move(dst, src.ToExternalReference());
3878 2351652 : break;
3879 : case Constant::kHeapObject: {
3880 8361585 : Handle<HeapObject> src_object = src.ToHeapObject();
3881 : RootIndex index;
3882 8361589 : if (IsMaterializableFromRoot(src_object, &index)) {
3883 1852870 : __ LoadRoot(dst, index);
3884 : } else {
3885 6508719 : __ Move(dst, src_object);
3886 : }
3887 : break;
3888 : }
3889 : case Constant::kDelayedStringConstant: {
3890 2088 : const StringConstantBase* src_constant = src.ToDelayedStringConstant();
3891 2088 : __ MoveStringConstant(dst, src_constant);
3892 2088 : break;
3893 : }
3894 : case Constant::kRpoNumber:
3895 0 : UNREACHABLE(); // TODO(dcarney): load of labels on x64.
3896 : break;
3897 : }
3898 54048490 : };
3899 : // Helper function to write the given constant to the stack.
3900 38695 : auto MoveConstantToSlot = [&](Operand dst, Constant src) {
3901 38695 : if (!RelocInfo::IsWasmReference(src.rmode())) {
3902 38695 : switch (src.type()) {
3903 : case Constant::kInt32:
3904 19710 : __ movq(dst, Immediate(src.ToInt32()));
3905 19711 : return;
3906 : case Constant::kInt64:
3907 13056 : __ Set(dst, src.ToInt64());
3908 13056 : return;
3909 : default:
3910 : break;
3911 : }
3912 : }
3913 5929 : MoveConstantToRegister(kScratchRegister, src);
3914 5929 : __ movq(dst, kScratchRegister);
3915 35844153 : };
3916 : // Dispatch on the source and destination operand kinds.
3917 35844153 : switch (MoveType::InferMove(source, destination)) {
3918 : case MoveType::kRegisterToRegister:
3919 4105376 : if (source->IsRegister()) {
3920 3994296 : __ movq(g.ToRegister(destination), g.ToRegister(source));
3921 : } else {
3922 : DCHECK(source->IsFPRegister());
3923 : __ Movapd(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
3924 : }
3925 : return;
3926 : case MoveType::kRegisterToStack: {
3927 4948615 : Operand dst = g.ToOperand(destination);
3928 4948615 : if (source->IsRegister()) {
3929 4659959 : __ movq(dst, g.ToRegister(source));
3930 : } else {
3931 : DCHECK(source->IsFPRegister());
3932 : XMMRegister src = g.ToDoubleRegister(source);
3933 : MachineRepresentation rep =
3934 : LocationOperand::cast(source)->representation();
3935 288656 : if (rep != MachineRepresentation::kSimd128) {
3936 : __ Movsd(dst, src);
3937 : } else {
3938 : __ Movups(dst, src);
3939 : }
3940 : }
3941 : return;
3942 : }
3943 : case MoveType::kStackToRegister: {
3944 8186738 : Operand src = g.ToOperand(source);
3945 8186738 : if (source->IsStackSlot()) {
3946 7796947 : __ movq(g.ToRegister(destination), src);
3947 : } else {
3948 : DCHECK(source->IsFPStackSlot());
3949 : XMMRegister dst = g.ToDoubleRegister(destination);
3950 : MachineRepresentation rep =
3951 : LocationOperand::cast(source)->representation();
3952 389791 : if (rep != MachineRepresentation::kSimd128) {
3953 : __ Movsd(dst, src);
3954 : } else {
3955 : __ Movups(dst, src);
3956 : }
3957 : }
3958 : return;
3959 : }
3960 : case MoveType::kStackToStack: {
3961 44286 : Operand src = g.ToOperand(source);
3962 44286 : Operand dst = g.ToOperand(destination);
3963 44286 : if (source->IsStackSlot()) {
3964 : // Spill on demand to use a temporary register for memory-to-memory
3965 : // moves.
3966 24057 : __ movq(kScratchRegister, src);
3967 : __ movq(dst, kScratchRegister);
3968 : } else {
3969 : MachineRepresentation rep =
3970 : LocationOperand::cast(source)->representation();
3971 20229 : if (rep != MachineRepresentation::kSimd128) {
3972 : __ Movsd(kScratchDoubleReg, src);
3973 : __ Movsd(dst, kScratchDoubleReg);
3974 : } else {
3975 : DCHECK(source->IsSimd128StackSlot());
3976 : __ Movups(kScratchDoubleReg, src);
3977 : __ Movups(dst, kScratchDoubleReg);
3978 : }
3979 : }
3980 : return;
3981 : }
3982 : case MoveType::kConstantToRegister: {
3983 18513994 : Constant src = g.ToConstant(source);
3984 18514069 : if (destination->IsRegister()) {
3985 18198404 : MoveConstantToRegister(g.ToRegister(destination), src);
3986 : } else {
3987 : DCHECK(destination->IsFPRegister());
3988 315665 : XMMRegister dst = g.ToDoubleRegister(destination);
3989 315665 : if (src.type() == Constant::kFloat32) {
3990 : // TODO(turbofan): Can we do better here?
3991 12393 : __ Move(dst, bit_cast<uint32_t>(src.ToFloat32()));
3992 : } else {
3993 : DCHECK_EQ(src.type(), Constant::kFloat64);
3994 303272 : __ Move(dst, src.ToFloat64().AsUint64());
3995 : }
3996 : }
3997 : return;
3998 : }
3999 : case MoveType::kConstantToStack: {
4000 45287 : Constant src = g.ToConstant(source);
4001 45286 : Operand dst = g.ToOperand(destination);
4002 45286 : if (destination->IsStackSlot()) {
4003 38695 : MoveConstantToSlot(dst, src);
4004 : } else {
4005 : DCHECK(destination->IsFPStackSlot());
4006 6591 : if (src.type() == Constant::kFloat32) {
4007 3032 : __ movl(dst, Immediate(bit_cast<uint32_t>(src.ToFloat32())));
4008 : } else {
4009 : DCHECK_EQ(src.type(), Constant::kFloat64);
4010 3559 : __ movq(kScratchRegister, src.ToFloat64().AsUint64());
4011 : __ movq(dst, kScratchRegister);
4012 : }
4013 : }
4014 : return;
4015 : }
4016 : }
4017 0 : UNREACHABLE();
4018 : }
4019 :
4020 75903 : void CodeGenerator::AssembleSwap(InstructionOperand* source,
4021 3230 : InstructionOperand* destination) {
4022 : X64OperandConverter g(this, nullptr);
4023 : // Dispatch on the source and destination operand kinds. Not all
4024 : // combinations are possible.
4025 75903 : switch (MoveType::InferSwap(source, destination)) {
4026 : case MoveType::kRegisterToRegister: {
4027 65287 : if (source->IsRegister()) {
4028 : Register src = g.ToRegister(source);
4029 : Register dst = g.ToRegister(destination);
4030 62831 : __ movq(kScratchRegister, src);
4031 : __ movq(src, dst);
4032 : __ movq(dst, kScratchRegister);
4033 : } else {
4034 : DCHECK(source->IsFPRegister());
4035 : XMMRegister src = g.ToDoubleRegister(source);
4036 : XMMRegister dst = g.ToDoubleRegister(destination);
4037 : __ Movapd(kScratchDoubleReg, src);
4038 : __ Movapd(src, dst);
4039 : __ Movapd(dst, kScratchDoubleReg);
4040 : }
4041 : return;
4042 : }
4043 : case MoveType::kRegisterToStack: {
4044 6899 : if (source->IsRegister()) {
4045 : Register src = g.ToRegister(source);
4046 1615 : __ pushq(src);
4047 : frame_access_state()->IncreaseSPDelta(1);
4048 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4049 3230 : kSystemPointerSize);
4050 : __ movq(src, g.ToOperand(destination));
4051 : frame_access_state()->IncreaseSPDelta(-1);
4052 1615 : __ popq(g.ToOperand(destination));
4053 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4054 3230 : -kSystemPointerSize);
4055 : } else {
4056 : DCHECK(source->IsFPRegister());
4057 : XMMRegister src = g.ToDoubleRegister(source);
4058 5284 : Operand dst = g.ToOperand(destination);
4059 : MachineRepresentation rep =
4060 : LocationOperand::cast(source)->representation();
4061 5284 : if (rep != MachineRepresentation::kSimd128) {
4062 : __ Movsd(kScratchDoubleReg, src);
4063 : __ Movsd(src, dst);
4064 : __ Movsd(dst, kScratchDoubleReg);
4065 : } else {
4066 : __ Movups(kScratchDoubleReg, src);
4067 : __ Movups(src, dst);
4068 : __ Movups(dst, kScratchDoubleReg);
4069 : }
4070 : }
4071 : return;
4072 : }
4073 : case MoveType::kStackToStack: {
4074 3717 : Operand src = g.ToOperand(source);
4075 3717 : Operand dst = g.ToOperand(destination);
4076 : MachineRepresentation rep =
4077 : LocationOperand::cast(source)->representation();
4078 3717 : if (rep != MachineRepresentation::kSimd128) {
4079 : Register tmp = kScratchRegister;
4080 2981 : __ movq(tmp, dst);
4081 2981 : __ pushq(src); // Then use stack to copy src to destination.
4082 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4083 5962 : kSystemPointerSize);
4084 2981 : __ popq(dst);
4085 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4086 5962 : -kSystemPointerSize);
4087 : __ movq(src, tmp);
4088 : } else {
4089 : // Without AVX, misaligned reads and writes will trap. Move using the
4090 : // stack, in two parts.
4091 736 : __ movups(kScratchDoubleReg, dst); // Save dst in scratch register.
4092 736 : __ pushq(src); // Then use stack to copy src to destination.
4093 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4094 1472 : kSystemPointerSize);
4095 736 : __ popq(dst);
4096 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4097 1472 : -kSystemPointerSize);
4098 736 : __ pushq(g.ToOperand(source, kSystemPointerSize));
4099 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4100 1472 : kSystemPointerSize);
4101 736 : __ popq(g.ToOperand(destination, kSystemPointerSize));
4102 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4103 1472 : -kSystemPointerSize);
4104 736 : __ movups(src, kScratchDoubleReg);
4105 : }
4106 : return;
4107 : }
4108 : default:
4109 0 : UNREACHABLE();
4110 : break;
4111 : }
4112 : }
4113 :
4114 314 : void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
4115 201364 : for (size_t index = 0; index < target_count; ++index) {
4116 201050 : __ dq(targets[index]);
4117 : }
4118 314 : }
4119 :
4120 : #undef __
4121 :
4122 : } // namespace compiler
4123 : } // namespace internal
4124 178779 : } // namespace v8
|