// Copyright 2013 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/backend/code-generator.h"

#include <limits>

#include "src/base/overflowing-math.h"
#include "src/compiler/backend/code-generator-impl.h"
#include "src/compiler/backend/gap-resolver.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/osr.h"
#include "src/heap/heap-inl.h"  // crbug.com/v8/8499
#include "src/macro-assembler.h"
#include "src/objects/smi.h"
#include "src/optimized-compilation-info.h"
#include "src/wasm/wasm-code-manager.h"
#include "src/wasm/wasm-objects.h"
#include "src/x64/assembler-x64.h"

namespace v8 {
namespace internal {
namespace compiler {

#define __ tasm()->

// Adds X64-specific methods for decoding operands.
class X64OperandConverter : public InstructionOperandConverter {
 public:
  X64OperandConverter(CodeGenerator* gen, Instruction* instr)
      : InstructionOperandConverter(gen, instr) {}

  Immediate InputImmediate(size_t index) {
    return ToImmediate(instr_->InputAt(index));
  }

  Operand InputOperand(size_t index, int extra = 0) {
    return ToOperand(instr_->InputAt(index), extra);
  }

  Operand OutputOperand() { return ToOperand(instr_->Output()); }

  Immediate ToImmediate(InstructionOperand* operand) {
    Constant constant = ToConstant(operand);
    if (constant.type() == Constant::kFloat64) {
      DCHECK_EQ(0, constant.ToFloat64().AsUint64());
      return Immediate(0);
    }
    if (RelocInfo::IsWasmReference(constant.rmode())) {
      return Immediate(constant.ToInt32(), constant.rmode());
    }
    return Immediate(constant.ToInt32());
  }

  Operand ToOperand(InstructionOperand* op, int extra = 0) {
    DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
    return SlotToOperand(AllocatedOperand::cast(op)->index(), extra);
  }

  Operand SlotToOperand(int slot_index, int extra = 0) {
    FrameOffset offset = frame_access_state()->GetFrameOffset(slot_index);
    return Operand(offset.from_stack_pointer() ? rsp : rbp,
                   offset.offset() + extra);
  }

  static size_t NextOffset(size_t* offset) {
    size_t i = *offset;
    (*offset)++;
    return i;
  }

  static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) {
    STATIC_ASSERT(0 == static_cast<int>(times_1));
    STATIC_ASSERT(1 == static_cast<int>(times_2));
    STATIC_ASSERT(2 == static_cast<int>(times_4));
    STATIC_ASSERT(3 == static_cast<int>(times_8));
    int scale = static_cast<int>(mode - one);
    DCHECK(scale >= 0 && scale < 4);
    return static_cast<ScaleFactor>(scale);
  }

  Operand MemoryOperand(size_t* offset) {
    AddressingMode mode = AddressingModeField::decode(instr_->opcode());
    switch (mode) {
      case kMode_MR: {
        Register base = InputRegister(NextOffset(offset));
        int32_t disp = 0;
        return Operand(base, disp);
      }
      case kMode_MRI: {
        Register base = InputRegister(NextOffset(offset));
        int32_t disp = InputInt32(NextOffset(offset));
        return Operand(base, disp);
      }
      case kMode_MR1:
      case kMode_MR2:
      case kMode_MR4:
      case kMode_MR8: {
        Register base = InputRegister(NextOffset(offset));
        Register index = InputRegister(NextOffset(offset));
        ScaleFactor scale = ScaleFor(kMode_MR1, mode);
        int32_t disp = 0;
        return Operand(base, index, scale, disp);
      }
      case kMode_MR1I:
      case kMode_MR2I:
      case kMode_MR4I:
      case kMode_MR8I: {
        Register base = InputRegister(NextOffset(offset));
        Register index = InputRegister(NextOffset(offset));
        ScaleFactor scale = ScaleFor(kMode_MR1I, mode);
        int32_t disp = InputInt32(NextOffset(offset));
        return Operand(base, index, scale, disp);
      }
      case kMode_M1: {
        Register base = InputRegister(NextOffset(offset));
        int32_t disp = 0;
        return Operand(base, disp);
      }
      case kMode_M2:
        UNREACHABLE();  // Should use kMode_MR with a more compact encoding.
        return Operand(no_reg, 0);
      case kMode_M4:
      case kMode_M8: {
        Register index = InputRegister(NextOffset(offset));
        ScaleFactor scale = ScaleFor(kMode_M1, mode);
        int32_t disp = 0;
        return Operand(index, scale, disp);
      }
      case kMode_M1I:
      case kMode_M2I:
      case kMode_M4I:
      case kMode_M8I: {
        Register index = InputRegister(NextOffset(offset));
        ScaleFactor scale = ScaleFor(kMode_M1I, mode);
        int32_t disp = InputInt32(NextOffset(offset));
        return Operand(index, scale, disp);
      }
      case kMode_Root: {
        Register base = kRootRegister;
        int32_t disp = InputInt32(NextOffset(offset));
        return Operand(base, disp);
      }
      case kMode_None:
        UNREACHABLE();
    }
    UNREACHABLE();
  }
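
  // Illustrative walk-through (a sketch, not generated code): an instruction
  // encoded with kMode_MR4I consumes three inputs {base, index, disp} above
  // and yields the equivalent of
  //   Operand(base, index, times_4, disp)
  // because ScaleFor(kMode_MR1I, kMode_MR4I) == 2 == times_4.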

  Operand MemoryOperand(size_t first_input = 0) {
    return MemoryOperand(&first_input);
  }
};

namespace {

bool HasImmediateInput(Instruction* instr, size_t index) {
  return instr->InputAt(index)->IsImmediate();
}

class OutOfLineLoadFloat32NaN final : public OutOfLineCode {
 public:
  OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result)
      : OutOfLineCode(gen), result_(result) {}

  void Generate() final {
    __ Xorps(result_, result_);
    __ Divss(result_, result_);
  }

 private:
  XMMRegister const result_;
};

class OutOfLineLoadFloat64NaN final : public OutOfLineCode {
 public:
  OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result)
      : OutOfLineCode(gen), result_(result) {}

  void Generate() final {
    __ Xorpd(result_, result_);
    __ Divsd(result_, result_);
  }

 private:
  XMMRegister const result_;
};

class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
 public:
  OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
                             XMMRegister input, StubCallMode stub_mode,
                             UnwindingInfoWriter* unwinding_info_writer)
      : OutOfLineCode(gen),
        result_(result),
        input_(input),
        stub_mode_(stub_mode),
        unwinding_info_writer_(unwinding_info_writer),
        isolate_(gen->isolate()),
        zone_(gen->zone()) {}

  void Generate() final {
    __ subq(rsp, Immediate(kDoubleSize));
    unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                      kDoubleSize);
    __ Movsd(MemOperand(rsp, 0), input_);
    if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
      // A direct call to a wasm runtime stub defined in this module.
      // Just encode the stub index. This will be patched when the code
      // is added to the native module and copied into wasm code space.
      __ near_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
    } else {
      __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET);
    }
    __ movl(result_, MemOperand(rsp, 0));
    __ addq(rsp, Immediate(kDoubleSize));
    unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                      -kDoubleSize);
  }

 private:
  Register const result_;
  XMMRegister const input_;
  StubCallMode stub_mode_;
  UnwindingInfoWriter* const unwinding_info_writer_;
  Isolate* isolate_;
  Zone* zone_;
};

class OutOfLineRecordWrite final : public OutOfLineCode {
 public:
  OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand,
                       Register value, Register scratch0, Register scratch1,
                       RecordWriteMode mode, StubCallMode stub_mode)
      : OutOfLineCode(gen),
        object_(object),
        operand_(operand),
        value_(value),
        scratch0_(scratch0),
        scratch1_(scratch1),
        mode_(mode),
        stub_mode_(stub_mode),
        zone_(gen->zone()) {}

  void Generate() final {
    if (mode_ > RecordWriteMode::kValueIsPointer) {
      __ JumpIfSmi(value_, exit());
    }
    __ CheckPageFlag(value_, scratch0_,
                     MemoryChunk::kPointersToHereAreInterestingMask, zero,
                     exit());
    __ leaq(scratch1_, operand_);

    RememberedSetAction const remembered_set_action =
        mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
                                             : OMIT_REMEMBERED_SET;
    SaveFPRegsMode const save_fp_mode =
        frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;

    if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
      // A direct call to a wasm runtime stub defined in this module.
      // Just encode the stub index. This will be patched when the code
      // is added to the native module and copied into wasm code space.
      __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
                             save_fp_mode, wasm::WasmCode::kWasmRecordWrite);
    } else {
      __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
                             save_fp_mode);
    }
  }

 private:
  Register const object_;
  Operand const operand_;
  Register const value_;
  Register const scratch0_;
  Register const scratch1_;
  RecordWriteMode const mode_;
  StubCallMode const stub_mode_;
  Zone* zone_;
};

class WasmOutOfLineTrap : public OutOfLineCode {
 public:
  WasmOutOfLineTrap(CodeGenerator* gen, Instruction* instr)
      : OutOfLineCode(gen), gen_(gen), instr_(instr) {}

  void Generate() override {
    X64OperandConverter i(gen_, instr_);
    TrapId trap_id =
        static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
    GenerateWithTrapId(trap_id);
  }

 protected:
  CodeGenerator* gen_;

  void GenerateWithTrapId(TrapId trap_id) { GenerateCallToTrap(trap_id); }

 private:
  void GenerateCallToTrap(TrapId trap_id) {
    if (!gen_->wasm_runtime_exception_support()) {
      // We cannot test calls to the runtime in cctest/test-run-wasm.
      // Therefore we emit a call to C here instead of a call to the runtime.
      __ PrepareCallCFunction(0);
      __ CallCFunction(
          ExternalReference::wasm_call_trap_callback_for_testing(), 0);
      __ LeaveFrame(StackFrame::WASM_COMPILED);
      auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
      size_t pop_size =
          call_descriptor->StackParameterCount() * kSystemPointerSize;
      // Use rcx as a scratch register; we return immediately anyway.
      __ Ret(static_cast<int>(pop_size), rcx);
    } else {
      gen_->AssembleSourcePosition(instr_);
      // A direct call to a wasm runtime stub defined in this module.
      // Just encode the stub index. This will be patched when the code
      // is added to the native module and copied into wasm code space.
      __ near_call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
      ReferenceMap* reference_map =
          new (gen_->zone()) ReferenceMap(gen_->zone());
      gen_->RecordSafepoint(reference_map, Safepoint::kSimple,
                            Safepoint::kNoLazyDeopt);
      __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
    }
  }

  Instruction* instr_;
};

class WasmProtectedInstructionTrap final : public WasmOutOfLineTrap {
 public:
  WasmProtectedInstructionTrap(CodeGenerator* gen, int pc, Instruction* instr)
      : WasmOutOfLineTrap(gen, instr), pc_(pc) {}

  void Generate() final {
    gen_->AddProtectedInstructionLanding(pc_, __ pc_offset());
    GenerateWithTrapId(TrapId::kTrapMemOutOfBounds);
  }

 private:
  int pc_;
};

void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen,
                         InstructionCode opcode, Instruction* instr,
                         X64OperandConverter& i, int pc) {
  const MemoryAccessMode access_mode =
      static_cast<MemoryAccessMode>(MiscField::decode(opcode));
  if (access_mode == kMemoryAccessProtected) {
    new (zone) WasmProtectedInstructionTrap(codegen, pc, instr);
  }
}

void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
                                   InstructionCode opcode, Instruction* instr,
                                   X64OperandConverter& i) {
  const MemoryAccessMode access_mode =
      static_cast<MemoryAccessMode>(MiscField::decode(opcode));
  if (access_mode == kMemoryAccessPoisoned) {
    Register value = i.OutputRegister();
    codegen->tasm()->andq(value, kSpeculationPoisonRegister);
  }
}

}  // namespace

#define ASSEMBLE_UNOP(asm_instr)         \
  do {                                   \
    if (instr->Output()->IsRegister()) { \
      __ asm_instr(i.OutputRegister());  \
    } else {                             \
      __ asm_instr(i.OutputOperand());   \
    }                                    \
  } while (false)

#define ASSEMBLE_BINOP(asm_instr)                                     \
  do {                                                                \
    if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
      size_t index = 1;                                               \
      Operand right = i.MemoryOperand(&index);                        \
      __ asm_instr(i.InputRegister(0), right);                        \
    } else {                                                          \
      if (HasImmediateInput(instr, 1)) {                              \
        if (instr->InputAt(0)->IsRegister()) {                        \
          __ asm_instr(i.InputRegister(0), i.InputImmediate(1));      \
        } else {                                                      \
          __ asm_instr(i.InputOperand(0), i.InputImmediate(1));       \
        }                                                             \
      } else {                                                        \
        if (instr->InputAt(1)->IsRegister()) {                        \
          __ asm_instr(i.InputRegister(0), i.InputRegister(1));       \
        } else {                                                      \
          __ asm_instr(i.InputRegister(0), i.InputOperand(1));        \
        }                                                             \
      }                                                               \
    }                                                                 \
  } while (false)
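
// For reference, a hand expansion (illustrative only, not part of the build):
// with kMode_None addressing, an immediate on input 1, and a register on
// input 0, ASSEMBLE_BINOP(addl) reduces to
//   __ addl(i.InputRegister(0), i.InputImmediate(1));
// The remaining branches select the memory or register forms the same way.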

#define ASSEMBLE_COMPARE(asm_instr)                                   \
  do {                                                                \
    if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
      size_t index = 0;                                               \
      Operand left = i.MemoryOperand(&index);                         \
      if (HasImmediateInput(instr, index)) {                          \
        __ asm_instr(left, i.InputImmediate(index));                  \
      } else {                                                        \
        __ asm_instr(left, i.InputRegister(index));                   \
      }                                                               \
    } else {                                                          \
      if (HasImmediateInput(instr, 1)) {                              \
        if (instr->InputAt(0)->IsRegister()) {                        \
          __ asm_instr(i.InputRegister(0), i.InputImmediate(1));      \
        } else {                                                      \
          __ asm_instr(i.InputOperand(0), i.InputImmediate(1));       \
        }                                                             \
      } else {                                                        \
        if (instr->InputAt(1)->IsRegister()) {                        \
          __ asm_instr(i.InputRegister(0), i.InputRegister(1));       \
        } else {                                                      \
          __ asm_instr(i.InputRegister(0), i.InputOperand(1));        \
        }                                                             \
      }                                                               \
    }                                                                 \
  } while (false)

#define ASSEMBLE_MULT(asm_instr)                              \
  do {                                                        \
    if (HasImmediateInput(instr, 1)) {                        \
      if (instr->InputAt(0)->IsRegister()) {                  \
        __ asm_instr(i.OutputRegister(), i.InputRegister(0),  \
                     i.InputImmediate(1));                    \
      } else {                                                \
        __ asm_instr(i.OutputRegister(), i.InputOperand(0),   \
                     i.InputImmediate(1));                    \
      }                                                       \
    } else {                                                  \
      if (instr->InputAt(1)->IsRegister()) {                  \
        __ asm_instr(i.OutputRegister(), i.InputRegister(1)); \
      } else {                                                \
        __ asm_instr(i.OutputRegister(), i.InputOperand(1));  \
      }                                                       \
    }                                                         \
  } while (false)

#define ASSEMBLE_SHIFT(asm_instr, width)                                   \
  do {                                                                     \
    if (HasImmediateInput(instr, 1)) {                                     \
      if (instr->Output()->IsRegister()) {                                 \
        __ asm_instr(i.OutputRegister(), Immediate(i.InputInt##width(1))); \
      } else {                                                             \
        __ asm_instr(i.OutputOperand(), Immediate(i.InputInt##width(1)));  \
      }                                                                    \
    } else {                                                               \
      if (instr->Output()->IsRegister()) {                                 \
        __ asm_instr##_cl(i.OutputRegister());                             \
      } else {                                                             \
        __ asm_instr##_cl(i.OutputOperand());                              \
      }                                                                    \
    }                                                                      \
  } while (false)
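
// Note on the width argument: 5 selects InputInt5 and 6 selects InputInt6,
// which truncate the immediate shift count to its low 5 or 6 bits. This
// mirrors what the hardware itself does with a count in cl for 32-bit and
// 64-bit shifts respectively.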

#define ASSEMBLE_MOVX(asm_instr)                            \
  do {                                                      \
    if (instr->addressing_mode() != kMode_None) {           \
      __ asm_instr(i.OutputRegister(), i.MemoryOperand());  \
    } else if (instr->InputAt(0)->IsRegister()) {           \
      __ asm_instr(i.OutputRegister(), i.InputRegister(0)); \
    } else {                                                \
      __ asm_instr(i.OutputRegister(), i.InputOperand(0));  \
    }                                                       \
  } while (false)

#define ASSEMBLE_SSE_BINOP(asm_instr)                                   \
  do {                                                                  \
    if (instr->InputAt(1)->IsFPRegister()) {                            \
      __ asm_instr(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); \
    } else {                                                            \
      __ asm_instr(i.InputDoubleRegister(0), i.InputOperand(1));        \
    }                                                                   \
  } while (false)

#define ASSEMBLE_SSE_UNOP(asm_instr)                                    \
  do {                                                                  \
    if (instr->InputAt(0)->IsFPRegister()) {                            \
      __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); \
    } else {                                                            \
      __ asm_instr(i.OutputDoubleRegister(), i.InputOperand(0));        \
    }                                                                   \
  } while (false)

#define ASSEMBLE_AVX_BINOP(asm_instr)                                  \
  do {                                                                 \
    CpuFeatureScope avx_scope(tasm(), AVX);                            \
    if (instr->InputAt(1)->IsFPRegister()) {                           \
      __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
                   i.InputDoubleRegister(1));                          \
    } else {                                                           \
      __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
                   i.InputOperand(1));                                 \
    }                                                                  \
  } while (false)

#define ASSEMBLE_IEEE754_BINOP(name)                                     \
  do {                                                                   \
    __ PrepareCallCFunction(2);                                          \
    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \
  } while (false)

#define ASSEMBLE_IEEE754_UNOP(name)                                      \
  do {                                                                   \
    __ PrepareCallCFunction(1);                                          \
    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 1); \
  } while (false)

#define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
  do {                                                          \
    Label binop;                                                \
    __ bind(&binop);                                            \
    __ mov_inst(rax, i.MemoryOperand(1));                       \
    __ movl(i.TempRegister(0), rax);                            \
    __ bin_inst(i.TempRegister(0), i.InputRegister(0));         \
    __ lock();                                                  \
    __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));     \
    __ j(not_equal, &binop);                                    \
  } while (false)

#define ASSEMBLE_ATOMIC64_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
  do {                                                            \
    Label binop;                                                  \
    __ bind(&binop);                                              \
    __ mov_inst(rax, i.MemoryOperand(1));                         \
    __ movq(i.TempRegister(0), rax);                              \
    __ bin_inst(i.TempRegister(0), i.InputRegister(0));           \
    __ lock();                                                    \
    __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));       \
    __ j(not_equal, &binop);                                      \
  } while (false)
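
// Both atomic-binop macros above emit the classic compare-and-swap retry
// loop: load the old value into rax, compute old <op> input into a temp,
// then lock cmpxchg stores the temp only if memory still equals rax,
// retrying on failure. rax serves both as the expected value and, on exit,
// as the result of the atomic operation.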

#define ASSEMBLE_SIMD_INSTR(opcode, dst_operand, index)      \
  do {                                                       \
    if (instr->InputAt(index)->IsSimd128Register()) {        \
      __ opcode(dst_operand, i.InputSimd128Register(index)); \
    } else {                                                 \
      __ opcode(dst_operand, i.InputOperand(index));         \
    }                                                        \
  } while (false)

#define ASSEMBLE_SIMD_IMM_INSTR(opcode, dst_operand, index, imm)  \
  do {                                                            \
    if (instr->InputAt(index)->IsSimd128Register()) {             \
      __ opcode(dst_operand, i.InputSimd128Register(index), imm); \
    } else {                                                      \
      __ opcode(dst_operand, i.InputOperand(index), imm);         \
    }                                                             \
  } while (false)

#define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode)             \
  do {                                                   \
    XMMRegister dst = i.OutputSimd128Register();         \
    DCHECK_EQ(dst, i.InputSimd128Register(0));           \
    byte input_index = instr->InputCount() == 2 ? 1 : 0; \
    ASSEMBLE_SIMD_INSTR(opcode, dst, input_index);       \
  } while (false)

#define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, SSELevel, imm)                  \
  do {                                                                    \
    CpuFeatureScope sse_scope(tasm(), SSELevel);                          \
    DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));      \
    __ opcode(i.OutputSimd128Register(), i.InputSimd128Register(1), imm); \
  } while (false)

void CodeGenerator::AssembleDeconstructFrame() {
  unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
  __ movq(rsp, rbp);
  __ popq(rbp);
}

void CodeGenerator::AssemblePrepareTailCall() {
  if (frame_access_state()->has_frame()) {
    __ movq(rbp, MemOperand(rbp, 0));
  }
  frame_access_state()->SetFrameAccessToSP();
}

void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
                                                     Register scratch1,
                                                     Register scratch2,
                                                     Register scratch3) {
  DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
  Label done;

  // Check if current frame is an arguments adaptor frame.
  __ cmpq(Operand(rbp, CommonFrameConstants::kContextOrFrameTypeOffset),
          Immediate(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
  __ j(not_equal, &done, Label::kNear);

  // Load the arguments count from the current arguments adaptor frame
  // (note that it does not include the receiver).
  Register caller_args_count_reg = scratch1;
  __ SmiUntag(caller_args_count_reg,
              Operand(rbp, ArgumentsAdaptorFrameConstants::kLengthOffset));

  ParameterCount callee_args_count(args_reg);
  __ PrepareForTailCall(callee_args_count, caller_args_count_reg, scratch2,
                        scratch3);
  __ bind(&done);
}

namespace {

void AdjustStackPointerForTailCall(Assembler* assembler,
                                   FrameAccessState* state,
                                   int new_slot_above_sp,
                                   bool allow_shrinkage = true) {
  int current_sp_offset = state->GetSPToFPSlotCount() +
                          StandardFrameConstants::kFixedSlotCountAboveFp;
  int stack_slot_delta = new_slot_above_sp - current_sp_offset;
  if (stack_slot_delta > 0) {
    assembler->subq(rsp, Immediate(stack_slot_delta * kSystemPointerSize));
    state->IncreaseSPDelta(stack_slot_delta);
  } else if (allow_shrinkage && stack_slot_delta < 0) {
    assembler->addq(rsp, Immediate(-stack_slot_delta * kSystemPointerSize));
    state->IncreaseSPDelta(stack_slot_delta);
  }
}
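
// Worked example (illustrative): if current_sp_offset is 3 and a tail call
// needs new_slot_above_sp == 5, then stack_slot_delta == 2 and rsp is lowered
// by 2 * kSystemPointerSize to grow the stack; a negative delta raises rsp
// again, but only when allow_shrinkage permits it.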

void SetupShuffleMaskOnStack(TurboAssembler* assembler, uint32_t* mask) {
  int64_t shuffle_mask = (mask[2]) | (static_cast<uint64_t>(mask[3]) << 32);
  assembler->movq(kScratchRegister, shuffle_mask);
  assembler->Push(kScratchRegister);
  shuffle_mask = (mask[0]) | (static_cast<uint64_t>(mask[1]) << 32);
  assembler->movq(kScratchRegister, shuffle_mask);
  assembler->Push(kScratchRegister);
}
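
// After the two pushes the 128-bit mask lies contiguously at the stack top,
// little-endian: {mask[0], mask[1]} at [rsp] and {mask[2], mask[3]} at
// [rsp + kSystemPointerSize], ready to be consumed as a memory operand.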

}  // namespace

void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
                                              int first_unused_stack_slot) {
  CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
  ZoneVector<MoveOperands*> pushes(zone());
  GetPushCompatibleMoves(instr, flags, &pushes);

  if (!pushes.empty() &&
      (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
       first_unused_stack_slot)) {
    X64OperandConverter g(this, instr);
    for (auto move : pushes) {
      LocationOperand destination_location(
          LocationOperand::cast(move->destination()));
      InstructionOperand source(move->source());
      AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                    destination_location.index());
      if (source.IsStackSlot()) {
        LocationOperand source_location(LocationOperand::cast(source));
        __ Push(g.SlotToOperand(source_location.index()));
      } else if (source.IsRegister()) {
        LocationOperand source_location(LocationOperand::cast(source));
        __ Push(source_location.GetRegister());
      } else if (source.IsImmediate()) {
        __ Push(Immediate(ImmediateOperand::cast(source).inline_value()));
      } else {
        // Pushes of non-scalar data types are not supported.
        UNIMPLEMENTED();
      }
      frame_access_state()->IncreaseSPDelta(1);
      move->Eliminate();
    }
  }
  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                first_unused_stack_slot, false);
}

void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
                                             int first_unused_stack_slot) {
  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                first_unused_stack_slot);
}

// Check that {kJavaScriptCallCodeStartRegister} is correct.
void CodeGenerator::AssembleCodeStartRegisterCheck() {
  __ ComputeCodeStartAddress(rbx);
  __ cmpq(rbx, kJavaScriptCallCodeStartRegister);
  __ Assert(equal, AbortReason::kWrongFunctionCodeStart);
}

// Check if the code object is marked for deoptimization. If it is, then it
// jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we
// need to:
// 1. read from memory the word that contains that bit, which can be found in
//    the flags in the referenced {CodeDataContainer} object;
// 2. test kMarkedForDeoptimizationBit in those flags; and
// 3. if it is not zero, jump to the builtin.
void CodeGenerator::BailoutIfDeoptimized() {
  int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
  __ LoadTaggedPointerField(rbx,
                            Operand(kJavaScriptCallCodeStartRegister, offset));
  __ testl(FieldOperand(rbx, CodeDataContainer::kKindSpecificFlagsOffset),
           Immediate(1 << Code::kMarkedForDeoptimizationBit));
  __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
          RelocInfo::CODE_TARGET, not_zero);
}

void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
  // Set a mask which has all bits set in the normal case, but has all
  // bits cleared if we are speculatively executing the wrong PC.
  __ ComputeCodeStartAddress(rbx);
  __ xorq(kSpeculationPoisonRegister, kSpeculationPoisonRegister);
  __ cmpq(kJavaScriptCallCodeStartRegister, rbx);
  __ movq(rbx, Immediate(-1));
  __ cmovq(equal, kSpeculationPoisonRegister, rbx);
}

void CodeGenerator::AssembleRegisterArgumentPoisoning() {
  __ andq(kJSFunctionRegister, kSpeculationPoisonRegister);
  __ andq(kContextRegister, kSpeculationPoisonRegister);
  __ andq(rsp, kSpeculationPoisonRegister);
}

// Assembles an instruction after register allocation, producing machine code.
CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
    Instruction* instr) {
  X64OperandConverter i(this, instr);
  InstructionCode opcode = instr->opcode();
  ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
  switch (arch_opcode) {
    case kArchCallCodeObject: {
      if (HasImmediateInput(instr, 0)) {
        Handle<Code> code = i.InputCode(0);
        __ Call(code, RelocInfo::CODE_TARGET);
      } else {
        Register reg = i.InputRegister(0);
        DCHECK_IMPLIES(
            HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
            reg == kJavaScriptCallCodeStartRegister);
        __ LoadCodeObjectEntry(reg, reg);
        if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
          __ RetpolineCall(reg);
        } else {
          __ call(reg);
        }
      }
      RecordCallPosition(instr);
      frame_access_state()->ClearSPDelta();
      break;
    }
    case kArchCallBuiltinPointer: {
      DCHECK(!HasImmediateInput(instr, 0));
      Register builtin_pointer = i.InputRegister(0);
      __ CallBuiltinPointer(builtin_pointer);
      RecordCallPosition(instr);
      frame_access_state()->ClearSPDelta();
      break;
    }
    case kArchCallWasmFunction: {
      if (HasImmediateInput(instr, 0)) {
        Constant constant = i.ToConstant(instr->InputAt(0));
        Address wasm_code = static_cast<Address>(constant.ToInt64());
        if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
          __ near_call(wasm_code, constant.rmode());
        } else {
          if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
            __ RetpolineCall(wasm_code, constant.rmode());
          } else {
            __ Call(wasm_code, constant.rmode());
          }
        }
      } else {
        Register reg = i.InputRegister(0);
        if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
          __ RetpolineCall(reg);
        } else {
          __ call(reg);
        }
      }
      RecordCallPosition(instr);
      frame_access_state()->ClearSPDelta();
      break;
    }
    case kArchTailCallCodeObjectFromJSFunction:
    case kArchTailCallCodeObject: {
      if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) {
        AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
                                         i.TempRegister(0), i.TempRegister(1),
                                         i.TempRegister(2));
      }
      if (HasImmediateInput(instr, 0)) {
        Handle<Code> code = i.InputCode(0);
        __ Jump(code, RelocInfo::CODE_TARGET);
      } else {
        Register reg = i.InputRegister(0);
        DCHECK_IMPLIES(
            HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
            reg == kJavaScriptCallCodeStartRegister);
        __ LoadCodeObjectEntry(reg, reg);
        if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
          __ RetpolineJump(reg);
        } else {
          __ jmp(reg);
        }
      }
      unwinding_info_writer_.MarkBlockWillExit();
      frame_access_state()->ClearSPDelta();
      frame_access_state()->SetFrameAccessToDefault();
      break;
    }
    case kArchTailCallWasm: {
      if (HasImmediateInput(instr, 0)) {
        Constant constant = i.ToConstant(instr->InputAt(0));
        Address wasm_code = static_cast<Address>(constant.ToInt64());
        if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
          __ near_jmp(wasm_code, constant.rmode());
        } else {
          __ Move(kScratchRegister, wasm_code, constant.rmode());
          __ jmp(kScratchRegister);
        }
      } else {
        Register reg = i.InputRegister(0);
        if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
          __ RetpolineJump(reg);
        } else {
          __ jmp(reg);
        }
      }
      unwinding_info_writer_.MarkBlockWillExit();
      frame_access_state()->ClearSPDelta();
      frame_access_state()->SetFrameAccessToDefault();
      break;
    }
    case kArchTailCallAddress: {
      CHECK(!HasImmediateInput(instr, 0));
      Register reg = i.InputRegister(0);
      DCHECK_IMPLIES(
          HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
          reg == kJavaScriptCallCodeStartRegister);
      if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
        __ RetpolineJump(reg);
      } else {
        __ jmp(reg);
      }
      unwinding_info_writer_.MarkBlockWillExit();
      frame_access_state()->ClearSPDelta();
      frame_access_state()->SetFrameAccessToDefault();
      break;
    }
    case kArchCallJSFunction: {
      Register func = i.InputRegister(0);
      if (FLAG_debug_code) {
        // Check the function's context matches the context argument.
        __ cmp_tagged(rsi, FieldOperand(func, JSFunction::kContextOffset));
        __ Assert(equal, AbortReason::kWrongFunctionContext);
      }
      static_assert(kJavaScriptCallCodeStartRegister == rcx, "ABI mismatch");
      __ LoadTaggedPointerField(rcx,
                                FieldOperand(func, JSFunction::kCodeOffset));
      __ CallCodeObject(rcx);
      frame_access_state()->ClearSPDelta();
      RecordCallPosition(instr);
      break;
    }
    case kArchPrepareCallCFunction: {
      // Frame alignment requires using FP-relative frame addressing.
      frame_access_state()->SetFrameAccessToFP();
      int const num_parameters = MiscField::decode(instr->opcode());
      __ PrepareCallCFunction(num_parameters);
      break;
    }
    case kArchSaveCallerRegisters: {
      fp_mode_ =
          static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
      DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
      // kReturnRegister0 should have been saved before entering the stub.
      int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
      DCHECK(IsAligned(bytes, kSystemPointerSize));
      DCHECK_EQ(0, frame_access_state()->sp_delta());
      frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
      DCHECK(!caller_registers_saved_);
      caller_registers_saved_ = true;
      break;
    }
    case kArchRestoreCallerRegisters: {
      DCHECK(fp_mode_ ==
             static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
      DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
      // Don't overwrite the returned value.
      int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
      frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
      DCHECK_EQ(0, frame_access_state()->sp_delta());
      DCHECK(caller_registers_saved_);
      caller_registers_saved_ = false;
      break;
    }
    case kArchPrepareTailCall:
      AssemblePrepareTailCall();
      break;
    case kArchCallCFunction: {
      int const num_parameters = MiscField::decode(instr->opcode());
      if (HasImmediateInput(instr, 0)) {
        ExternalReference ref = i.InputExternalReference(0);
        __ CallCFunction(ref, num_parameters);
      } else {
        Register func = i.InputRegister(0);
        __ CallCFunction(func, num_parameters);
      }
      frame_access_state()->SetFrameAccessToDefault();
      // Ideally, we should decrement the SP delta to match the change of the
      // stack pointer in CallCFunction. However, for certain architectures
      // (e.g. ARM), there may be a stricter alignment requirement, causing
      // the old SP to be saved on the stack. In those cases, we cannot
      // calculate the SP delta statically.
      frame_access_state()->ClearSPDelta();
      if (caller_registers_saved_) {
        // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
        // Here, we assume the sequence to be:
        //   kArchSaveCallerRegisters;
        //   kArchCallCFunction;
        //   kArchRestoreCallerRegisters;
        int bytes =
            __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
        frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
      }
      // TODO(tebbi): Do we need an lfence here?
      break;
    }
    case kArchJmp:
      AssembleArchJump(i.InputRpo(0));
      break;
    case kArchBinarySearchSwitch:
      AssembleArchBinarySearchSwitch(instr);
      break;
    case kArchLookupSwitch:
      AssembleArchLookupSwitch(instr);
      break;
    case kArchTableSwitch:
      AssembleArchTableSwitch(instr);
      break;
    case kArchComment:
      __ RecordComment(reinterpret_cast<const char*>(i.InputInt64(0)));
      break;
    case kArchDebugAbort:
      DCHECK(i.InputRegister(0) == rdx);
      if (!frame_access_state()->has_frame()) {
        // We don't actually want to generate a pile of code for this, so just
        // claim there is a stack frame, without generating one.
        FrameScope scope(tasm(), StackFrame::NONE);
        __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
                RelocInfo::CODE_TARGET);
      } else {
        __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
                RelocInfo::CODE_TARGET);
      }
      __ int3();
      unwinding_info_writer_.MarkBlockWillExit();
      break;
    case kArchDebugBreak:
      __ int3();
      break;
    case kArchThrowTerminator:
      unwinding_info_writer_.MarkBlockWillExit();
      break;
    case kArchNop:
      // don't emit code for nops.
      break;
    case kArchDeoptimize: {
      int deopt_state_id =
          BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
      CodeGenResult result =
          AssembleDeoptimizerCall(deopt_state_id, current_source_position_);
      if (result != kSuccess) return result;
      unwinding_info_writer_.MarkBlockWillExit();
      break;
    }
    case kArchRet:
      AssembleReturn(instr->InputAt(0));
      break;
    case kArchStackPointer:
      __ movq(i.OutputRegister(), rsp);
      break;
    case kArchFramePointer:
      __ movq(i.OutputRegister(), rbp);
      break;
    case kArchParentFramePointer:
      if (frame_access_state()->has_frame()) {
        __ movq(i.OutputRegister(), Operand(rbp, 0));
      } else {
        __ movq(i.OutputRegister(), rbp);
      }
      break;
    case kArchTruncateDoubleToI: {
      auto result = i.OutputRegister();
      auto input = i.InputDoubleRegister(0);
      auto ool = new (zone()) OutOfLineTruncateDoubleToI(
          this, result, input, DetermineStubCallMode(),
          &unwinding_info_writer_);
      // We use Cvttsd2siq instead of Cvttsd2si for performance reasons. The
      // use of Cvttsd2siq requires the movl below to avoid sign extension.
      __ Cvttsd2siq(result, input);
      __ cmpq(result, Immediate(1));
      __ j(overflow, ool->entry());
      __ bind(ool->exit());
      __ movl(result, result);
      break;
    }
    case kArchStoreWithWriteBarrier: {
      RecordWriteMode mode =
          static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
      Register object = i.InputRegister(0);
      size_t index = 0;
      Operand operand = i.MemoryOperand(&index);
      Register value = i.InputRegister(index);
      Register scratch0 = i.TempRegister(0);
      Register scratch1 = i.TempRegister(1);
      auto ool = new (zone())
          OutOfLineRecordWrite(this, object, operand, value, scratch0,
                               scratch1, mode, DetermineStubCallMode());
      __ StoreTaggedField(operand, value);
      __ CheckPageFlag(object, scratch0,
                       MemoryChunk::kPointersFromHereAreInterestingMask,
                       not_zero, ool->entry());
      __ bind(ool->exit());
      break;
    }
    case kArchWordPoisonOnSpeculation:
      DCHECK_EQ(i.OutputRegister(), i.InputRegister(0));
      __ andq(i.InputRegister(0), kSpeculationPoisonRegister);
      break;
    case kLFence:
      __ lfence();
      break;
    case kArchStackSlot: {
      FrameOffset offset =
          frame_access_state()->GetFrameOffset(i.InputInt32(0));
      Register base = offset.from_stack_pointer() ? rsp : rbp;
      __ leaq(i.OutputRegister(), Operand(base, offset.offset()));
      break;
    }
    case kIeee754Float64Acos:
      ASSEMBLE_IEEE754_UNOP(acos);
      break;
    case kIeee754Float64Acosh:
      ASSEMBLE_IEEE754_UNOP(acosh);
      break;
    case kIeee754Float64Asin:
      ASSEMBLE_IEEE754_UNOP(asin);
      break;
    case kIeee754Float64Asinh:
      ASSEMBLE_IEEE754_UNOP(asinh);
      break;
    case kIeee754Float64Atan:
      ASSEMBLE_IEEE754_UNOP(atan);
      break;
    case kIeee754Float64Atanh:
      ASSEMBLE_IEEE754_UNOP(atanh);
      break;
    case kIeee754Float64Atan2:
      ASSEMBLE_IEEE754_BINOP(atan2);
      break;
    case kIeee754Float64Cbrt:
      ASSEMBLE_IEEE754_UNOP(cbrt);
      break;
    case kIeee754Float64Cos:
      ASSEMBLE_IEEE754_UNOP(cos);
      break;
    case kIeee754Float64Cosh:
      ASSEMBLE_IEEE754_UNOP(cosh);
      break;
    case kIeee754Float64Exp:
      ASSEMBLE_IEEE754_UNOP(exp);
      break;
    case kIeee754Float64Expm1:
      ASSEMBLE_IEEE754_UNOP(expm1);
      break;
    case kIeee754Float64Log:
      ASSEMBLE_IEEE754_UNOP(log);
      break;
    case kIeee754Float64Log1p:
      ASSEMBLE_IEEE754_UNOP(log1p);
      break;
    case kIeee754Float64Log2:
      ASSEMBLE_IEEE754_UNOP(log2);
      break;
    case kIeee754Float64Log10:
      ASSEMBLE_IEEE754_UNOP(log10);
      break;
    case kIeee754Float64Pow:
      ASSEMBLE_IEEE754_BINOP(pow);
      break;
    case kIeee754Float64Sin:
      ASSEMBLE_IEEE754_UNOP(sin);
      break;
    case kIeee754Float64Sinh:
      ASSEMBLE_IEEE754_UNOP(sinh);
      break;
    case kIeee754Float64Tan:
      ASSEMBLE_IEEE754_UNOP(tan);
      break;
    case kIeee754Float64Tanh:
      ASSEMBLE_IEEE754_UNOP(tanh);
      break;
    case kX64Add32:
      ASSEMBLE_BINOP(addl);
      break;
    case kX64Add:
      ASSEMBLE_BINOP(addq);
      break;
    case kX64Sub32:
      ASSEMBLE_BINOP(subl);
      break;
    case kX64Sub:
      ASSEMBLE_BINOP(subq);
      break;
    case kX64And32:
      ASSEMBLE_BINOP(andl);
      break;
    case kX64And:
      ASSEMBLE_BINOP(andq);
      break;
    case kX64Cmp8:
      ASSEMBLE_COMPARE(cmpb);
      break;
    case kX64Cmp16:
      ASSEMBLE_COMPARE(cmpw);
      break;
    case kX64Cmp32:
      ASSEMBLE_COMPARE(cmpl);
      break;
    case kX64Cmp:
      ASSEMBLE_COMPARE(cmpq);
      break;
    case kX64Test8:
      ASSEMBLE_COMPARE(testb);
      break;
    case kX64Test16:
      ASSEMBLE_COMPARE(testw);
      break;
    case kX64Test32:
      ASSEMBLE_COMPARE(testl);
      break;
    case kX64Test:
      ASSEMBLE_COMPARE(testq);
      break;
    case kX64Imul32:
      ASSEMBLE_MULT(imull);
      break;
    case kX64Imul:
      ASSEMBLE_MULT(imulq);
      break;
    case kX64ImulHigh32:
      if (instr->InputAt(1)->IsRegister()) {
        __ imull(i.InputRegister(1));
      } else {
        __ imull(i.InputOperand(1));
      }
      break;
    case kX64UmulHigh32:
      if (instr->InputAt(1)->IsRegister()) {
        __ mull(i.InputRegister(1));
      } else {
        __ mull(i.InputOperand(1));
      }
      break;
    case kX64Idiv32:
      __ cdq();
      __ idivl(i.InputRegister(1));
      break;
    case kX64Idiv:
      __ cqo();
      __ idivq(i.InputRegister(1));
      break;
    case kX64Udiv32:
      __ xorl(rdx, rdx);
      __ divl(i.InputRegister(1));
      break;
    case kX64Udiv:
      __ xorq(rdx, rdx);
      __ divq(i.InputRegister(1));
      break;
    case kX64Not:
      ASSEMBLE_UNOP(notq);
      break;
    case kX64Not32:
      ASSEMBLE_UNOP(notl);
      break;
    case kX64Neg:
      ASSEMBLE_UNOP(negq);
      break;
    case kX64Neg32:
      ASSEMBLE_UNOP(negl);
      break;
    case kX64Or32:
      ASSEMBLE_BINOP(orl);
      break;
    case kX64Or:
      ASSEMBLE_BINOP(orq);
      break;
    case kX64Xor32:
      ASSEMBLE_BINOP(xorl);
      break;
    case kX64Xor:
      ASSEMBLE_BINOP(xorq);
      break;
    case kX64Shl32:
      ASSEMBLE_SHIFT(shll, 5);
      break;
    case kX64Shl:
      ASSEMBLE_SHIFT(shlq, 6);
      break;
    case kX64Shr32:
      ASSEMBLE_SHIFT(shrl, 5);
      break;
    case kX64Shr:
      ASSEMBLE_SHIFT(shrq, 6);
      break;
    case kX64Sar32:
      ASSEMBLE_SHIFT(sarl, 5);
      break;
    case kX64Sar:
      ASSEMBLE_SHIFT(sarq, 6);
      break;
    case kX64Ror32:
      ASSEMBLE_SHIFT(rorl, 5);
      break;
    case kX64Ror:
      ASSEMBLE_SHIFT(rorq, 6);
      break;
    case kX64Lzcnt:
      if (instr->InputAt(0)->IsRegister()) {
        __ Lzcntq(i.OutputRegister(), i.InputRegister(0));
      } else {
        __ Lzcntq(i.OutputRegister(), i.InputOperand(0));
      }
      break;
    case kX64Lzcnt32:
      if (instr->InputAt(0)->IsRegister()) {
        __ Lzcntl(i.OutputRegister(), i.InputRegister(0));
      } else {
        __ Lzcntl(i.OutputRegister(), i.InputOperand(0));
      }
      break;
    case kX64Tzcnt:
      if (instr->InputAt(0)->IsRegister()) {
        __ Tzcntq(i.OutputRegister(), i.InputRegister(0));
      } else {
        __ Tzcntq(i.OutputRegister(), i.InputOperand(0));
      }
      break;
    case kX64Tzcnt32:
      if (instr->InputAt(0)->IsRegister()) {
        __ Tzcntl(i.OutputRegister(), i.InputRegister(0));
      } else {
        __ Tzcntl(i.OutputRegister(), i.InputOperand(0));
      }
      break;
    case kX64Popcnt:
      if (instr->InputAt(0)->IsRegister()) {
        __ Popcntq(i.OutputRegister(), i.InputRegister(0));
      } else {
        __ Popcntq(i.OutputRegister(), i.InputOperand(0));
      }
      break;
    case kX64Popcnt32:
      if (instr->InputAt(0)->IsRegister()) {
        __ Popcntl(i.OutputRegister(), i.InputRegister(0));
      } else {
        __ Popcntl(i.OutputRegister(), i.InputOperand(0));
      }
      break;
    case kX64Bswap:
      __ bswapq(i.OutputRegister());
      break;
    case kX64Bswap32:
      __ bswapl(i.OutputRegister());
      break;
    case kSSEFloat32Cmp:
      ASSEMBLE_SSE_BINOP(Ucomiss);
      break;
    case kSSEFloat32Add:
      ASSEMBLE_SSE_BINOP(addss);
      break;
    case kSSEFloat32Sub:
      ASSEMBLE_SSE_BINOP(subss);
      break;
    case kSSEFloat32Mul:
      ASSEMBLE_SSE_BINOP(mulss);
      break;
    case kSSEFloat32Div:
      ASSEMBLE_SSE_BINOP(divss);
      // Don't delete this mov. It may improve performance on some CPUs
      // when there is a (v)mulss depending on the result.
      __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
      break;
    case kSSEFloat32Abs: {
      // TODO(bmeurer): Use RIP relative 128-bit constants.
      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
      __ psrlq(kScratchDoubleReg, 33);
      __ andps(i.OutputDoubleRegister(), kScratchDoubleReg);
      break;
    }
    case kSSEFloat32Neg: {
      // TODO(bmeurer): Use RIP relative 128-bit constants.
      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
      __ psllq(kScratchDoubleReg, 31);
      __ xorps(i.OutputDoubleRegister(), kScratchDoubleReg);
      break;
    }
    case kSSEFloat32Sqrt:
      ASSEMBLE_SSE_UNOP(sqrtss);
      break;
    case kSSEFloat32ToFloat64:
      ASSEMBLE_SSE_UNOP(Cvtss2sd);
      break;
    case kSSEFloat32Round: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      RoundingMode const mode =
          static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
      __ Roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
      break;
    }
    case kSSEFloat32ToInt32:
      if (instr->InputAt(0)->IsFPRegister()) {
        __ Cvttss2si(i.OutputRegister(), i.InputDoubleRegister(0));
      } else {
        __ Cvttss2si(i.OutputRegister(), i.InputOperand(0));
      }
      break;
    case kSSEFloat32ToUint32: {
      if (instr->InputAt(0)->IsFPRegister()) {
        __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
      } else {
        __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
      }
      break;
    }
    case kSSEFloat64Cmp:
      ASSEMBLE_SSE_BINOP(Ucomisd);
      break;
    case kSSEFloat64Add:
      ASSEMBLE_SSE_BINOP(addsd);
      break;
    case kSSEFloat64Sub:
      ASSEMBLE_SSE_BINOP(subsd);
      break;
    case kSSEFloat64Mul:
      ASSEMBLE_SSE_BINOP(mulsd);
      break;
    case kSSEFloat64Div:
      ASSEMBLE_SSE_BINOP(divsd);
      // Don't delete this mov. It may improve performance on some CPUs
      // when there is a (v)mulsd depending on the result.
      __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
      break;
    case kSSEFloat64Mod: {
      __ subq(rsp, Immediate(kDoubleSize));
      unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                       kDoubleSize);
      // Move values to st(0) and st(1).
      __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(1));
      __ fld_d(Operand(rsp, 0));
      __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
      __ fld_d(Operand(rsp, 0));
      // Loop while fprem isn't done.
      Label mod_loop;
      __ bind(&mod_loop);
      // This instruction traps on all kinds of inputs, but we are assuming
      // the floating-point control word is set to ignore them all.
      __ fprem();
      // The following 2 instructions implicitly use rax.
      __ fnstsw_ax();
      if (CpuFeatures::IsSupported(SAHF)) {
        CpuFeatureScope sahf_scope(tasm(), SAHF);
        __ sahf();
      } else {
        __ shrl(rax, Immediate(8));
        __ andl(rax, Immediate(0xFF));
        __ pushq(rax);
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         kSystemPointerSize);
        __ popfq();
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         -kSystemPointerSize);
      }
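      // After fnstsw/sahf (or the manual flag reload above), the x87 C2
      // status bit lands in PF, so parity_even keeps looping while fprem
      // reports only a partial remainder.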
1384 1646 : __ j(parity_even, &mod_loop);
1385 : // Move output to stack and clean up.
1386 1646 : __ fstp(1);
1387 1646 : __ fstp_d(Operand(rsp, 0));
1388 3292 : __ Movsd(i.OutputDoubleRegister(), Operand(rsp, 0));
1389 : __ addq(rsp, Immediate(kDoubleSize));
1390 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1391 1646 : -kDoubleSize);
1392 : break;
1393 : }
1394 : case kSSEFloat32Max: {
1395 66 : Label compare_swap, done_compare;
1396 66 : if (instr->InputAt(1)->IsFPRegister()) {
1397 : __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1398 : } else {
1399 0 : __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1400 : }
1401 : auto ool =
1402 : new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
1403 66 : __ j(parity_even, ool->entry());
1404 66 : __ j(above, &done_compare, Label::kNear);
1405 66 : __ j(below, &compare_swap, Label::kNear);
1406 : __ Movmskps(kScratchRegister, i.InputDoubleRegister(0));
1407 : __ testl(kScratchRegister, Immediate(1));
1408 66 : __ j(zero, &done_compare, Label::kNear);
1409 66 : __ bind(&compare_swap);
1410 66 : if (instr->InputAt(1)->IsFPRegister()) {
1411 : __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1412 : } else {
1413 0 : __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
1414 : }
1415 66 : __ bind(&done_compare);
1416 66 : __ bind(ool->exit());
1417 : break;
1418 : }
1419 : case kSSEFloat32Min: {
1420 66 : Label compare_swap, done_compare;
1421 66 : if (instr->InputAt(1)->IsFPRegister()) {
1422 : __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1423 : } else {
1424 0 : __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1425 : }
1426 : auto ool =
1427 : new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
1428 66 : __ j(parity_even, ool->entry());
1429 66 : __ j(below, &done_compare, Label::kNear);
1430 66 : __ j(above, &compare_swap, Label::kNear);
1431 66 : if (instr->InputAt(1)->IsFPRegister()) {
1432 : __ Movmskps(kScratchRegister, i.InputDoubleRegister(1));
1433 : } else {
1434 0 : __ Movss(kScratchDoubleReg, i.InputOperand(1));
1435 : __ Movmskps(kScratchRegister, kScratchDoubleReg);
1436 : }
1437 : __ testl(kScratchRegister, Immediate(1));
1438 66 : __ j(zero, &done_compare, Label::kNear);
1439 66 : __ bind(&compare_swap);
1440 66 : if (instr->InputAt(1)->IsFPRegister()) {
1441 : __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1442 : } else {
1443 0 : __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
1444 : }
1445 66 : __ bind(&done_compare);
1446 66 : __ bind(ool->exit());
1447 : break;
1448 : }
1449 : case kSSEFloat64Max: {
1450 252 : Label compare_swap, done_compare;
1451 252 : if (instr->InputAt(1)->IsFPRegister()) {
1452 : __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1453 : } else {
1454 0 : __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1455 : }
1456 : auto ool =
1457 : new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
1458 252 : __ j(parity_even, ool->entry());
1459 252 : __ j(above, &done_compare, Label::kNear);
1460 252 : __ j(below, &compare_swap, Label::kNear);
1461 : __ Movmskpd(kScratchRegister, i.InputDoubleRegister(0));
1462 : __ testl(kScratchRegister, Immediate(1));
1463 252 : __ j(zero, &done_compare, Label::kNear);
1464 252 : __ bind(&compare_swap);
1465 252 : if (instr->InputAt(1)->IsFPRegister()) {
1466 : __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1467 : } else {
1468 0 : __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1469 : }
1470 252 : __ bind(&done_compare);
1471 252 : __ bind(ool->exit());
1472 : break;
1473 : }
1474 : case kSSEFloat64Min: {
1475 337 : Label compare_swap, done_compare;
1476 337 : if (instr->InputAt(1)->IsFPRegister()) {
1477 : __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1478 : } else {
1479 0 : __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1480 : }
1481 : auto ool =
1482 : new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
1483 337 : __ j(parity_even, ool->entry());
1484 337 : __ j(below, &done_compare, Label::kNear);
1485 337 : __ j(above, &compare_swap, Label::kNear);
1486 337 : if (instr->InputAt(1)->IsFPRegister()) {
1487 : __ Movmskpd(kScratchRegister, i.InputDoubleRegister(1));
1488 : } else {
1489 0 : __ Movsd(kScratchDoubleReg, i.InputOperand(1));
1490 : __ Movmskpd(kScratchRegister, kScratchDoubleReg);
1491 : }
1492 : __ testl(kScratchRegister, Immediate(1));
1493 337 : __ j(zero, &done_compare, Label::kNear);
1494 337 : __ bind(&compare_swap);
1495 337 : if (instr->InputAt(1)->IsFPRegister()) {
1496 : __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1497 : } else {
1498 0 : __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1499 : }
1500 337 : __ bind(&done_compare);
1501 337 : __ bind(ool->exit());
1502 : break;
1503 : }
1504 : case kSSEFloat64Abs: {
1505 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1506 6 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1507 6 : __ psrlq(kScratchDoubleReg, 1);
1508 6 : __ andpd(i.OutputDoubleRegister(), kScratchDoubleReg);
1509 6 : break;
1510 : }
1511 : case kSSEFloat64Neg: {
1512 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1513 76 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1514 76 : __ psllq(kScratchDoubleReg, 63);
1515 76 : __ xorpd(i.OutputDoubleRegister(), kScratchDoubleReg);
1516 76 : break;
1517 : }
1518 : case kSSEFloat64Sqrt:
1519 415 : ASSEMBLE_SSE_UNOP(Sqrtsd);
1520 : break;
1521 : case kSSEFloat64Round: {
1522 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
1523 : RoundingMode const mode =
1524 : static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1525 : __ Roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1526 : break;
1527 : }
1528 : case kSSEFloat64ToFloat32:
1529 35866 : ASSEMBLE_SSE_UNOP(Cvtsd2ss);
1530 : break;
1531 : case kSSEFloat64ToInt32:
1532 128448 : if (instr->InputAt(0)->IsFPRegister()) {
1533 108118 : __ Cvttsd2si(i.OutputRegister(), i.InputDoubleRegister(0));
1534 : } else {
1535 40660 : __ Cvttsd2si(i.OutputRegister(), i.InputOperand(0));
1536 : }
1537 : break;
1538 : case kSSEFloat64ToUint32: {
1539 750 : if (instr->InputAt(0)->IsFPRegister()) {
1540 750 : __ Cvttsd2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1541 : } else {
1542 0 : __ Cvttsd2siq(i.OutputRegister(), i.InputOperand(0));
1543 : }
1544 1500 : if (MiscField::decode(instr->opcode())) {
1545 690 : __ AssertZeroExtended(i.OutputRegister());
1546 : }
1547 : break;
1548 : }
1549 : case kSSEFloat32ToInt64:
1550 52 : if (instr->InputAt(0)->IsFPRegister()) {
1551 52 : __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1552 : } else {
1553 0 : __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
1554 : }
1555 52 : if (instr->OutputCount() > 1) {
1556 48 : __ Set(i.OutputRegister(1), 1);
1557 48 : Label done;
1558 48 : Label fail;
1559 : __ Move(kScratchDoubleReg, static_cast<float>(INT64_MIN));
1560 48 : if (instr->InputAt(0)->IsFPRegister()) {
1561 : __ Ucomiss(kScratchDoubleReg, i.InputDoubleRegister(0));
1562 : } else {
1563 0 : __ Ucomiss(kScratchDoubleReg, i.InputOperand(0));
1564 : }
1565 : // If the input is NaN, then the conversion fails.
1566 48 : __ j(parity_even, &fail);
1567 : // If the input is INT64_MIN, then the conversion succeeds.
1568 48 : __ j(equal, &done);
1569 : __ cmpq(i.OutputRegister(0), Immediate(1));
1570 : // If the conversion results in INT64_MIN, but the input was not
1571 : // INT64_MIN, then the conversion fails.
1572 48 : __ j(no_overflow, &done);
1573 48 : __ bind(&fail);
1574 48 : __ Set(i.OutputRegister(1), 0);
1575 48 : __ bind(&done);
1576 : }
1577 : break;
1578 : case kSSEFloat64ToInt64:
1579 895 : if (instr->InputAt(0)->IsFPRegister()) {
1580 894 : __ Cvttsd2siq(i.OutputRegister(0), i.InputDoubleRegister(0));
1581 : } else {
1582 2 : __ Cvttsd2siq(i.OutputRegister(0), i.InputOperand(0));
1583 : }
1584 897 : if (instr->OutputCount() > 1) {
1585 766 : __ Set(i.OutputRegister(1), 1);
1586 768 : Label done;
1587 768 : Label fail;
1588 : __ Move(kScratchDoubleReg, static_cast<double>(INT64_MIN));
1589 767 : if (instr->InputAt(0)->IsFPRegister()) {
1590 : __ Ucomisd(kScratchDoubleReg, i.InputDoubleRegister(0));
1591 : } else {
1592 0 : __ Ucomisd(kScratchDoubleReg, i.InputOperand(0));
1593 : }
1594 : // If the input is NaN, then the conversion fails.
1595 768 : __ j(parity_even, &fail);
1596 : // If the input is INT64_MIN, then the conversion succeeds.
1597 769 : __ j(equal, &done);
1598 : __ cmpq(i.OutputRegister(0), Immediate(1));
1599 : // If the conversion results in INT64_MIN, but the input was not
1600 : // INT64_MIN, then the conversion fails.
1601 766 : __ j(no_overflow, &done);
1602 771 : __ bind(&fail);
1603 766 : __ Set(i.OutputRegister(1), 0);
1604 767 : __ bind(&done);
1605 : }
1606 : break;
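     : // Sketch of the success flag computed by the two cases above
     : // (illustrative): cvttss2siq/cvttsd2siq return INT64_MIN
     : // (0x8000000000000000) for NaN and out-of-range inputs, and
     : // cmpq(result, 1) overflows only when result == INT64_MIN:
     : //   if (isnan(input))                    success = false; // parity_even
     : //   else if (input == (double)INT64_MIN) success = true;  // equal
     : //   else                          success = (result != INT64_MIN);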
1607 : case kSSEFloat32ToUint64: {
1608 52 : Label fail;
1609 100 : if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
1610 52 : if (instr->InputAt(0)->IsFPRegister()) {
1611 52 : __ Cvttss2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
1612 : } else {
1613 0 : __ Cvttss2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
1614 : }
1615 100 : if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
1616 52 : __ bind(&fail);
1617 : break;
1618 : }
1619 : case kSSEFloat64ToUint64: {
1620 2972 : Label fail;
1621 3028 : if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
1622 2972 : if (instr->InputAt(0)->IsFPRegister()) {
1623 2972 : __ Cvttsd2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
1624 : } else {
1625 0 : __ Cvttsd2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
1626 : }
1627 3028 : if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
1628 2972 : __ bind(&fail);
1629 : break;
1630 : }
1631 : case kSSEInt32ToFloat64:
1632 432758 : if (instr->InputAt(0)->IsRegister()) {
1633 375068 : __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1634 : } else {
1635 115374 : __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1636 : }
1637 : break;
1638 : case kSSEInt32ToFloat32:
1639 960 : if (instr->InputAt(0)->IsRegister()) {
1640 952 : __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1641 : } else {
1642 16 : __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1643 : }
1644 : break;
1645 : case kSSEInt64ToFloat32:
1646 31 : if (instr->InputAt(0)->IsRegister()) {
1647 31 : __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1648 : } else {
1649 0 : __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1650 : }
1651 : break;
1652 : case kSSEInt64ToFloat64:
1653 3256 : if (instr->InputAt(0)->IsRegister()) {
1654 1241 : __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1655 : } else {
1656 4030 : __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1657 : }
1658 : break;
1659 : case kSSEUint64ToFloat32:
1660 32 : if (instr->InputAt(0)->IsRegister()) {
1661 32 : __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1662 : } else {
1663 0 : __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1664 : }
1665 : break;
1666 : case kSSEUint64ToFloat64:
1667 3607 : if (instr->InputAt(0)->IsRegister()) {
1668 2375 : __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1669 : } else {
1670 2464 : __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1671 : }
1672 : break;
1673 : case kSSEUint32ToFloat64:
1674 11202 : if (instr->InputAt(0)->IsRegister()) {
1675 395 : __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1676 : } else {
1677 21614 : __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1678 : }
1679 : break;
1680 : case kSSEUint32ToFloat32:
1681 88 : if (instr->InputAt(0)->IsRegister()) {
1682 88 : __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1683 : } else {
1684 0 : __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1685 : }
1686 : break;
1687 : case kSSEFloat64ExtractLowWord32:
1688 116 : if (instr->InputAt(0)->IsFPStackSlot()) {
1689 0 : __ movl(i.OutputRegister(), i.InputOperand(0));
1690 : } else {
1691 : __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
1692 : }
1693 : break;
1694 : case kSSEFloat64ExtractHighWord32:
1695 96416 : if (instr->InputAt(0)->IsFPStackSlot()) {
1696 122818 : __ movl(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2));
1697 : } else {
1698 35007 : __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1);
1699 : }
1700 : break;
1701 : case kSSEFloat64InsertLowWord32:
1702 4 : if (instr->InputAt(1)->IsRegister()) {
1703 4 : __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 0);
1704 : } else {
1705 0 : __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
1706 : }
1707 : break;
1708 : case kSSEFloat64InsertHighWord32:
1709 116 : if (instr->InputAt(1)->IsRegister()) {
1710 116 : __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 1);
1711 : } else {
1712 0 : __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
1713 : }
1714 : break;
1715 : case kSSEFloat64LoadLowWord32:
1716 112 : if (instr->InputAt(0)->IsRegister()) {
1717 : __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
1718 : } else {
1719 0 : __ Movd(i.OutputDoubleRegister(), i.InputOperand(0));
1720 : }
1721 : break;
1722 : case kAVXFloat32Cmp: {
1723 : CpuFeatureScope avx_scope(tasm(), AVX);
1724 1481 : if (instr->InputAt(1)->IsFPRegister()) {
1725 1463 : __ vucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1726 : } else {
1727 36 : __ vucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1728 : }
1729 : break;
1730 : }
1731 : case kAVXFloat32Add:
1732 3586 : ASSEMBLE_AVX_BINOP(vaddss);
1733 : break;
1734 : case kAVXFloat32Sub:
1735 5130 : ASSEMBLE_AVX_BINOP(vsubss);
1736 : break;
1737 : case kAVXFloat32Mul:
1738 1742 : ASSEMBLE_AVX_BINOP(vmulss);
1739 : break;
1740 : case kAVXFloat32Div:
1741 704 : ASSEMBLE_AVX_BINOP(vdivss);
1742 : // Don't delete this mov. It may improve performance on some CPUs,
1743 : // when there is a (v)mulss depending on the result.
1744 : __ Movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1745 : break;
1746 : case kAVXFloat64Cmp: {
1747 : CpuFeatureScope avx_scope(tasm(), AVX);
1748 254645 : if (instr->InputAt(1)->IsFPRegister()) {
1749 233953 : __ vucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1750 : } else {
1751 41384 : __ vucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1752 : }
1753 : break;
1754 : }
1755 : case kAVXFloat64Add:
1756 159360 : ASSEMBLE_AVX_BINOP(vaddsd);
1757 : break;
1758 : case kAVXFloat64Sub:
1759 31234 : ASSEMBLE_AVX_BINOP(vsubsd);
1760 : break;
1761 : case kAVXFloat64Mul:
1762 23700 : ASSEMBLE_AVX_BINOP(vmulsd);
1763 : break;
1764 : case kAVXFloat64Div:
1765 23878 : ASSEMBLE_AVX_BINOP(vdivsd);
1766 : // Don't delete this mov. It may improve performance on some CPUs,
1767 : // when there is a (v)mulsd depending on the result.
1768 : __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1769 : break;
1770 : case kAVXFloat32Abs: {
1771 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1772 : CpuFeatureScope avx_scope(tasm(), AVX);
1773 66 : __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
1774 : __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 33);
1775 66 : if (instr->InputAt(0)->IsFPRegister()) {
1776 : __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg,
1777 : i.InputDoubleRegister(0));
1778 : } else {
1779 0 : __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg,
1780 : i.InputOperand(0));
1781 : }
1782 : break;
1783 : }
1784 : case kAVXFloat32Neg: {
1785 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1786 : CpuFeatureScope avx_scope(tasm(), AVX);
1787 168 : __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
1788 : __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 31);
1789 166 : if (instr->InputAt(0)->IsFPRegister()) {
1790 : __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg,
1791 : i.InputDoubleRegister(0));
1792 : } else {
1793 0 : __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg,
1794 : i.InputOperand(0));
1795 : }
1796 : break;
1797 : }
1798 : case kAVXFloat64Abs: {
1799 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1800 : CpuFeatureScope avx_scope(tasm(), AVX);
1801 620 : __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
1802 : __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 1);
1803 620 : if (instr->InputAt(0)->IsFPRegister()) {
1804 : __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg,
1805 : i.InputDoubleRegister(0));
1806 : } else {
1807 0 : __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg,
1808 : i.InputOperand(0));
1809 : }
1810 : break;
1811 : }
1812 : case kAVXFloat64Neg: {
1813 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1814 : CpuFeatureScope avx_scope(tasm(), AVX);
1815 10557 : __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
1816 : __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 63);
1817 10557 : if (instr->InputAt(0)->IsFPRegister()) {
1818 : __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg,
1819 : i.InputDoubleRegister(0));
1820 : } else {
1821 82 : __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg,
1822 : i.InputOperand(0));
1823 : }
1824 : break;
1825 : }
1826 : case kSSEFloat64SilenceNaN:
1827 : __ Xorpd(kScratchDoubleReg, kScratchDoubleReg);
1828 : __ Subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
1829 : break;
1830 : case kX64Movsxbl:
1831 41599 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1832 83809 : ASSEMBLE_MOVX(movsxbl);
1833 41599 : __ AssertZeroExtended(i.OutputRegister());
1834 41599 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1835 41599 : break;
1836 : case kX64Movzxbl:
1837 175453 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1838 352131 : ASSEMBLE_MOVX(movzxbl);
1839 175454 : __ AssertZeroExtended(i.OutputRegister());
1840 175454 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1841 175454 : break;
1842 : case kX64Movsxbq:
1843 14291 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1844 28586 : ASSEMBLE_MOVX(movsxbq);
1845 14291 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1846 14291 : break;
1847 : case kX64Movzxbq:
1848 14804 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1849 29608 : ASSEMBLE_MOVX(movzxbq);
1850 14804 : __ AssertZeroExtended(i.OutputRegister());
1851 14804 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1852 14804 : break;
1853 : case kX64Movb: {
1854 80019 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1855 80017 : size_t index = 0;
1856 80017 : Operand operand = i.MemoryOperand(&index);
1857 160034 : if (HasImmediateInput(instr, index)) {
1858 13332 : __ movb(operand, Immediate(i.InputInt8(index)));
1859 : } else {
1860 73351 : __ movb(operand, i.InputRegister(index));
1861 : }
1862 80018 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1863 : break;
1864 : }
1865 : case kX64Movsxwl:
1866 9864 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1867 20210 : ASSEMBLE_MOVX(movsxwl);
1868 9864 : __ AssertZeroExtended(i.OutputRegister());
1869 9864 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1870 9864 : break;
1871 : case kX64Movzxwl:
1872 153769 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1873 313872 : ASSEMBLE_MOVX(movzxwl);
1874 153769 : __ AssertZeroExtended(i.OutputRegister());
1875 153769 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1876 153769 : break;
1877 : case kX64Movsxwq:
1878 9643 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1879 19290 : ASSEMBLE_MOVX(movsxwq);
1880 : break;
1881 : case kX64Movzxwq:
1882 1236 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1883 2472 : ASSEMBLE_MOVX(movzxwq);
1884 1236 : __ AssertZeroExtended(i.OutputRegister());
1885 1236 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1886 1236 : break;
1887 : case kX64Movw: {
1888 15315 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1889 15315 : size_t index = 0;
1890 15315 : Operand operand = i.MemoryOperand(&index);
1891 30630 : if (HasImmediateInput(instr, index)) {
1892 1798 : __ movw(operand, Immediate(i.InputInt16(index)));
1893 : } else {
1894 14416 : __ movw(operand, i.InputRegister(index));
1895 : }
1896 15316 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1897 : break;
1898 : }
1899 : case kX64Movl:
1900 2640837 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1901 2640491 : if (instr->HasOutput()) {
1902 1320808 : if (instr->addressing_mode() == kMode_None) {
1903 606633 : if (instr->InputAt(0)->IsRegister()) {
1904 571356 : __ movl(i.OutputRegister(), i.InputRegister(0));
1905 : } else {
1906 70554 : __ movl(i.OutputRegister(), i.InputOperand(0));
1907 : }
1908 : } else {
1909 714178 : __ movl(i.OutputRegister(), i.MemoryOperand());
1910 : }
1911 1320920 : __ AssertZeroExtended(i.OutputRegister());
1912 : } else {
1913 1319683 : size_t index = 0;
1914 1319683 : Operand operand = i.MemoryOperand(&index);
1915 2639656 : if (HasImmediateInput(instr, index)) {
1916 364417 : __ movl(operand, i.InputImmediate(index));
1917 : } else {
1918 955411 : __ movl(operand, i.InputRegister(index));
1919 : }
1920 : }
1921 2640675 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1922 2640754 : break;
1923 : case kX64Movsxlq:
1924 419547 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1925 1248225 : ASSEMBLE_MOVX(movsxlq);
1926 419545 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1927 419550 : break;
1928 : case kX64MovqDecompressTaggedSigned: {
1929 87021 : CHECK(instr->HasOutput());
1930 87022 : __ DecompressTaggedSigned(i.OutputRegister(), i.MemoryOperand());
1931 87023 : break;
1932 : }
1933 : case kX64MovqDecompressTaggedPointer: {
1934 1241066 : CHECK(instr->HasOutput());
1935 1241080 : __ DecompressTaggedPointer(i.OutputRegister(), i.MemoryOperand());
1936 1241082 : break;
1937 : }
1938 : case kX64MovqDecompressAnyTagged: {
1939 2932646 : CHECK(instr->HasOutput());
1940 2932650 : __ DecompressAnyTagged(i.OutputRegister(), i.MemoryOperand());
1941 2932653 : break;
1942 : }
1943 : case kX64MovqCompressTagged: {
1944 2487159 : CHECK(!instr->HasOutput());
1945 2487159 : size_t index = 0;
1946 2487159 : Operand operand = i.MemoryOperand(&index);
1947 4974320 : if (HasImmediateInput(instr, index)) {
1948 197938 : __ StoreTaggedField(operand, i.InputImmediate(index));
1949 : } else {
1950 2289222 : __ StoreTaggedField(operand, i.InputRegister(index));
1951 : }
1952 : break;
1953 : }
1954 : case kX64DecompressSigned: {
1955 4 : CHECK(instr->HasOutput());
1956 4 : __ movsxlq(i.OutputRegister(), i.InputRegister(0));
1957 4 : break;
1958 : }
1959 : case kX64DecompressPointer: {
1960 4 : CHECK(instr->HasOutput());
1961 4 : __ movsxlq(i.OutputRegister(), i.InputRegister(0));
1962 : __ addq(i.OutputRegister(), kRootRegister);
1963 : break;
1964 : }
1965 : case kX64DecompressAny: {
1966 8 : CHECK(instr->HasOutput());
1967 8 : __ movsxlq(i.OutputRegister(), i.InputRegister(0));
1968 : // TODO(solanes): Do branchful compute?
1969 : // Branchlessly compute |masked_root|:
1970 : STATIC_ASSERT((kSmiTagSize == 1) && (kSmiTag < 32));
1971 : Register masked_root = kScratchRegister;
1972 : __ movl(masked_root, i.OutputRegister());
1973 : __ andl(masked_root, Immediate(kSmiTagMask));
1974 : __ negq(masked_root);
1975 : __ andq(masked_root, kRootRegister);
1976 : // This add either leaves the value unchanged (if it is a Smi) or
1977 : // adds the isolate root (if it is a heap object).
1978 : __ addq(i.OutputRegister(), masked_root);
1979 : break;
1980 : }
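     : // Illustrative C equivalent of the branchless sequence above,
     : // assuming kSmiTagMask == 1 and Smis carrying tag bit 0:
     : //   int64_t value = (int32_t)compressed;       // movsxlq
     : //   int64_t mask  = -(value & kSmiTagMask);    // 0 or all-ones
     : //   return value + (mask & isolate_root);      // Smi stays as-is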
1981 : // TODO(solanes): Combine into one Compress? They seem to be identical.
1982 : // TODO(solanes): We might get away with doing a no-op in these three cases.
1983 : // The movl instruction is the conservative way for the moment.
1984 : case kX64CompressSigned: {
1985 4 : __ movl(i.OutputRegister(), i.InputRegister(0));
1986 : break;
1987 : }
1988 : case kX64CompressPointer: {
1989 4 : __ movl(i.OutputRegister(), i.InputRegister(0));
1990 : break;
1991 : }
1992 : case kX64CompressAny: {
1993 8 : __ movl(i.OutputRegister(), i.InputRegister(0));
1994 : break;
1995 : }
1996 : case kX64Movq:
1997 2101607 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1998 2101661 : if (instr->HasOutput()) {
1999 1591481 : __ movq(i.OutputRegister(), i.MemoryOperand());
2000 : } else {
2001 510340 : size_t index = 0;
2002 510340 : Operand operand = i.MemoryOperand(&index);
2003 1020704 : if (HasImmediateInput(instr, index)) {
2004 1304 : __ movq(operand, i.InputImmediate(index));
2005 : } else {
2006 509048 : __ movq(operand, i.InputRegister(index));
2007 : }
2008 : }
2009 2101966 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2010 2101859 : break;
2011 : case kX64Movss:
2012 28763 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
2013 28763 : if (instr->HasOutput()) {
2014 15638 : __ movss(i.OutputDoubleRegister(), i.MemoryOperand());
2015 : } else {
2016 13126 : size_t index = 0;
2017 13126 : Operand operand = i.MemoryOperand(&index);
2018 26252 : __ movss(operand, i.InputDoubleRegister(index));
2019 : }
2020 : break;
2021 : case kX64Movsd: {
2022 673675 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
2023 673709 : if (instr->HasOutput()) {
2024 : const MemoryAccessMode access_mode =
2025 : static_cast<MemoryAccessMode>(MiscField::decode(opcode));
2026 423762 : if (access_mode == kMemoryAccessPoisoned) {
2027 : // If we have to poison the loaded value, we load into a general
2028 : // purpose register first, mask it with the poison, and move the
2029 : // value from the general purpose register into the double register.
2030 0 : __ movq(kScratchRegister, i.MemoryOperand());
2031 : __ andq(kScratchRegister, kSpeculationPoisonRegister);
2032 : __ Movq(i.OutputDoubleRegister(), kScratchRegister);
2033 : } else {
2034 : __ Movsd(i.OutputDoubleRegister(), i.MemoryOperand());
2035 : }
2036 : } else {
2037 249947 : size_t index = 0;
2038 249947 : Operand operand = i.MemoryOperand(&index);
2039 249945 : __ Movsd(operand, i.InputDoubleRegister(index));
2040 : }
2041 : break;
2042 : }
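     : // Illustrative: kSpeculationPoisonRegister is expected to hold
     : // all-ones on correctly predicted paths and zero under
     : // misspeculation, so the andq above turns a speculatively loaded
     : // double into zero bits rather than a secret-dependent value.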
2043 : case kX64Movdqu: {
2044 : CpuFeatureScope sse_scope(tasm(), SSSE3);
2045 9740 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
2046 9740 : if (instr->HasOutput()) {
2047 5660 : __ movdqu(i.OutputSimd128Register(), i.MemoryOperand());
2048 : } else {
2049 4080 : size_t index = 0;
2050 4080 : Operand operand = i.MemoryOperand(&index);
2051 8160 : __ movdqu(operand, i.InputSimd128Register(index));
2052 : }
2053 : break;
2054 : }
2055 : case kX64BitcastFI:
2056 554 : if (instr->InputAt(0)->IsFPStackSlot()) {
2057 0 : __ movl(i.OutputRegister(), i.InputOperand(0));
2058 : } else {
2059 : __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
2060 : }
2061 : break;
2062 : case kX64BitcastDL:
2063 531 : if (instr->InputAt(0)->IsFPStackSlot()) {
2064 0 : __ movq(i.OutputRegister(), i.InputOperand(0));
2065 : } else {
2066 : __ Movq(i.OutputRegister(), i.InputDoubleRegister(0));
2067 : }
2068 : break;
2069 : case kX64BitcastIF:
2070 307 : if (instr->InputAt(0)->IsRegister()) {
2071 : __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
2072 : } else {
2073 0 : __ movss(i.OutputDoubleRegister(), i.InputOperand(0));
2074 : }
2075 : break;
2076 : case kX64BitcastLD:
2077 153 : if (instr->InputAt(0)->IsRegister()) {
2078 : __ Movq(i.OutputDoubleRegister(), i.InputRegister(0));
2079 : } else {
2080 0 : __ Movsd(i.OutputDoubleRegister(), i.InputOperand(0));
2081 : }
2082 : break;
2083 : case kX64Lea32: {
2084 : AddressingMode mode = AddressingModeField::decode(instr->opcode());
2085 : // Shorten "leal" to "addl", "subl" or "shll" if the register allocation
2086 : // and addressing mode just happens to work out. The "addl"/"subl" forms
2087 : // in these cases are faster based on measurements.
2088 414306 : if (i.InputRegister(0) == i.OutputRegister()) {
2089 204004 : if (mode == kMode_MRI) {
2090 : int32_t constant_summand = i.InputInt32(1);
2091 : DCHECK_NE(0, constant_summand);
2092 114167 : if (constant_summand > 0) {
2093 85828 : __ addl(i.OutputRegister(), Immediate(constant_summand));
2094 : } else {
2095 28339 : __ subl(i.OutputRegister(),
2096 : Immediate(base::NegateWithWraparound(constant_summand)));
2097 : }
2098 89826 : } else if (mode == kMode_MR1) {
2099 29588 : if (i.InputRegister(1) == i.OutputRegister()) {
2100 19819 : __ shll(i.OutputRegister(), Immediate(1));
2101 : } else {
2102 9769 : __ addl(i.OutputRegister(), i.InputRegister(1));
2103 : }
2104 60238 : } else if (mode == kMode_M2) {
2105 0 : __ shll(i.OutputRegister(), Immediate(1));
2106 60238 : } else if (mode == kMode_M4) {
2107 2912 : __ shll(i.OutputRegister(), Immediate(2));
2108 57326 : } else if (mode == kMode_M8) {
2109 95 : __ shll(i.OutputRegister(), Immediate(3));
2110 : } else {
2111 57232 : __ leal(i.OutputRegister(), i.MemoryOperand());
2112 : }
2113 261042 : } else if (mode == kMode_MR1 &&
2114 : i.InputRegister(1) == i.OutputRegister()) {
2115 25758 : __ addl(i.OutputRegister(), i.InputRegister(0));
2116 : } else {
2117 184544 : __ leal(i.OutputRegister(), i.MemoryOperand());
2118 : }
2119 414320 : __ AssertZeroExtended(i.OutputRegister());
2120 414310 : break;
2121 : }
2122 : case kX64Lea: {
2123 : AddressingMode mode = AddressingModeField::decode(instr->opcode());
2124 : // Shorten "leaq" to "addq", "subq" or "shlq" if the register allocation
2125 : // and addressing mode just happens to work out. The "addq"/"subq" forms
2126 : // in these cases are faster based on measurements.
2127 2410137 : if (i.InputRegister(0) == i.OutputRegister()) {
2128 631439 : if (mode == kMode_MRI) {
2129 : int32_t constant_summand = i.InputInt32(1);
2130 382125 : if (constant_summand > 0) {
2131 297853 : __ addq(i.OutputRegister(), Immediate(constant_summand));
2132 84272 : } else if (constant_summand < 0) {
2133 168492 : __ subq(i.OutputRegister(), Immediate(-constant_summand));
2134 : }
2135 249315 : } else if (mode == kMode_MR1) {
2136 193786 : if (i.InputRegister(1) == i.OutputRegister()) {
2137 145868 : __ shlq(i.OutputRegister(), Immediate(1));
2138 : } else {
2139 47918 : __ addq(i.OutputRegister(), i.InputRegister(1));
2140 : }
2141 55529 : } else if (mode == kMode_M2) {
2142 0 : __ shlq(i.OutputRegister(), Immediate(1));
2143 55529 : } else if (mode == kMode_M4) {
2144 10552 : __ shlq(i.OutputRegister(), Immediate(2));
2145 44977 : } else if (mode == kMode_M8) {
2146 2130 : __ shlq(i.OutputRegister(), Immediate(3));
2147 : } else {
2148 42847 : __ leaq(i.OutputRegister(), i.MemoryOperand());
2149 : }
2150 2113842 : } else if (mode == kMode_MR1 &&
2151 : i.InputRegister(1) == i.OutputRegister()) {
2152 170359 : __ addq(i.OutputRegister(), i.InputRegister(0));
2153 : } else {
2154 1608342 : __ leaq(i.OutputRegister(), i.MemoryOperand());
2155 : }
2156 : break;
2157 : }
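     : // Illustrative examples of the peephole in the two Lea cases above:
     : //   leaq r, [r + imm]  ->  addq r, imm   (subq for negative imm)
     : //   leaq r, [r + r2]   ->  addq r, r2    (kMode_MR1)
     : //   leaq r, [r + r]    ->  shlq r, 1     (kMode_MR1, same register)
     : //   leaq r, [r*4]      ->  shlq r, 2     (kMode_M4)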
2158 : case kX64Dec32:
2159 0 : __ decl(i.OutputRegister());
2160 : break;
2161 : case kX64Inc32:
2162 0 : __ incl(i.OutputRegister());
2163 : break;
2164 : case kX64Push:
2165 3680031 : if (AddressingModeField::decode(instr->opcode()) != kMode_None) {
2166 4 : size_t index = 0;
2167 4 : Operand operand = i.MemoryOperand(&index);
2168 4 : __ pushq(operand);
2169 : frame_access_state()->IncreaseSPDelta(1);
2170 4 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2171 4 : kSystemPointerSize);
2172 3680027 : } else if (HasImmediateInput(instr, 0)) {
2173 940911 : __ pushq(i.InputImmediate(0));
2174 : frame_access_state()->IncreaseSPDelta(1);
2175 940914 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2176 940914 : kSystemPointerSize);
2177 2739114 : } else if (instr->InputAt(0)->IsRegister()) {
2178 2011821 : __ pushq(i.InputRegister(0));
2179 : frame_access_state()->IncreaseSPDelta(1);
2180 2011828 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2181 2011828 : kSystemPointerSize);
2182 1446670 : } else if (instr->InputAt(0)->IsFloatRegister() ||
2183 : instr->InputAt(0)->IsDoubleRegister()) {
2184 : // TODO(titzer): use another machine instruction?
2185 14806 : __ subq(rsp, Immediate(kDoubleSize));
2186 : frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize);
2187 14806 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2188 14806 : kDoubleSize);
2189 29612 : __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
2190 712487 : } else if (instr->InputAt(0)->IsSimd128Register()) {
2191 : // TODO(titzer): use another machine instruction?
2192 120 : __ subq(rsp, Immediate(kSimd128Size));
2193 : frame_access_state()->IncreaseSPDelta(kSimd128Size /
2194 : kSystemPointerSize);
2195 120 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2196 120 : kSimd128Size);
2197 240 : __ Movups(Operand(rsp, 0), i.InputSimd128Register(0));
2198 727064 : } else if (instr->InputAt(0)->IsStackSlot() ||
2199 723176 : instr->InputAt(0)->IsFloatStackSlot() ||
2200 : instr->InputAt(0)->IsDoubleStackSlot()) {
2201 712103 : __ pushq(i.InputOperand(0));
2202 : frame_access_state()->IncreaseSPDelta(1);
2203 712108 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2204 712108 : kSystemPointerSize);
2205 : } else {
2206 : DCHECK(instr->InputAt(0)->IsSimd128StackSlot());
2207 264 : __ Movups(kScratchDoubleReg, i.InputOperand(0));
2208 : // TODO(titzer): use another machine instruction?
2209 : __ subq(rsp, Immediate(kSimd128Size));
2210 : frame_access_state()->IncreaseSPDelta(kSimd128Size /
2211 : kSystemPointerSize);
2212 264 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2213 264 : kSimd128Size);
2214 528 : __ Movups(Operand(rsp, 0), kScratchDoubleReg);
2215 : }
2216 : break;
2217 : case kX64Poke: {
2218 : int slot = MiscField::decode(instr->opcode());
2219 3392 : if (HasImmediateInput(instr, 0)) {
2220 2264 : __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputImmediate(0));
2221 : } else {
2222 4520 : __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputRegister(0));
2223 : }
2224 : break;
2225 : }
2226 : case kX64Peek: {
2227 : int reverse_slot = i.InputInt32(0);
2228 : int offset =
2229 5072 : FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
2230 5072 : if (instr->OutputAt(0)->IsFPRegister()) {
2231 : LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
2232 2528 : if (op->representation() == MachineRepresentation::kFloat64) {
2233 2528 : __ Movsd(i.OutputDoubleRegister(), Operand(rbp, offset));
2234 : } else {
2235 : DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
2236 2528 : __ Movss(i.OutputFloatRegister(), Operand(rbp, offset));
2237 : }
2238 : } else {
2239 7632 : __ movq(i.OutputRegister(), Operand(rbp, offset));
2240 : }
2241 : break;
2242 : }
2243 : // TODO(gdeepti): Get rid of redundant moves for F32x4Splat/Extract below
2244 : case kX64F32x4Splat: {
2245 140 : XMMRegister dst = i.OutputSimd128Register();
2246 140 : if (instr->InputAt(0)->IsFPRegister()) {
2247 140 : __ movss(dst, i.InputDoubleRegister(0));
2248 : } else {
2249 0 : __ movss(dst, i.InputOperand(0));
2250 : }
2251 140 : __ shufps(dst, dst, 0x0);
2252 : break;
2253 : }
2254 : case kX64F32x4ExtractLane: {
2255 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2256 64 : __ extractps(kScratchRegister, i.InputSimd128Register(0), i.InputInt8(1));
2257 32 : __ movd(i.OutputDoubleRegister(), kScratchRegister);
2258 : break;
2259 : }
2260 : case kX64F32x4ReplaceLane: {
2261 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2262 : // The insertps instruction uses imm8[5:4] to indicate the lane
2263 : // that needs to be replaced.
2264 32 : byte select = i.InputInt8(1) << 4 & 0x30;
2265 32 : if (instr->InputAt(2)->IsFPRegister()) {
2266 64 : __ insertps(i.OutputSimd128Register(), i.InputDoubleRegister(2),
2267 32 : select);
2268 : } else {
2269 0 : __ insertps(i.OutputSimd128Register(), i.InputOperand(2), select);
2270 : }
2271 : break;
2272 : }
2273 : case kX64F32x4SConvertI32x4: {
2274 4 : __ cvtdq2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2275 4 : break;
2276 : }
2277 : case kX64F32x4UConvertI32x4: {
2278 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2279 : DCHECK_NE(i.OutputSimd128Register(), kScratchDoubleReg);
2280 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2281 : XMMRegister dst = i.OutputSimd128Register();
2282 4 : __ pxor(kScratchDoubleReg, kScratchDoubleReg); // zeros
2283 4 : __ pblendw(kScratchDoubleReg, dst, 0x55); // get lo 16 bits
2284 : __ psubd(dst, kScratchDoubleReg); // get hi 16 bits
2285 4 : __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly
2286 4 : __ psrld(dst, 1); // divide by 2 to get in unsigned range
2287 4 : __ cvtdq2ps(dst, dst); // convert hi exactly
2288 4 : __ addps(dst, dst); // double hi, exactly
2289 4 : __ addps(dst, kScratchDoubleReg); // add hi and lo, may round.
2290 : break;
2291 : }
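     : // Per-lane math of the sequence above: an unsigned lane u is split
     : // as u = hi + lo, where lo is the low 16 bits (cvtdq2ps converts it
     : // exactly) and hi has its low 16 bits clear, so hi/2 stays in signed
     : // range and also converts exactly; doubling it back with addps is
     : // exact, and only the final addps of hi + lo can round.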
2292 : case kX64F32x4Abs: {
2293 : XMMRegister dst = i.OutputSimd128Register();
2294 : XMMRegister src = i.InputSimd128Register(0);
2295 4 : if (dst == src) {
2296 4 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2297 4 : __ psrld(kScratchDoubleReg, 1);
2298 4 : __ andps(i.OutputSimd128Register(), kScratchDoubleReg);
2299 : } else {
2300 0 : __ pcmpeqd(dst, dst);
2301 0 : __ psrld(dst, 1);
2302 0 : __ andps(dst, i.InputSimd128Register(0));
2303 : }
2304 : break;
2305 : }
2306 : case kX64F32x4Neg: {
2307 : XMMRegister dst = i.OutputSimd128Register();
2308 : XMMRegister src = i.InputSimd128Register(0);
2309 4 : if (dst == src) {
2310 4 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2311 4 : __ pslld(kScratchDoubleReg, 31);
2312 4 : __ xorps(i.OutputSimd128Register(), kScratchDoubleReg);
2313 : } else {
2314 0 : __ pcmpeqd(dst, dst);
2315 0 : __ pslld(dst, 31);
2316 0 : __ xorps(dst, i.InputSimd128Register(0));
2317 : }
2318 : break;
2319 : }
2320 : case kX64F32x4RecipApprox: {
2321 4 : __ rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2322 4 : break;
2323 : }
2324 : case kX64F32x4RecipSqrtApprox: {
2325 4 : __ rsqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2326 4 : break;
2327 : }
2328 : case kX64F32x4Add: {
2329 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2330 12 : __ addps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2331 12 : break;
2332 : }
2333 : case kX64F32x4AddHoriz: {
2334 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2335 : CpuFeatureScope sse_scope(tasm(), SSE3);
2336 4 : __ haddps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2337 : break;
2338 : }
2339 : case kX64F32x4Sub: {
2340 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2341 4 : __ subps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2342 4 : break;
2343 : }
2344 : case kX64F32x4Mul: {
2345 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2346 4 : __ mulps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2347 4 : break;
2348 : }
2349 : case kX64F32x4Min: {
2350 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2351 : // minps doesn't propagate NaN lanes in the first source. Compare this
2352 : // with itself to generate all-ones in those lanes (quiet NaNs) and OR
2353 : // them with the result of minps to simulate NaN propagation.
2354 4 : __ movaps(kScratchDoubleReg, i.InputSimd128Register(0));
2355 4 : __ cmpps(kScratchDoubleReg, kScratchDoubleReg, 0x4);
2356 4 : __ minps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2357 4 : __ orps(i.OutputSimd128Register(), kScratchDoubleReg);
2358 4 : break;
2359 : }
2360 : case kX64F32x4Max: {
2361 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2362 : // maxps doesn't propagate NaN lanes in the first source. Compare this
2363 : // with itself to generate all-ones in those lanes (quiet NaNs) and OR
2364 : // them with the result of maxps to simulate NaN propagation.
2365 4 : __ movaps(kScratchDoubleReg, i.InputSimd128Register(0));
2366 4 : __ cmpps(kScratchDoubleReg, kScratchDoubleReg, 0x4);
2367 4 : __ maxps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2368 4 : __ orps(i.OutputSimd128Register(), kScratchDoubleReg);
2369 4 : break;
2370 : }
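     : // Per-lane, the self-compare cmpps(..., 0x4) above (x != x) yields
     : // all-ones exactly in NaN lanes of the first source, so the final
     : // orps forces those lanes to a NaN pattern regardless of which lane
     : // minps/maxps picked.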
2371 : case kX64F32x4Eq: {
2372 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2373 4 : __ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x0);
2374 4 : break;
2375 : }
2376 : case kX64F32x4Ne: {
2377 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2378 4 : __ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x4);
2379 4 : break;
2380 : }
2381 : case kX64F32x4Lt: {
2382 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2383 8 : __ cmpltps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2384 : break;
2385 : }
2386 : case kX64F32x4Le: {
2387 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2388 8 : __ cmpleps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2389 : break;
2390 : }
2391 : case kX64I32x4Splat: {
2392 1072 : XMMRegister dst = i.OutputSimd128Register();
2393 1072 : if (instr->InputAt(0)->IsRegister()) {
2394 1072 : __ movd(dst, i.InputRegister(0));
2395 : } else {
2396 0 : __ movd(dst, i.InputOperand(0));
2397 : }
2398 1072 : __ pshufd(dst, dst, 0x0);
2399 : break;
2400 : }
2401 : case kX64I32x4ExtractLane: {
2402 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2403 3768 : __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
2404 : break;
2405 : }
2406 : case kX64I32x4ReplaceLane: {
2407 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2408 1784 : if (instr->InputAt(2)->IsRegister()) {
2409 344 : __ Pinsrd(i.OutputSimd128Register(), i.InputRegister(2),
2410 344 : i.InputInt8(1));
2411 : } else {
2412 2880 : __ Pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
2413 : }
2414 : break;
2415 : }
2416 : case kX64I32x4SConvertF32x4: {
2417 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2418 : XMMRegister dst = i.OutputSimd128Register();
2419 : // NaN -> 0
2420 4 : __ movaps(kScratchDoubleReg, dst);
2421 : __ cmpeqps(kScratchDoubleReg, kScratchDoubleReg);
2422 : __ pand(dst, kScratchDoubleReg);
2423 : // Set top bit if >= 0 (but not -0.0!)
2424 : __ pxor(kScratchDoubleReg, dst);
2425 : // Convert
2426 4 : __ cvttps2dq(dst, dst);
2427 : // Set top bit if a lane that was >= 0 converted to < 0 (overflow)
2428 : __ pand(kScratchDoubleReg, dst);
2429 4 : __ psrad(kScratchDoubleReg, 31);
2430 : // Set positive overflow lanes to 0x7FFFFFFF
2431 : __ pxor(dst, kScratchDoubleReg);
2432 : break;
2433 : }
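     : // Illustrative per-lane result of the sequence above:
     : //   NaN lane          -> 0           (self-compare + pand)
     : //   lane > INT32_MAX  -> 0x7FFFFFFF  (cvttps2dq yields 0x80000000,
     : //                                     corrected by psrad/pxor)
     : //   lane < INT32_MIN  -> 0x80000000  (cvttps2dq saturation)
     : //   otherwise         -> (int32_t)truncate(lane)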
2434 : case kX64I32x4SConvertI16x8Low: {
2435 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2436 4 : __ pmovsxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2437 : break;
2438 : }
2439 : case kX64I32x4SConvertI16x8High: {
2440 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2441 : XMMRegister dst = i.OutputSimd128Register();
2442 4 : __ palignr(dst, i.InputSimd128Register(0), 8);
2443 : __ pmovsxwd(dst, dst);
2444 : break;
2445 : }
2446 : case kX64I32x4Neg: {
2447 : CpuFeatureScope sse_scope(tasm(), SSSE3);
2448 : XMMRegister dst = i.OutputSimd128Register();
2449 : XMMRegister src = i.InputSimd128Register(0);
2450 4 : if (dst == src) {
2451 4 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2452 : __ psignd(dst, kScratchDoubleReg);
2453 : } else {
2454 0 : __ pxor(dst, dst);
2455 : __ psubd(dst, src);
2456 : }
2457 : break;
2458 : }
2459 : case kX64I32x4Shl: {
2460 248 : __ pslld(i.OutputSimd128Register(), i.InputInt8(1));
2461 124 : break;
2462 : }
2463 : case kX64I32x4ShrS: {
2464 248 : __ psrad(i.OutputSimd128Register(), i.InputInt8(1));
2465 124 : break;
2466 : }
2467 : case kX64I32x4Add: {
2468 12 : __ paddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2469 : break;
2470 : }
2471 : case kX64I32x4AddHoriz: {
2472 : CpuFeatureScope sse_scope(tasm(), SSSE3);
2473 4 : __ phaddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2474 : break;
2475 : }
2476 : case kX64I32x4Sub: {
2477 4 : __ psubd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2478 : break;
2479 : }
2480 : case kX64I32x4Mul: {
2481 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2482 4 : __ pmulld(i.OutputSimd128Register(), i.InputSimd128Register(1));
2483 : break;
2484 : }
2485 : case kX64I32x4MinS: {
2486 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2487 4 : __ pminsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2488 : break;
2489 : }
2490 : case kX64I32x4MaxS: {
2491 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2492 4 : __ pmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2493 : break;
2494 : }
2495 : case kX64I32x4Eq: {
2496 12 : __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2497 : break;
2498 : }
2499 : case kX64I32x4Ne: {
2500 16 : __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2501 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2502 : __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
2503 : break;
2504 : }
2505 : case kX64I32x4GtS: {
2506 8 : __ pcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2507 : break;
2508 : }
2509 : case kX64I32x4GeS: {
2510 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2511 : XMMRegister dst = i.OutputSimd128Register();
2512 : XMMRegister src = i.InputSimd128Register(1);
2513 8 : __ pminsd(dst, src);
2514 : __ pcmpeqd(dst, src);
2515 : break;
2516 : }
2517 : case kX64I32x4UConvertF32x4: {
2518 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2519 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2520 : XMMRegister dst = i.OutputSimd128Register();
2521 : XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
2522 : // NaN -> 0, negative -> 0
2523 4 : __ pxor(kScratchDoubleReg, kScratchDoubleReg);
2524 4 : __ maxps(dst, kScratchDoubleReg);
2525 : // scratch: float representation of max_signed
2526 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2527 4 : __ psrld(kScratchDoubleReg, 1); // 0x7fffffff
2528 4 : __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // 0x4f000000
2529 : // tmp: convert (src-max_signed).
2530 : // Positive overflow lanes -> 0x7FFFFFFF
2531 : // Negative lanes -> 0
2532 4 : __ movaps(tmp, dst);
2533 4 : __ subps(tmp, kScratchDoubleReg);
2534 : __ cmpleps(kScratchDoubleReg, tmp);
2535 4 : __ cvttps2dq(tmp, tmp);
2536 : __ pxor(tmp, kScratchDoubleReg);
2537 : __ pxor(kScratchDoubleReg, kScratchDoubleReg);
2538 : __ pmaxsd(tmp, kScratchDoubleReg);
2539 : // Convert. Overflow lanes above max_signed will be 0x80000000.
2540 4 : __ cvttps2dq(dst, dst);
2541 : // Add (src-max_signed) for overflow lanes.
2542 : __ paddd(dst, tmp);
2543 : break;
2544 : }
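     : // Illustrative per-lane result of the saturating float -> uint32
     : // sequence above: NaN and negative lanes -> 0 (maxps against zero),
     : // lanes >= 2^32 -> 0xFFFFFFFF, and lanes in [2^31, 2^32) are formed
     : // as 0x80000000 + truncate(lane - 2^31); smaller lanes convert
     : // directly via cvttps2dq.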
2545 : case kX64I32x4UConvertI16x8Low: {
2546 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2547 4 : __ pmovzxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2548 : break;
2549 : }
2550 : case kX64I32x4UConvertI16x8High: {
2551 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2552 : XMMRegister dst = i.OutputSimd128Register();
2553 4 : __ palignr(dst, i.InputSimd128Register(0), 8);
2554 : __ pmovzxwd(dst, dst);
2555 : break;
2556 : }
2557 : case kX64I32x4ShrU: {
2558 248 : __ psrld(i.OutputSimd128Register(), i.InputInt8(1));
2559 124 : break;
2560 : }
2561 : case kX64I32x4MinU: {
2562 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2563 4 : __ pminud(i.OutputSimd128Register(), i.InputSimd128Register(1));
2564 : break;
2565 : }
2566 : case kX64I32x4MaxU: {
2567 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2568 4 : __ pmaxud(i.OutputSimd128Register(), i.InputSimd128Register(1));
2569 : break;
2570 : }
2571 : case kX64I32x4GtU: {
2572 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2573 : XMMRegister dst = i.OutputSimd128Register();
2574 : XMMRegister src = i.InputSimd128Register(1);
2575 8 : __ pmaxud(dst, src);
2576 : __ pcmpeqd(dst, src);
2577 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2578 : __ pxor(dst, kScratchDoubleReg);
2579 : break;
2580 : }
2581 : case kX64I32x4GeU: {
2582 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2583 : XMMRegister dst = i.OutputSimd128Register();
2584 : XMMRegister src = i.InputSimd128Register(1);
2585 8 : __ pminud(dst, src);
2586 : __ pcmpeqd(dst, src);
2587 : break;
2588 : }
2589 : case kX64S128Zero: {
2590 16 : XMMRegister dst = i.OutputSimd128Register();
2591 16 : __ xorps(dst, dst);
2592 : break;
2593 : }
2594 : case kX64I16x8Splat: {
2595 404 : XMMRegister dst = i.OutputSimd128Register();
2596 404 : if (instr->InputAt(0)->IsRegister()) {
2597 404 : __ movd(dst, i.InputRegister(0));
2598 : } else {
2599 0 : __ movd(dst, i.InputOperand(0));
2600 : }
2601 404 : __ pshuflw(dst, dst, 0x0);
2602 404 : __ pshufd(dst, dst, 0x0);
2603 : break;
2604 : }
2605 : case kX64I16x8ExtractLane: {
2606 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2607 32 : Register dst = i.OutputRegister();
2608 64 : __ pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
2609 32 : __ movsxwl(dst, dst);
2610 : break;
2611 : }
2612 : case kX64I16x8ReplaceLane: {
2613 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2614 52 : if (instr->InputAt(2)->IsRegister()) {
2615 104 : __ pinsrw(i.OutputSimd128Register(), i.InputRegister(2),
2616 52 : i.InputInt8(1));
2617 : } else {
2618 0 : __ pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
2619 : }
2620 : break;
2621 : }
2622 : case kX64I16x8SConvertI8x16Low: {
2623 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2624 4 : __ pmovsxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
2625 : break;
2626 : }
2627 : case kX64I16x8SConvertI8x16High: {
2628 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2629 : XMMRegister dst = i.OutputSimd128Register();
2630 4 : __ palignr(dst, i.InputSimd128Register(0), 8);
2631 : __ pmovsxbw(dst, dst);
2632 : break;
2633 : }
2634 : case kX64I16x8Neg: {
2635 : CpuFeatureScope sse_scope(tasm(), SSSE3);
2636 : XMMRegister dst = i.OutputSimd128Register();
2637 : XMMRegister src = i.InputSimd128Register(0);
2638 4 : if (dst == src) {
2639 4 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2640 : __ psignw(dst, kScratchDoubleReg);
2641 : } else {
2642 0 : __ pxor(dst, dst);
2643 : __ psubw(dst, src);
2644 : }
2645 : break;
2646 : }
2647 : case kX64I16x8Shl: {
2648 120 : __ psllw(i.OutputSimd128Register(), i.InputInt8(1));
2649 60 : break;
2650 : }
2651 : case kX64I16x8ShrS: {
2652 120 : __ psraw(i.OutputSimd128Register(), i.InputInt8(1));
2653 60 : break;
2654 : }
2655 : case kX64I16x8SConvertI32x4: {
2656 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2657 4 : __ packssdw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2658 : break;
2659 : }
2660 : case kX64I16x8Add: {
2661 4 : __ paddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2662 : break;
2663 : }
2664 : case kX64I16x8AddSaturateS: {
2665 4 : __ paddsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2666 : break;
2667 : }
2668 : case kX64I16x8AddHoriz: {
2669 : CpuFeatureScope sse_scope(tasm(), SSSE3);
2670 4 : __ phaddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2671 : break;
2672 : }
2673 : case kX64I16x8Sub: {
2674 4 : __ psubw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2675 : break;
2676 : }
2677 : case kX64I16x8SubSaturateS: {
2678 4 : __ psubsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2679 : break;
2680 : }
2681 : case kX64I16x8Mul: {
2682 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2683 4 : __ pmullw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2684 : break;
2685 : }
2686 : case kX64I16x8MinS: {
2687 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2688 4 : __ pminsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2689 : break;
2690 : }
2691 : case kX64I16x8MaxS: {
2692 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2693 4 : __ pmaxsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2694 : break;
2695 : }
2696 : case kX64I16x8Eq: {
2697 12 : __ pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2698 : break;
2699 : }
2700 : case kX64I16x8Ne: {
2701 16 : __ pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2702 : __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2703 : __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
2704 : break;
2705 : }
2706 : case kX64I16x8GtS: {
2707 8 : __ pcmpgtw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2708 : break;
2709 : }
2710 : case kX64I16x8GeS: {
2711 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2712 : XMMRegister dst = i.OutputSimd128Register();
2713 : XMMRegister src = i.InputSimd128Register(1);
2714 8 : __ pminsw(dst, src);
2715 : __ pcmpeqw(dst, src);
2716 : break;
2717 : }
2718 : case kX64I16x8UConvertI8x16Low: {
2719 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2720 4 : __ pmovzxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
2721 : break;
2722 : }
2723 : case kX64I16x8UConvertI8x16High: {
2724 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2725 : XMMRegister dst = i.OutputSimd128Register();
2726 4 : __ palignr(dst, i.InputSimd128Register(0), 8);
2727 : __ pmovzxbw(dst, dst);
2728 : break;
2729 : }
2730 : case kX64I16x8ShrU: {
2731 120 : __ psrlw(i.OutputSimd128Register(), i.InputInt8(1));
2732 60 : break;
2733 : }
2734 : case kX64I16x8UConvertI32x4: {
2735 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2736 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2737 : XMMRegister dst = i.OutputSimd128Register();
2738 : // Change negative lanes to 0x7FFFFFFF
2739 4 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2740 4 : __ psrld(kScratchDoubleReg, 1);
2741 : __ pminud(dst, kScratchDoubleReg);
2742 : __ pminud(kScratchDoubleReg, i.InputSimd128Register(1));
2743 : __ packusdw(dst, kScratchDoubleReg);
2744 : break;
2745 : }
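     : // packusdw saturates *signed* 32-bit inputs to [0, 0xFFFF], so an
     : // unsigned lane >= 0x80000000 would pack to 0 (it looks negative).
     : // The pminud clamp to 0x7FFFFFFF above keeps such lanes large and
     : // positive, so packusdw saturates them to 0xFFFF as intended.
     : // kX64I8x16UConvertI16x8 below uses the same trick with
     : // pminuw/packuswb.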
2746 : case kX64I16x8AddSaturateU: {
2747 4 : __ paddusw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2748 : break;
2749 : }
2750 : case kX64I16x8SubSaturateU: {
2751 4 : __ psubusw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2752 : break;
2753 : }
2754 : case kX64I16x8MinU: {
2755 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2756 4 : __ pminuw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2757 : break;
2758 : }
2759 : case kX64I16x8MaxU: {
2760 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2761 4 : __ pmaxuw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2762 : break;
2763 : }
2764 : case kX64I16x8GtU: {
2765 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2766 : XMMRegister dst = i.OutputSimd128Register();
2767 : XMMRegister src = i.InputSimd128Register(1);
2768 8 : __ pmaxuw(dst, src);
2769 : __ pcmpeqw(dst, src);
2770 : __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2771 : __ pxor(dst, kScratchDoubleReg);
2772 : break;
2773 : }
2774 : case kX64I16x8GeU: {
2775 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2776 : XMMRegister dst = i.OutputSimd128Register();
2777 : XMMRegister src = i.InputSimd128Register(1);
2778 16 : __ pminuw(dst, src);
2779 : __ pcmpeqw(dst, src);
2780 : break;
2781 : }
2782 : case kX64I8x16Splat: {
2783 : CpuFeatureScope sse_scope(tasm(), SSSE3);
2784 : XMMRegister dst = i.OutputSimd128Register();
2785 304 : if (instr->InputAt(0)->IsRegister()) {
2786 304 : __ movd(dst, i.InputRegister(0));
2787 : } else {
2788 0 : __ movd(dst, i.InputOperand(0));
2789 : }
2790 304 : __ xorps(kScratchDoubleReg, kScratchDoubleReg);
2791 : __ pshufb(dst, kScratchDoubleReg);
2792 : break;
2793 : }
2794 : case kX64I8x16ExtractLane: {
2795 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2796 32 : Register dst = i.OutputRegister();
2797 64 : __ pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
2798 32 : __ movsxbl(dst, dst);
2799 : break;
2800 : }
2801 : case kX64I8x16ReplaceLane: {
2802 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2803 84 : if (instr->InputAt(2)->IsRegister()) {
2804 168 : __ pinsrb(i.OutputSimd128Register(), i.InputRegister(2),
2805 84 : i.InputInt8(1));
2806 : } else {
2807 0 : __ pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
2808 : }
2809 : break;
2810 : }
2811 : case kX64I8x16SConvertI16x8: {
2812 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2813 4 : __ packsswb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2814 : break;
2815 : }
2816 : case kX64I8x16Neg: {
2817 : CpuFeatureScope sse_scope(tasm(), SSSE3);
2818 : XMMRegister dst = i.OutputSimd128Register();
2819 : XMMRegister src = i.InputSimd128Register(0);
2820 4 : if (dst == src) {
2821 4 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2822 : __ psignb(dst, kScratchDoubleReg);
2823 : } else {
2824 0 : __ pxor(dst, dst);
2825 : __ psubb(dst, src);
2826 : }
2827 : break;
2828 : }
2829 : case kX64I8x16Shl: {
2830 : XMMRegister dst = i.OutputSimd128Register();
2831 : DCHECK_EQ(dst, i.InputSimd128Register(0));
2832 28 : int8_t shift = i.InputInt8(1) & 0x7;
2833 28 : if (shift < 4) {
2834 : // For small shifts, doubling is faster.
2835 60 : for (int i = 0; i < shift; ++i) {
2836 24 : __ paddb(dst, dst);
2837 : }
2838 : } else {
2839 : // Mask off the unwanted bits before word-shifting.
2840 16 : __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2841 16 : __ psrlw(kScratchDoubleReg, 8 + shift);
2842 : __ packuswb(kScratchDoubleReg, kScratchDoubleReg);
2843 : __ pand(dst, kScratchDoubleReg);
2844 16 : __ psllw(dst, shift);
2845 : }
2846 : break;
2847 : }
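     : // SSE has no per-byte shift instruction, so the byte shift above is
     : // emulated: paddb doubles every byte (cheapest for shift < 4), while
     : // larger shifts first mask off the bits that would leak into the
     : // neighbouring byte and then use the 16-bit psllw.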
2848 : case kX64I8x16ShrS: {
2849 : XMMRegister dst = i.OutputSimd128Register();
2850 : XMMRegister src = i.InputSimd128Register(0);
2851 28 : int8_t shift = i.InputInt8(1) & 0x7;
2852 : // Unpack the bytes into words, do arithmetic shifts, and repack.
2853 28 : __ punpckhbw(kScratchDoubleReg, src);
2854 : __ punpcklbw(dst, src);
2855 28 : __ psraw(kScratchDoubleReg, 8 + shift);
2856 28 : __ psraw(dst, 8 + shift);
2857 : __ packsswb(dst, kScratchDoubleReg);
2858 : break;
2859 : }
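     : // Illustrative per-byte view of the unpack/shift/repack above: after
     : // punpck{h,l}bw each 16-bit word holds the source byte in its high
     : // byte (the low byte is don't-care), so psraw by 8 + shift both
     : // sign-extends the byte and applies the shift; packsswb then narrows
     : // the words back to bytes. kX64I8x16ShrU below does the same with
     : // logical shifts and packuswb.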
2860 : case kX64I8x16Add: {
2861 4 : __ paddb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2862 : break;
2863 : }
2864 : case kX64I8x16AddSaturateS: {
2865 4 : __ paddsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2866 : break;
2867 : }
2868 : case kX64I8x16Sub: {
2869 4 : __ psubb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2870 : break;
2871 : }
2872 : case kX64I8x16SubSaturateS: {
2873 4 : __ psubsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2874 : break;
2875 : }
2876 : case kX64I8x16Mul: {
2877 : XMMRegister dst = i.OutputSimd128Register();
2878 : DCHECK_EQ(dst, i.InputSimd128Register(0));
2879 : XMMRegister right = i.InputSimd128Register(1);
2880 : XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
2881 : // I16x8 view of I8x16
2882 : // left = AAaa AAaa ... AAaa AAaa
2883 : // right= BBbb BBbb ... BBbb BBbb
2884 : // t = 00AA 00AA ... 00AA 00AA
2885 : // s = 00BB 00BB ... 00BB 00BB
2886 4 : __ movaps(tmp, dst);
2887 4 : __ movaps(kScratchDoubleReg, right);
2888 4 : __ psrlw(tmp, 8);
2889 4 : __ psrlw(kScratchDoubleReg, 8);
2890 : // dst = left * 256
2891 4 : __ psllw(dst, 8);
2892 : // t = I16x8Mul(t, s)
2893 : // => __PP __PP ... __PP __PP
2894 : __ pmullw(tmp, kScratchDoubleReg);
2895 : // dst = I16x8Mul(left * 256, right)
2896 : // => pp__ pp__ ... pp__ pp__
2897 : __ pmullw(dst, right);
2898 : // t = I16x8Shl(t, 8)
2899 : // => PP00 PP00 ... PP00 PP00
2900 4 : __ psllw(tmp, 8);
2901 : // dst = I16x8Shr(dst, 8)
2902 : // => 00pp 00pp ... 00pp 00pp
2903 4 : __ psrlw(dst, 8);
2904 : // dst = I16x8Or(dst, t)
2905 : // => PPpp PPpp ... PPpp PPpp
2906 : __ por(dst, tmp);
2907 : break;
2908 : }
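     : // Scalar sketch of the per-byte multiply above (illustrative): for
     : // each byte pair the result is the low 8 bits of the 16-bit product,
     : //   uint8_t r = (uint8_t)((uint16_t)a * b);
     : // computed separately for the odd bytes (in tmp) and the even bytes
     : // (in dst) and merged with por.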
2909 : case kX64I8x16MinS: {
2910 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2911 4 : __ pminsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2912 : break;
2913 : }
2914 : case kX64I8x16MaxS: {
2915 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2916 4 : __ pmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2917 : break;
2918 : }
2919 : case kX64I8x16Eq: {
2920 12 : __ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2921 : break;
2922 : }
2923 : case kX64I8x16Ne: {
2924 16 : __ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2925 : __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
2926 : __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
2927 : break;
2928 : }
2929 : case kX64I8x16GtS: {
2930 8 : __ pcmpgtb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2931 : break;
2932 : }
2933 : case kX64I8x16GeS: {
2934 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2935 : XMMRegister dst = i.OutputSimd128Register();
2936 : XMMRegister src = i.InputSimd128Register(1);
2937 8 : __ pminsb(dst, src);
2938 : __ pcmpeqb(dst, src);
2939 : break;
2940 : }
2941 : case kX64I8x16UConvertI16x8: {
2942 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2943 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2944 : XMMRegister dst = i.OutputSimd128Register();
2945 : // Change negative lanes to 0x7FFF
2946 4 : __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2947 4 : __ psrlw(kScratchDoubleReg, 1);
2948 : __ pminuw(dst, kScratchDoubleReg);
2949 : __ pminuw(kScratchDoubleReg, i.InputSimd128Register(1));
2950 : __ packuswb(dst, kScratchDoubleReg);
2951 : break;
2952 : }
2953 : case kX64I8x16ShrU: {
2954 : XMMRegister dst = i.OutputSimd128Register();
2955 : XMMRegister src = i.InputSimd128Register(0);
2956 28 : int8_t shift = i.InputInt8(1) & 0x7;
2957 : // Unpack the bytes into words, do logical shifts, and repack.
2958 28 : __ punpckhbw(kScratchDoubleReg, src);
2959 : __ punpcklbw(dst, src);
2960 28 : __ psrlw(kScratchDoubleReg, 8 + shift);
2961 28 : __ psrlw(dst, 8 + shift);
2962 : __ packuswb(dst, kScratchDoubleReg);
2963 : break;
2964 : }
2965 : case kX64I8x16AddSaturateU: {
2966 4 : __ paddusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2967 : break;
2968 : }
2969 : case kX64I8x16SubSaturateU: {
2970 4 : __ psubusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2971 : break;
2972 : }
2973 : case kX64I8x16MinU: {
2974 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2975 4 : __ pminub(i.OutputSimd128Register(), i.InputSimd128Register(1));
2976 : break;
2977 : }
2978 : case kX64I8x16MaxU: {
2979 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2980 4 : __ pmaxub(i.OutputSimd128Register(), i.InputSimd128Register(1));
2981 : break;
2982 : }
2983 : case kX64I8x16GtU: {
2984 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2985 : XMMRegister dst = i.OutputSimd128Register();
2986 : XMMRegister src = i.InputSimd128Register(1);
2987 8 : __ pmaxub(dst, src);
2988 : __ pcmpeqb(dst, src);
2989 : __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
2990 : __ pxor(dst, kScratchDoubleReg);
2991 : break;
2992 : }
2993 : case kX64I8x16GeU: {
2994 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2995 : XMMRegister dst = i.OutputSimd128Register();
2996 : XMMRegister src = i.InputSimd128Register(1);
2997 0 : __ pminub(dst, src);
2998 : __ pcmpeqb(dst, src);
2999 : break;
3000 : }
3001 : case kX64S128And: {
3002 4 : __ pand(i.OutputSimd128Register(), i.InputSimd128Register(1));
3003 : break;
3004 : }
3005 : case kX64S128Or: {
3006 4 : __ por(i.OutputSimd128Register(), i.InputSimd128Register(1));
3007 : break;
3008 : }
3009 : case kX64S128Xor: {
3010 4 : __ pxor(i.OutputSimd128Register(), i.InputSimd128Register(1));
3011 : break;
3012 : }
3013 : case kX64S128Not: {
3014 : XMMRegister dst = i.OutputSimd128Register();
3015 : XMMRegister src = i.InputSimd128Register(0);
3016 4 : if (dst == src) {
3017 4 : __ movaps(kScratchDoubleReg, dst);
3018 : __ pcmpeqd(dst, dst);
3019 : __ pxor(dst, kScratchDoubleReg);
3020 : } else {
3021 0 : __ pcmpeqd(dst, dst);
3022 : __ pxor(dst, src);
3023 : }
3024 :
3025 : break;
3026 : }
3027 : case kX64S128Select: {
3028 : // Mask used here is stored in dst.
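 : // Computes (mask & (src1 ^ src2)) ^ src2, which selects src1 bits
 : // where a mask bit is 1 and src2 bits where it is 0.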
3029 28 : XMMRegister dst = i.OutputSimd128Register();
3030 28 : __ movaps(kScratchDoubleReg, i.InputSimd128Register(1));
3031 28 : __ xorps(kScratchDoubleReg, i.InputSimd128Register(2));
3032 28 : __ andps(dst, kScratchDoubleReg);
3033 28 : __ xorps(dst, i.InputSimd128Register(2));
3034 : break;
3035 : }
3036 : case kX64S8x16Shuffle: {
3037 : XMMRegister dst = i.OutputSimd128Register();
3038 : Register tmp = i.TempRegister(0);
3039 : // Prepare 16 byte aligned buffer for shuffle control mask
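 : // pshufb zeroes a result byte whenever bit 7 of the corresponding
 : // control byte is set; the 0x80 entries in the two-operand masks
 : // below use this to blank lanes taken from the other source, so
 : // por can merge the two halves.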
3040 1396 : __ movq(tmp, rsp);
3041 : __ andq(rsp, Immediate(-16));
3042 1396 : if (instr->InputCount() == 5) { // only one input operand
3043 640 : uint32_t mask[4] = {};
3044 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3045 5760 : for (int j = 4; j > 0; j--) {
3046 5120 : mask[j - 1] = i.InputUint32(j);
3047 : }
3048 :
3049 640 : SetupShuffleMaskOnStack(tasm(), mask);
3050 1280 : __ pshufb(dst, Operand(rsp, 0));
3051 : } else { // two input operands
3052 : DCHECK_EQ(6, instr->InputCount());
3053 1512 : ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 0);
3054 756 : uint32_t mask[4] = {};
3055 6804 : for (int j = 5; j > 1; j--) {
3056 3024 : uint32_t lanes = i.InputUint32(j);
3057 27216 : for (int k = 0; k < 32; k += 8) {
3058 12096 : uint8_t lane = lanes >> k;
3059 12096 : mask[j - 2] |= (lane < kSimd128Size ? lane : 0x80) << k;
3060 : }
3061 : }
3062 756 : SetupShuffleMaskOnStack(tasm(), mask);
3063 1512 : __ pshufb(kScratchDoubleReg, Operand(rsp, 0));
3064 756 : uint32_t mask1[4] = {};
3065 756 : if (instr->InputAt(1)->IsSimd128Register()) {
3066 : XMMRegister src1 = i.InputSimd128Register(1);
3067 756 : if (src1 != dst) __ movups(dst, src1);
3068 : } else {
3069 0 : __ movups(dst, i.InputOperand(1));
3070 : }
3071 6804 : for (int j = 5; j > 1; j--) {
3072 3024 : uint32_t lanes = i.InputUint32(j);
3073 27216 : for (int k = 0; k < 32; k += 8) {
3074 12096 : uint8_t lane = lanes >> k;
3075 12096 : mask1[j - 2] |= (lane >= kSimd128Size ? (lane & 0x0F) : 0x80) << k;
3076 : }
3077 : }
3078 756 : SetupShuffleMaskOnStack(tasm(), mask1);
3079 1512 : __ pshufb(dst, Operand(rsp, 0));
3080 : __ por(dst, kScratchDoubleReg);
3081 : }
3082 : __ movq(rsp, tmp);
3083 : break;
3084 : }
3085 : case kX64S32x4Swizzle: {
3086 : DCHECK_EQ(2, instr->InputCount());
3087 924 : ASSEMBLE_SIMD_IMM_INSTR(pshufd, i.OutputSimd128Register(), 0,
3088 : i.InputInt8(1));
3089 : break;
3090 : }
3091 : case kX64S32x4Shuffle: {
3092 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
3093 : DCHECK_EQ(4, instr->InputCount()); // Swizzles should be handled above.
3094 : int8_t shuffle = i.InputInt8(2);
3095 : DCHECK_NE(0xe4, shuffle); // A simple blend should be handled below.
3096 920 : ASSEMBLE_SIMD_IMM_INSTR(pshufd, kScratchDoubleReg, 1, shuffle);
3097 920 : ASSEMBLE_SIMD_IMM_INSTR(pshufd, i.OutputSimd128Register(), 0, shuffle);
3098 920 : __ pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputInt8(3));
3099 : break;
3100 : }
3101 : case kX64S16x8Blend: {
3102 112 : ASSEMBLE_SIMD_IMM_SHUFFLE(pblendw, SSE4_1, i.InputInt8(2));
3103 56 : break;
3104 : }
3105 : case kX64S16x8HalfShuffle1: {
3106 352 : XMMRegister dst = i.OutputSimd128Register();
3107 1056 : ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, i.InputInt8(1));
3108 352 : __ pshufhw(dst, dst, i.InputInt8(2));
3109 : break;
3110 : }
3111 : case kX64S16x8HalfShuffle2: {
3112 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
3113 176 : XMMRegister dst = i.OutputSimd128Register();
3114 528 : ASSEMBLE_SIMD_IMM_INSTR(pshuflw, kScratchDoubleReg, 1, i.InputInt8(2));
3115 176 : __ pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputInt8(3));
3116 528 : ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, i.InputInt8(2));
3117 176 : __ pshufhw(dst, dst, i.InputInt8(3));
3118 176 : __ pblendw(dst, kScratchDoubleReg, i.InputInt8(4));
3119 : break;
3120 : }
3121 : case kX64S8x16Alignr: {
3122 480 : ASSEMBLE_SIMD_IMM_SHUFFLE(palignr, SSSE3, i.InputInt8(2));
3123 240 : break;
3124 : }
3125 : case kX64S16x8Dup: {
3126 164 : XMMRegister dst = i.OutputSimd128Register();
3127 164 : int8_t lane = i.InputInt8(1) & 0x7;
3128 164 : int8_t lane4 = lane & 0x3;
3129 164 : int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
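 : // e.g. lane4 == 2 gives half_dup == 0xAA, which makes pshuflw
 : // replicate word 2 across the low four words before pshufd
 : // broadcasts that dword to the whole register.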
3130 164 : if (lane < 4) {
3131 328 : ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, half_dup);
3132 164 : __ pshufd(dst, dst, 0);
3133 : } else {
3134 0 : ASSEMBLE_SIMD_IMM_INSTR(pshufhw, dst, 0, half_dup);
3135 0 : __ pshufd(dst, dst, 0xaa);
3136 : }
3137 : break;
3138 : }
3139 : case kX64S8x16Dup: {
3140 : XMMRegister dst = i.OutputSimd128Register();
3141 224 : int8_t lane = i.InputInt8(1) & 0xf;
3142 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3143 224 : if (lane < 8) {
3144 224 : __ punpcklbw(dst, dst);
3145 : } else {
3146 0 : __ punpckhbw(dst, dst);
3147 : }
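 : // The interleave above widened the requested byte into a full
 : // 16-bit lane; after masking lane to 0x7 below, the remainder is
 : // the same word broadcast used for kX64S16x8Dup.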
3148 224 : lane &= 0x7;
3149 224 : int8_t lane4 = lane & 0x3;
3150 224 : int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
3151 224 : if (lane < 4) {
3152 208 : __ pshuflw(dst, dst, half_dup);
3153 208 : __ pshufd(dst, dst, 0);
3154 : } else {
3155 16 : __ pshufhw(dst, dst, half_dup);
3156 16 : __ pshufd(dst, dst, 0xaa);
3157 : }
3158 : break;
3159 : }
3160 : case kX64S64x2UnpackHigh:
3161 0 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhqdq);
3162 : break;
3163 : case kX64S32x4UnpackHigh:
3164 300 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhdq);
3165 : break;
3166 : case kX64S16x8UnpackHigh:
3167 348 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhwd);
3168 : break;
3169 : case kX64S8x16UnpackHigh:
3170 264 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhbw);
3171 : break;
3172 : case kX64S64x2UnpackLow:
3173 0 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklqdq);
3174 : break;
3175 : case kX64S32x4UnpackLow:
3176 312 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckldq);
3177 : break;
3178 : case kX64S16x8UnpackLow:
3179 192 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklwd);
3180 : break;
3181 : case kX64S8x16UnpackLow:
3182 348 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklbw);
3183 : break;
3184 : case kX64S16x8UnzipHigh: {
3185 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
3186 : XMMRegister dst = i.OutputSimd128Register();
3187 : XMMRegister src2 = dst;
3188 : DCHECK_EQ(dst, i.InputSimd128Register(0));
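 : // Shifting each dword right by 16 leaves the odd word in the low
 : // half with zeros above, so packusdw (signed dword to unsigned
 : // word, saturating) passes the values through unchanged.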
3189 64 : if (instr->InputCount() == 2) {
3190 112 : ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
3191 56 : __ psrld(kScratchDoubleReg, 16);
3192 : src2 = kScratchDoubleReg;
3193 : }
3194 64 : __ psrld(dst, 16);
3195 : __ packusdw(dst, src2);
3196 : break;
3197 : }
3198 : case kX64S16x8UnzipLow: {
3199 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
3200 : XMMRegister dst = i.OutputSimd128Register();
3201 : XMMRegister src2 = dst;
3202 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3203 72 : __ pxor(kScratchDoubleReg, kScratchDoubleReg);
3204 72 : if (instr->InputCount() == 2) {
3205 128 : ASSEMBLE_SIMD_IMM_INSTR(pblendw, kScratchDoubleReg, 1, 0x55);
3206 : src2 = kScratchDoubleReg;
3207 : }
3208 72 : __ pblendw(dst, kScratchDoubleReg, 0xaa);
3209 : __ packusdw(dst, src2);
3210 : break;
3211 : }
3212 : case kX64S8x16UnzipHigh: {
3213 : XMMRegister dst = i.OutputSimd128Register();
3214 : XMMRegister src2 = dst;
3215 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3216 84 : if (instr->InputCount() == 2) {
3217 136 : ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
3218 68 : __ psrlw(kScratchDoubleReg, 8);
3219 : src2 = kScratchDoubleReg;
3220 : }
3221 84 : __ psrlw(dst, 8);
3222 : __ packuswb(dst, src2);
3223 : break;
3224 : }
3225 : case kX64S8x16UnzipLow: {
3226 : XMMRegister dst = i.OutputSimd128Register();
3227 : XMMRegister src2 = dst;
3228 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3229 128 : if (instr->InputCount() == 2) {
3230 224 : ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
3231 112 : __ psllw(kScratchDoubleReg, 8);
3232 112 : __ psrlw(kScratchDoubleReg, 8);
3233 : src2 = kScratchDoubleReg;
3234 : }
3235 128 : __ psllw(dst, 8);
3236 128 : __ psrlw(dst, 8);
3237 : __ packuswb(dst, src2);
3238 : break;
3239 : }
3240 : case kX64S8x16TransposeLow: {
3241 : XMMRegister dst = i.OutputSimd128Register();
3242 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3243 96 : __ psllw(dst, 8);
3244 96 : if (instr->InputCount() == 1) {
3245 8 : __ movups(kScratchDoubleReg, dst);
3246 : } else {
3247 : DCHECK_EQ(2, instr->InputCount());
3248 176 : ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
3249 88 : __ psllw(kScratchDoubleReg, 8);
3250 : }
3251 96 : __ psrlw(dst, 8);
3252 : __ por(dst, kScratchDoubleReg);
3253 : break;
3254 : }
3255 : case kX64S8x16TransposeHigh: {
3256 : XMMRegister dst = i.OutputSimd128Register();
3257 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3258 124 : __ psrlw(dst, 8);
3259 124 : if (instr->InputCount() == 1) {
3260 24 : __ movups(kScratchDoubleReg, dst);
3261 : } else {
3262 : DCHECK_EQ(2, instr->InputCount());
3263 200 : ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
3264 100 : __ psrlw(kScratchDoubleReg, 8);
3265 : }
3266 124 : __ psllw(kScratchDoubleReg, 8);
3267 : __ por(dst, kScratchDoubleReg);
3268 : break;
3269 : }
3270 : case kX64S8x8Reverse:
3271 : case kX64S8x4Reverse:
3272 : case kX64S8x2Reverse: {
3273 : DCHECK_EQ(1, instr->InputCount());
3274 : XMMRegister dst = i.OutputSimd128Register();
3275 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3276 304 : if (arch_opcode != kX64S8x2Reverse) {
3277 : // First shuffle words into position.
3278 204 : int8_t shuffle_mask = arch_opcode == kX64S8x4Reverse ? 0xB1 : 0x1B;
3279 204 : __ pshuflw(dst, dst, shuffle_mask);
3280 204 : __ pshufhw(dst, dst, shuffle_mask);
3281 : }
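 : // 0xB1 swaps adjacent words and 0x1B reverses each group of four
 : // words, so the variants differ only in this pre-arrangement; the
 : // tail below swaps the two bytes inside every 16-bit lane.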
3282 304 : __ movaps(kScratchDoubleReg, dst);
3283 304 : __ psrlw(kScratchDoubleReg, 8);
3284 304 : __ psllw(dst, 8);
3285 : __ por(dst, kScratchDoubleReg);
3286 : break;
3287 : }
3288 : case kX64S1x4AnyTrue:
3289 : case kX64S1x8AnyTrue:
3290 : case kX64S1x16AnyTrue: {
3291 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
3292 : Register dst = i.OutputRegister();
3293 : XMMRegister src = i.InputSimd128Register(0);
3294 : Register tmp = i.TempRegister(0);
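 : // ptest sets ZF when src & src == 0, i.e. when every lane is zero;
 : // cmovq then replaces the preloaded 1 with 0 branchlessly.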
3295 60 : __ xorq(tmp, tmp);
3296 : __ movq(dst, Immediate(1));
3297 : __ ptest(src, src);
3298 60 : __ cmovq(zero, dst, tmp);
3299 : break;
3300 : }
3301 : case kX64S1x4AllTrue:
3302 : case kX64S1x8AllTrue:
3303 : case kX64S1x16AllTrue: {
3304 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
3305 : Register dst = i.OutputRegister();
3306 : XMMRegister src = i.InputSimd128Register(0);
3307 : Register tmp = i.TempRegister(0);
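 : // pcmpeqd/pxor computes the bitwise complement of src, so ptest
 : // sets ZF exactly when src has all bits set; cmovq materializes
 : // the answer without a branch.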
3308 60 : __ movq(tmp, Immediate(1));
3309 : __ xorq(dst, dst);
3310 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3311 : __ pxor(kScratchDoubleReg, src);
3312 : __ ptest(kScratchDoubleReg, kScratchDoubleReg);
3313 60 : __ cmovq(zero, dst, tmp);
3314 : break;
3315 : }
3316 : case kX64StackCheck:
3317 561937 : __ CompareRoot(rsp, RootIndex::kStackLimit);
3318 561950 : break;
3319 : case kWord32AtomicExchangeInt8: {
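 : // xchg with a memory operand is implicitly locked, so no lock
 : // prefix is needed in these exchange cases; the following
 : // movsx/movzx re-normalizes the narrow result in the register.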
3320 1027 : __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
3321 1026 : __ movsxbl(i.InputRegister(0), i.InputRegister(0));
3322 1028 : break;
3323 : }
3324 : case kWord32AtomicExchangeUint8: {
3325 904 : __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
3326 : __ movzxbl(i.InputRegister(0), i.InputRegister(0));
3327 : break;
3328 : }
3329 : case kWord32AtomicExchangeInt16: {
3330 602 : __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
3331 602 : __ movsxwl(i.InputRegister(0), i.InputRegister(0));
3332 602 : break;
3333 : }
3334 : case kWord32AtomicExchangeUint16: {
3335 858 : __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
3336 : __ movzxwl(i.InputRegister(0), i.InputRegister(0));
3337 : break;
3338 : }
3339 : case kWord32AtomicExchangeWord32: {
3340 1284 : __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
3341 : break;
3342 : }
3343 : case kWord32AtomicCompareExchangeInt8: {
3344 112 : __ lock();
3345 112 : __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
3346 112 : __ movsxbl(rax, rax);
3347 112 : break;
3348 : }
3349 : case kWord32AtomicCompareExchangeUint8: {
3350 129 : __ lock();
3351 129 : __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
3352 : __ movzxbl(rax, rax);
3353 : break;
3354 : }
3355 : case kWord32AtomicCompareExchangeInt16: {
3356 112 : __ lock();
3357 112 : __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
3358 112 : __ movsxwl(rax, rax);
3359 112 : break;
3360 : }
3361 : case kWord32AtomicCompareExchangeUint16: {
3362 129 : __ lock();
3363 129 : __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
3364 : __ movzxwl(rax, rax);
3365 : break;
3366 : }
3367 : case kWord32AtomicCompareExchangeWord32: {
3368 257 : __ lock();
3369 : __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
3370 : break;
3371 : }
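 : // Each binop case expands ASSEMBLE_ATOMIC_BINOP (defined earlier):
 : // load the old value, apply the operation in a scratch register,
 : // and retry a lock cmpxchg until no other writer has intervened.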
3372 : #define ATOMIC_BINOP_CASE(op, inst) \
3373 : case kWord32Atomic##op##Int8: \
3374 : ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
3375 : __ movsxbl(rax, rax); \
3376 : break; \
3377 : case kWord32Atomic##op##Uint8: \
3378 : ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
3379 : __ movzxbl(rax, rax); \
3380 : break; \
3381 : case kWord32Atomic##op##Int16: \
3382 : ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
3383 : __ movsxwl(rax, rax); \
3384 : break; \
3385 : case kWord32Atomic##op##Uint16: \
3386 : ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
3387 : __ movzxwl(rax, rax); \
3388 : break; \
3389 : case kWord32Atomic##op##Word32: \
3390 : ASSEMBLE_ATOMIC_BINOP(inst, movl, cmpxchgl); \
3391 : break;
3392 8996 : ATOMIC_BINOP_CASE(Add, addl)
3393 6158 : ATOMIC_BINOP_CASE(Sub, subl)
3394 8531 : ATOMIC_BINOP_CASE(And, andl)
3395 7754 : ATOMIC_BINOP_CASE(Or, orl)
3396 7886 : ATOMIC_BINOP_CASE(Xor, xorl)
3397 : #undef ATOMIC_BINOP_CASE
3398 : case kX64Word64AtomicExchangeUint8: {
3399 1942 : __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
3400 : __ movzxbq(i.InputRegister(0), i.InputRegister(0));
3401 : break;
3402 : }
3403 : case kX64Word64AtomicExchangeUint16: {
3404 1352 : __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
3405 : __ movzxwq(i.InputRegister(0), i.InputRegister(0));
3406 : break;
3407 : }
3408 : case kX64Word64AtomicExchangeUint32: {
3409 742 : __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
3410 : break;
3411 : }
3412 : case kX64Word64AtomicExchangeUint64: {
3413 864 : __ xchgq(i.InputRegister(0), i.MemoryOperand(1));
3414 : break;
3415 : }
3416 : case kX64Word64AtomicCompareExchangeUint8: {
3417 17 : __ lock();
3418 17 : __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
3419 : __ movzxbq(rax, rax);
3420 : break;
3421 : }
3422 : case kX64Word64AtomicCompareExchangeUint16: {
3423 25 : __ lock();
3424 25 : __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
3425 : __ movzxwq(rax, rax);
3426 : break;
3427 : }
3428 : case kX64Word64AtomicCompareExchangeUint32: {
3429 25 : __ lock();
3430 : __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
3431 : break;
3432 : }
3433 : case kX64Word64AtomicCompareExchangeUint64: {
3434 265 : __ lock();
3435 : __ cmpxchgq(i.MemoryOperand(2), i.InputRegister(1));
3436 : break;
3437 : }
3438 : #define ATOMIC64_BINOP_CASE(op, inst) \
3439 : case kX64Word64Atomic##op##Uint8: \
3440 : ASSEMBLE_ATOMIC64_BINOP(inst, movb, cmpxchgb); \
3441 : __ movzxbq(rax, rax); \
3442 : break; \
3443 : case kX64Word64Atomic##op##Uint16: \
3444 : ASSEMBLE_ATOMIC64_BINOP(inst, movw, cmpxchgw); \
3445 : __ movzxwq(rax, rax); \
3446 : break; \
3447 : case kX64Word64Atomic##op##Uint32: \
3448 : ASSEMBLE_ATOMIC64_BINOP(inst, movl, cmpxchgl); \
3449 : break; \
3450 : case kX64Word64Atomic##op##Uint64: \
3451 : ASSEMBLE_ATOMIC64_BINOP(inst, movq, cmpxchgq); \
3452 : break;
3453 6634 : ATOMIC64_BINOP_CASE(Add, addq)
3454 9340 : ATOMIC64_BINOP_CASE(Sub, subq)
3455 7667 : ATOMIC64_BINOP_CASE(And, andq)
3456 8644 : ATOMIC64_BINOP_CASE(Or, orq)
3457 8512 : ATOMIC64_BINOP_CASE(Xor, xorq)
3458 : #undef ATOMIC64_BINOP_CASE
3459 : case kWord32AtomicLoadInt8:
3460 : case kWord32AtomicLoadUint8:
3461 : case kWord32AtomicLoadInt16:
3462 : case kWord32AtomicLoadUint16:
3463 : case kWord32AtomicLoadWord32:
3464 : case kWord32AtomicStoreWord8:
3465 : case kWord32AtomicStoreWord16:
3466 : case kWord32AtomicStoreWord32:
3467 : case kX64Word64AtomicLoadUint8:
3468 : case kX64Word64AtomicLoadUint16:
3469 : case kX64Word64AtomicLoadUint32:
3470 : case kX64Word64AtomicLoadUint64:
3471 : case kX64Word64AtomicStoreWord8:
3472 : case kX64Word64AtomicStoreWord16:
3473 : case kX64Word64AtomicStoreWord32:
3474 : case kX64Word64AtomicStoreWord64:
3475 0 : UNREACHABLE(); // Won't be generated by instruction selector.
3476 : break;
3477 : }
3478 : return kSuccess;
3479 : } // NOLINT(readability/fn_size)
3480 :
3481 : #undef ASSEMBLE_UNOP
3482 : #undef ASSEMBLE_BINOP
3483 : #undef ASSEMBLE_COMPARE
3484 : #undef ASSEMBLE_MULT
3485 : #undef ASSEMBLE_SHIFT
3486 : #undef ASSEMBLE_MOVX
3487 : #undef ASSEMBLE_SSE_BINOP
3488 : #undef ASSEMBLE_SSE_UNOP
3489 : #undef ASSEMBLE_AVX_BINOP
3490 : #undef ASSEMBLE_IEEE754_BINOP
3491 : #undef ASSEMBLE_IEEE754_UNOP
3492 : #undef ASSEMBLE_ATOMIC_BINOP
3493 : #undef ASSEMBLE_ATOMIC64_BINOP
3494 : #undef ASSEMBLE_SIMD_INSTR
3495 : #undef ASSEMBLE_SIMD_IMM_INSTR
3496 : #undef ASSEMBLE_SIMD_PUNPCK_SHUFFLE
3497 : #undef ASSEMBLE_SIMD_IMM_SHUFFLE
3498 :
3499 : namespace {
3500 :
3501 6149065 : Condition FlagsConditionToCondition(FlagsCondition condition) {
3502 6149065 : switch (condition) {
3503 : case kUnorderedEqual:
3504 : case kEqual:
3505 : return equal;
3506 : case kUnorderedNotEqual:
3507 : case kNotEqual:
3508 1356209 : return not_equal;
3509 : case kSignedLessThan:
3510 168581 : return less;
3511 : case kSignedGreaterThanOrEqual:
3512 62092 : return greater_equal;
3513 : case kSignedLessThanOrEqual:
3514 67002 : return less_equal;
3515 : case kSignedGreaterThan:
3516 72868 : return greater;
3517 : case kUnsignedLessThan:
3518 164989 : return below;
3519 : case kUnsignedGreaterThanOrEqual:
3520 274889 : return above_equal;
3521 : case kUnsignedLessThanOrEqual:
3522 945152 : return below_equal;
3523 : case kUnsignedGreaterThan:
3524 157297 : return above;
3525 : case kOverflow:
3526 339794 : return overflow;
3527 : case kNotOverflow:
3528 7 : return no_overflow;
3529 : default:
3530 : break;
3531 : }
3532 0 : UNREACHABLE();
3533 : }
3534 :
3535 : } // namespace
3536 :
3537 : // Assembles branches after this instruction.
3538 5406918 : void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
3539 : Label::Distance flabel_distance =
3540 5406918 : branch->fallthru ? Label::kNear : Label::kFar;
3541 5406918 : Label* tlabel = branch->true_label;
3542 5406918 : Label* flabel = branch->false_label;
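 : // Floating-point compares set the parity flag when an operand is
 : // NaN (the unordered case); unordered-equal must then take the
 : // false branch and unordered-not-equal the true branch, before
 : // the main condition is tested.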
3543 5406918 : if (branch->condition == kUnorderedEqual) {
3544 38092 : __ j(parity_even, flabel, flabel_distance);
3545 5368826 : } else if (branch->condition == kUnorderedNotEqual) {
3546 118197 : __ j(parity_even, tlabel);
3547 : }
3548 5406917 : __ j(FlagsConditionToCondition(branch->condition), tlabel);
3549 :
3550 5406938 : if (!branch->fallthru) __ jmp(flabel, flabel_distance);
3551 5406938 : }
3552 :
3553 0 : void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
3554 : Instruction* instr) {
3555 : // TODO(jarin) Handle float comparisons (kUnordered[Not]Equal).
3556 0 : if (condition == kUnorderedEqual || condition == kUnorderedNotEqual) {
3557 : return;
3558 : }
3559 :
3560 : condition = NegateFlagsCondition(condition);
3561 0 : __ movl(kScratchRegister, Immediate(0));
3562 0 : __ cmovq(FlagsConditionToCondition(condition), kSpeculationPoisonRegister,
3563 0 : kScratchRegister);
3564 : }
3565 :
3566 332001 : void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
3567 : BranchInfo* branch) {
3568 : Label::Distance flabel_distance =
3569 332001 : branch->fallthru ? Label::kNear : Label::kFar;
3570 332001 : Label* tlabel = branch->true_label;
3571 332001 : Label* flabel = branch->false_label;
3572 332001 : Label nodeopt;
3573 332001 : if (branch->condition == kUnorderedEqual) {
3574 0 : __ j(parity_even, flabel, flabel_distance);
3575 332001 : } else if (branch->condition == kUnorderedNotEqual) {
3576 3897 : __ j(parity_even, tlabel);
3577 : }
3578 332001 : __ j(FlagsConditionToCondition(branch->condition), tlabel);
3579 :
3580 332005 : if (FLAG_deopt_every_n_times > 0) {
3581 : ExternalReference counter =
3582 288 : ExternalReference::stress_deopt_count(isolate());
3583 :
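 : // Stress mode: count down a per-isolate counter and force the
 : // deopt branch every n-th time. rflags is saved because the branch
 : // condition is still pending, and rax because load_rax/store_rax
 : // operate on it.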
3584 288 : __ pushfq();
3585 288 : __ pushq(rax);
3586 288 : __ load_rax(counter);
3587 : __ decl(rax);
3588 288 : __ j(not_zero, &nodeopt);
3589 :
3590 576 : __ Set(rax, FLAG_deopt_every_n_times);
3591 288 : __ store_rax(counter);
3592 288 : __ popq(rax);
3593 288 : __ popfq();
3594 288 : __ jmp(tlabel);
3595 :
3596 288 : __ bind(&nodeopt);
3597 288 : __ store_rax(counter);
3598 288 : __ popq(rax);
3599 288 : __ popfq();
3600 : }
3601 :
3602 332005 : if (!branch->fallthru) {
3603 0 : __ jmp(flabel, flabel_distance);
3604 : }
3605 332005 : }
3606 :
3607 5121997 : void CodeGenerator::AssembleArchJump(RpoNumber target) {
3608 8350975 : if (!IsNextInAssemblyOrder(target)) __ jmp(GetLabel(target));
3609 5122010 : }
3610 :
3611 34500 : void CodeGenerator::AssembleArchTrap(Instruction* instr,
3612 : FlagsCondition condition) {
3613 : auto ool = new (zone()) WasmOutOfLineTrap(this, instr);
3614 : Label* tlabel = ool->entry();
3615 34447 : Label end;
3616 34447 : if (condition == kUnorderedEqual) {
3617 0 : __ j(parity_even, &end);
3618 34447 : } else if (condition == kUnorderedNotEqual) {
3619 304 : __ j(parity_even, tlabel);
3620 : }
3621 34447 : __ j(FlagsConditionToCondition(condition), tlabel);
3622 34472 : __ bind(&end);
3623 34481 : }
3624 :
3625 : // Assembles boolean materializations after this instruction.
3626 375714 : void CodeGenerator::AssembleArchBoolean(Instruction* instr,
3627 : FlagsCondition condition) {
3628 : X64OperandConverter i(this, instr);
3629 375714 : Label done;
3630 :
3631 : // Materialize a full 64-bit 1 or 0 value. The result register is always the
3632 : // last output of the instruction.
3633 375714 : Label check;
3634 : DCHECK_NE(0u, instr->OutputCount());
3635 375714 : Register reg = i.OutputRegister(instr->OutputCount() - 1);
3636 375714 : if (condition == kUnorderedEqual) {
3637 2726 : __ j(parity_odd, &check, Label::kNear);
3638 : __ movl(reg, Immediate(0));
3639 2726 : __ jmp(&done, Label::kNear);
3640 372988 : } else if (condition == kUnorderedNotEqual) {
3641 2496 : __ j(parity_odd, &check, Label::kNear);
3642 : __ movl(reg, Immediate(1));
3643 2496 : __ jmp(&done, Label::kNear);
3644 : }
3645 375714 : __ bind(&check);
3646 375714 : __ setcc(FlagsConditionToCondition(condition), reg);
3647 : __ movzxbl(reg, reg);
3648 375717 : __ bind(&done);
3649 375720 : }
3650 :
3651 33872 : void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
3652 : X64OperandConverter i(this, instr);
3653 33872 : Register input = i.InputRegister(0);
3654 : std::vector<std::pair<int32_t, Label*>> cases;
3655 410520 : for (size_t index = 2; index < instr->InputCount(); index += 2) {
3656 376648 : cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
3657 : }
3658 : AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
3659 33873 : cases.data() + cases.size());
3660 33872 : }
3661 :
3662 0 : void CodeGenerator::AssembleArchLookupSwitch(Instruction* instr) {
3663 : X64OperandConverter i(this, instr);
3664 : Register input = i.InputRegister(0);
3665 0 : for (size_t index = 2; index < instr->InputCount(); index += 2) {
3666 0 : __ cmpl(input, Immediate(i.InputInt32(index + 0)));
3667 0 : __ j(equal, GetLabel(i.InputRpo(index + 1)));
3668 : }
3669 0 : AssembleArchJump(i.InputRpo(1));
3670 0 : }
3671 :
3672 307 : void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
3673 : X64OperandConverter i(this, instr);
3674 : Register input = i.InputRegister(0);
3675 307 : int32_t const case_count = static_cast<int32_t>(instr->InputCount() - 2);
3676 307 : Label** cases = zone()->NewArray<Label*>(case_count);
3677 402357 : for (int32_t index = 0; index < case_count; ++index) {
3678 402050 : cases[index] = GetLabel(i.InputRpo(index + 2));
3679 : }
3680 307 : Label* const table = AddJumpTable(cases, case_count);
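 : // An out-of-range index falls through to the default target;
 : // otherwise jump indirectly through the table of 8-byte label
 : // addresses emitted by AssembleJumpTable.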
3681 307 : __ cmpl(input, Immediate(case_count));
3682 307 : __ j(above_equal, GetLabel(i.InputRpo(1)));
3683 614 : __ leaq(kScratchRegister, Operand(table));
3684 307 : __ jmp(Operand(kScratchRegister, input, times_8, 0));
3685 307 : }
3686 :
3687 : namespace {
3688 :
3689 : static const int kQuadWordSize = 16;
3690 :
3691 : } // namespace
3692 :
3693 2515398 : void CodeGenerator::FinishFrame(Frame* frame) {
3694 : auto call_descriptor = linkage()->GetIncomingDescriptor();
3695 :
3696 : const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
3697 2515398 : if (saves_fp != 0) {
3698 : frame->AlignSavedCalleeRegisterSlots();
3699 0 : if (saves_fp != 0) { // Save callee-saved XMM registers.
3700 : const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
3701 0 : frame->AllocateSavedCalleeRegisterSlots(
3702 0 : saves_fp_count * (kQuadWordSize / kSystemPointerSize));
3703 : }
3704 : }
3705 : const RegList saves = call_descriptor->CalleeSavedRegisters();
3706 2515398 : if (saves != 0) { // Save callee-saved registers.
3707 : int count = 0;
3708 29846685 : for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
3709 14471120 : if (((1 << i) & saves)) {
3710 4522225 : ++count;
3711 : }
3712 : }
3713 : frame->AllocateSavedCalleeRegisterSlots(count);
3714 : }
3715 2515398 : }
3716 :
3717 2537961 : void CodeGenerator::AssembleConstructFrame() {
3718 : auto call_descriptor = linkage()->GetIncomingDescriptor();
3719 2537961 : if (frame_access_state()->has_frame()) {
3720 : int pc_base = __ pc_offset();
3721 :
3722 2538176 : if (call_descriptor->IsCFunctionCall()) {
3723 904445 : __ pushq(rbp);
3724 : __ movq(rbp, rsp);
3725 1633731 : } else if (call_descriptor->IsJSFunctionCall()) {
3726 644073 : __ Prologue();
3727 644075 : if (call_descriptor->PushArgumentCount()) {
3728 38448 : __ pushq(kJavaScriptCallArgCountRegister);
3729 : }
3730 : } else {
3731 1979162 : __ StubPrologue(info()->GetOutputStackFrameType());
3732 989652 : if (call_descriptor->IsWasmFunctionCall()) {
3733 864304 : __ pushq(kWasmInstanceRegister);
3734 125348 : } else if (call_descriptor->IsWasmImportWrapper()) {
3735 : // WASM import wrappers are passed a tuple in the place of the instance.
3736 : // Unpack the tuple into the instance and the target callable.
3737 : // This must be done here in the codegen because it cannot be expressed
3738 : // properly in the graph.
3739 : __ LoadTaggedPointerField(
3740 : kJSFunctionRegister,
3741 6490 : FieldOperand(kWasmInstanceRegister, Tuple2::kValue2Offset));
3742 : __ LoadTaggedPointerField(
3743 : kWasmInstanceRegister,
3744 6490 : FieldOperand(kWasmInstanceRegister, Tuple2::kValue1Offset));
3745 6490 : __ pushq(kWasmInstanceRegister);
3746 : }
3747 : }
3748 :
3749 2538344 : unwinding_info_writer_.MarkFrameConstructed(pc_base);
3750 : }
3751 : int required_slots = frame()->GetTotalFrameSlotCount() -
3752 2537639 : call_descriptor->CalculateFixedFrameSize();
3753 :
3754 2538138 : if (info()->is_osr()) {
3755 : // TurboFan OSR-compiled functions cannot be entered directly.
3756 4698 : __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
3757 :
3758 : // Unoptimized code jumps directly to this entrypoint while the unoptimized
3759 : // frame is still on the stack. Optimized code uses OSR values directly from
3760 : // the unoptimized frame. Thus, all that needs to be done is to allocate the
3761 : // remaining stack slots.
3762 4698 : if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
3763 4698 : osr_pc_offset_ = __ pc_offset();
3764 4698 : required_slots -= static_cast<int>(osr_helper()->UnoptimizedFrameSlots());
3765 4698 : ResetSpeculationPoison();
3766 : }
3767 :
3768 : const RegList saves = call_descriptor->CalleeSavedRegisters();
3769 : const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
3770 :
3771 2538009 : if (required_slots > 0) {
3772 : DCHECK(frame_access_state()->has_frame());
3773 2044763 : if (info()->IsWasm() && required_slots > 128) {
3774 : // For WebAssembly functions with big frames we have to do the stack
3775 : // overflow check before we construct the frame. Otherwise we may not
3776 : // have enough space on the stack to call the runtime for the stack
3777 : // overflow.
3778 8 : Label done;
3779 :
3780 : // If the frame is bigger than the stack, we throw the stack overflow
3781 : // exception unconditionally. Thereby we can avoid the integer overflow
3782 : // check in the condition code.
3783 8 : if (required_slots * kSystemPointerSize < FLAG_stack_size * 1024) {
3784 8 : __ movq(kScratchRegister,
3785 : FieldOperand(kWasmInstanceRegister,
3786 : WasmInstanceObject::kRealStackLimitAddressOffset));
3787 16 : __ movq(kScratchRegister, Operand(kScratchRegister, 0));
3788 : __ addq(kScratchRegister,
3789 : Immediate(required_slots * kSystemPointerSize));
3790 : __ cmpq(rsp, kScratchRegister);
3791 8 : __ j(above_equal, &done);
3792 : }
3793 :
3794 8 : __ near_call(wasm::WasmCode::kWasmStackOverflow,
3795 8 : RelocInfo::WASM_STUB_CALL);
3796 : ReferenceMap* reference_map = new (zone()) ReferenceMap(zone());
3797 : RecordSafepoint(reference_map, Safepoint::kSimple,
3798 8 : Safepoint::kNoLazyDeopt);
3799 8 : __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
3800 8 : __ bind(&done);
3801 : }
3802 :
3803 : // Skip callee-saved and return slots, which are created below.
3804 2044763 : required_slots -= base::bits::CountPopulation(saves);
3805 : required_slots -= base::bits::CountPopulation(saves_fp) *
3806 2044763 : (kQuadWordSize / kSystemPointerSize);
3807 2044763 : required_slots -= frame()->GetReturnSlotCount();
3808 2044763 : if (required_slots > 0) {
3809 1868179 : __ subq(rsp, Immediate(required_slots * kSystemPointerSize));
3810 : }
3811 : }
3812 :
3813 2538018 : if (saves_fp != 0) { // Save callee-saved XMM registers.
3814 : const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
3815 0 : const int stack_size = saves_fp_count * kQuadWordSize;
3816 : // Adjust the stack pointer.
3817 0 : __ subq(rsp, Immediate(stack_size));
3818 : // Store the registers on the stack.
3819 : int slot_idx = 0;
3820 0 : for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
3821 0 : if (!((1 << i) & saves_fp)) continue;
3822 0 : __ movdqu(Operand(rsp, kQuadWordSize * slot_idx),
3823 0 : XMMRegister::from_code(i));
3824 0 : slot_idx++;
3825 : }
3826 : }
3827 :
3828 2538018 : if (saves != 0) { // Save callee-saved registers.
3829 29846685 : for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
3830 14471120 : if (!((1 << i) & saves)) continue;
3831 4522225 : __ pushq(Register::from_code(i));
3832 : }
3833 : }
3834 :
3835 : // Allocate return slots (located after callee-saved).
3836 2538018 : if (frame()->GetReturnSlotCount() > 0) {
3837 672 : __ subq(rsp, Immediate(frame()->GetReturnSlotCount() * kSystemPointerSize));
3838 : }
3839 2538018 : }
3840 :
3841 2787043 : void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
3842 : auto call_descriptor = linkage()->GetIncomingDescriptor();
3843 :
3844 : // Restore registers.
3845 : const RegList saves = call_descriptor->CalleeSavedRegisters();
3846 2787043 : if (saves != 0) {
3847 : const int returns = frame()->GetReturnSlotCount();
3848 913813 : if (returns != 0) {
3849 656 : __ addq(rsp, Immediate(returns * kSystemPointerSize));
3850 : }
3851 30155829 : for (int i = 0; i < Register::kNumRegisters; i++) {
3852 14621008 : if (!((1 << i) & saves)) continue;
3853 4569065 : __ popq(Register::from_code(i));
3854 : }
3855 : }
3856 : const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
3857 2787043 : if (saves_fp != 0) {
3858 : const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
3859 0 : const int stack_size = saves_fp_count * kQuadWordSize;
3860 : // Load the registers from the stack.
3861 : int slot_idx = 0;
3862 0 : for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
3863 0 : if (!((1 << i) & saves_fp)) continue;
3864 0 : __ movdqu(XMMRegister::from_code(i),
3865 0 : Operand(rsp, kQuadWordSize * slot_idx));
3866 0 : slot_idx++;
3867 : }
3868 : // Adjust the stack pointer.
3869 0 : __ addq(rsp, Immediate(stack_size));
3870 : }
3871 :
3872 : unwinding_info_writer_.MarkBlockWillExit();
3873 :
3874 : // Might need rcx for scratch if pop_size is too big or if there is a variable
3875 : // pop count.
3876 : DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rcx.bit());
3877 : DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rdx.bit());
3878 2787011 : size_t pop_size = call_descriptor->StackParameterCount() * kSystemPointerSize;
3879 : X64OperandConverter g(this, nullptr);
3880 2787011 : if (call_descriptor->IsCFunctionCall()) {
3881 913813 : AssembleDeconstructFrame();
3882 1873198 : } else if (frame_access_state()->has_frame()) {
3883 3618884 : if (pop->IsImmediate() && g.ToConstant(pop).ToInt32() == 0) {
3884 : // Canonicalize JSFunction return sites for now.
3885 1789545 : if (return_label_.is_bound()) {
3886 281455 : __ jmp(&return_label_);
3887 : return;
3888 : } else {
3889 1508090 : __ bind(&return_label_);
3890 1508187 : AssembleDeconstructFrame();
3891 : }
3892 : } else {
3893 39788 : AssembleDeconstructFrame();
3894 : }
3895 : }
3896 :
3897 2505971 : if (pop->IsImmediate()) {
3898 4932475 : pop_size += g.ToConstant(pop).ToInt32() * kSystemPointerSize;
3899 2466268 : CHECK_LT(pop_size, static_cast<size_t>(std::numeric_limits<int>::max()));
3900 4932536 : __ Ret(static_cast<int>(pop_size), rcx);
3901 : } else {
3902 : Register pop_reg = g.ToRegister(pop);
3903 39764 : Register scratch_reg = pop_reg == rcx ? rdx : rcx;
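 : // Pop the return address into a scratch register so rsp can be
 : // advanced past both the fixed pop_size and the pop_reg dynamic
 : // slots before jumping back to the caller.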
3904 39764 : __ popq(scratch_reg);
3905 79528 : __ leaq(rsp, Operand(rsp, pop_reg, times_8, static_cast<int>(pop_size)));
3906 39764 : __ jmp(scratch_reg);
3907 : }
3908 : }
3909 :
3910 2514917 : void CodeGenerator::FinishCode() { tasm()->PatchConstPool(); }
3911 :
3912 38515432 : void CodeGenerator::AssembleMove(InstructionOperand* source,
3913 : InstructionOperand* destination) {
3914 : X64OperandConverter g(this, nullptr);
3915 : // Helper function to write the given constant to the dst register.
3916 19141472 : auto MoveConstantToRegister = [&](Register dst, Constant src) {
3917 19141472 : switch (src.type()) {
3918 : case Constant::kInt32: {
3919 3875880 : if (RelocInfo::IsWasmReference(src.rmode())) {
3920 27580563 : __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));
3921 : } else {
3922 : int32_t value = src.ToInt32();
3923 3875880 : if (value == 0) {
3924 992350 : __ xorl(dst, dst);
3925 : } else {
3926 2883530 : __ movl(dst, Immediate(value));
3927 : }
3928 : }
3929 : break;
3930 : }
3931 : case Constant::kInt64:
3932 1760941 : if (RelocInfo::IsWasmReference(src.rmode())) {
3933 0 : __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));
3934 : } else {
3935 1760941 : __ Set(dst, src.ToInt64());
3936 : }
3937 : break;
3938 : case Constant::kFloat32:
3939 688 : __ MoveNumber(dst, src.ToFloat32());
3940 344 : break;
3941 : case Constant::kFloat64:
3942 1790347 : __ MoveNumber(dst, src.ToFloat64().value());
3943 1790341 : break;
3944 : case Constant::kExternalReference:
3945 3272466 : __ Move(dst, src.ToExternalReference());
3946 3272488 : break;
3947 : case Constant::kHeapObject: {
3948 8439239 : Handle<HeapObject> src_object = src.ToHeapObject();
3949 : RootIndex index;
3950 8439231 : if (IsMaterializableFromRoot(src_object, &index)) {
3951 1920669 : __ LoadRoot(dst, index);
3952 : } else {
3953 6518560 : __ Move(dst, src_object);
3954 : }
3955 : break;
3956 : }
3957 : case Constant::kDelayedStringConstant: {
3958 2125 : const StringConstantBase* src_constant = src.ToDelayedStringConstant();
3959 2125 : __ MoveStringConstant(dst, src_constant);
3960 2125 : break;
3961 : }
3962 : case Constant::kRpoNumber:
3963 0 : UNREACHABLE(); // TODO(dcarney): load of labels on x64.
3964 : break;
3965 : }
3966 57657005 : };
3967 : // Helper function to write the given constant to the stack.
3968 38057 : auto MoveConstantToSlot = [&](Operand dst, Constant src) {
3969 38057 : if (!RelocInfo::IsWasmReference(src.rmode())) {
3970 38057 : switch (src.type()) {
3971 : case Constant::kInt32:
3972 38057 : __ movq(dst, Immediate(src.ToInt32()));
3973 19413 : return;
3974 : case Constant::kInt64:
3975 12727 : __ Set(dst, src.ToInt64());
3976 12727 : return;
3977 : default:
3978 : break;
3979 : }
3980 : }
3981 5917 : MoveConstantToRegister(kScratchRegister, src);
3982 5917 : __ movq(dst, kScratchRegister);
3983 38515432 : };
3984 : // Dispatch on the source and destination operand kinds.
3985 38515432 : switch (MoveType::InferMove(source, destination)) {
3986 : case MoveType::kRegisterToRegister:
3987 4149488 : if (source->IsRegister()) {
3988 4037106 : __ movq(g.ToRegister(destination), g.ToRegister(source));
3989 : } else {
3990 : DCHECK(source->IsFPRegister());
3991 : __ Movapd(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
3992 : }
3993 : return;
3994 : case MoveType::kRegisterToStack: {
3995 5794330 : Operand dst = g.ToOperand(destination);
3996 5794330 : if (source->IsRegister()) {
3997 5447362 : __ movq(dst, g.ToRegister(source));
3998 : } else {
3999 : DCHECK(source->IsFPRegister());
4000 : XMMRegister src = g.ToDoubleRegister(source);
4001 : MachineRepresentation rep =
4002 : LocationOperand::cast(source)->representation();
4003 346968 : if (rep != MachineRepresentation::kSimd128) {
4004 : __ Movsd(dst, src);
4005 : } else {
4006 : __ Movups(dst, src);
4007 : }
4008 : }
4009 : return;
4010 : }
4011 : case MoveType::kStackToRegister: {
4012 9030184 : Operand src = g.ToOperand(source);
4013 9030184 : if (source->IsStackSlot()) {
4014 8518159 : __ movq(g.ToRegister(destination), src);
4015 : } else {
4016 : DCHECK(source->IsFPStackSlot());
4017 : XMMRegister dst = g.ToDoubleRegister(destination);
4018 : MachineRepresentation rep =
4019 : LocationOperand::cast(source)->representation();
4020 512025 : if (rep != MachineRepresentation::kSimd128) {
4021 : __ Movsd(dst, src);
4022 : } else {
4023 : __ Movups(dst, src);
4024 : }
4025 : }
4026 : return;
4027 : }
4028 : case MoveType::kStackToStack: {
4029 44958 : Operand src = g.ToOperand(source);
4030 44958 : Operand dst = g.ToOperand(destination);
4031 44958 : if (source->IsStackSlot()) {
4032 : // Spill on demand to use a temporary register for memory-to-memory
4033 : // moves.
4034 24813 : __ movq(kScratchRegister, src);
4035 : __ movq(dst, kScratchRegister);
4036 : } else {
4037 : MachineRepresentation rep =
4038 : LocationOperand::cast(source)->representation();
4039 20145 : if (rep != MachineRepresentation::kSimd128) {
4040 : __ Movsd(kScratchDoubleReg, src);
4041 : __ Movsd(dst, kScratchDoubleReg);
4042 : } else {
4043 : DCHECK(source->IsSimd128StackSlot());
4044 : __ Movups(kScratchDoubleReg, src);
4045 : __ Movups(dst, kScratchDoubleReg);
4046 : }
4047 : }
4048 : return;
4049 : }
4050 : case MoveType::kConstantToRegister: {
4051 19451822 : Constant src = g.ToConstant(source);
4052 19451982 : if (destination->IsRegister()) {
4053 19135581 : MoveConstantToRegister(g.ToRegister(destination), src);
4054 : } else {
4055 : DCHECK(destination->IsFPRegister());
4056 316401 : XMMRegister dst = g.ToDoubleRegister(destination);
4057 316401 : if (src.type() == Constant::kFloat32) {
4058 : // TODO(turbofan): Can we do better here?
4059 12670 : __ Move(dst, bit_cast<uint32_t>(src.ToFloat32()));
4060 : } else {
4061 : DCHECK_EQ(src.type(), Constant::kFloat64);
4062 303731 : __ Move(dst, src.ToFloat64().AsUint64());
4063 : }
4064 : }
4065 : return;
4066 : }
4067 : case MoveType::kConstantToStack: {
4068 44616 : Constant src = g.ToConstant(source);
4069 44616 : Operand dst = g.ToOperand(destination);
4070 44616 : if (destination->IsStackSlot()) {
4071 38057 : MoveConstantToSlot(dst, src);
4072 : } else {
4073 : DCHECK(destination->IsFPStackSlot());
4074 6559 : if (src.type() == Constant::kFloat32) {
4075 3008 : __ movl(dst, Immediate(bit_cast<uint32_t>(src.ToFloat32())));
4076 : } else {
4077 : DCHECK_EQ(src.type(), Constant::kFloat64);
4078 3551 : __ movq(kScratchRegister, src.ToFloat64().AsUint64());
4079 : __ movq(dst, kScratchRegister);
4080 : }
4081 : }
4082 : return;
4083 : }
4084 : }
4085 0 : UNREACHABLE();
4086 : }
4087 :
4088 66091 : void CodeGenerator::AssembleSwap(InstructionOperand* source,
4089 : InstructionOperand* destination) {
4090 : X64OperandConverter g(this, nullptr);
4091 : // Dispatch on the source and destination operand kinds. Not all
4092 : // combinations are possible.
4093 66091 : switch (MoveType::InferSwap(source, destination)) {
4094 : case MoveType::kRegisterToRegister: {
4095 55667 : if (source->IsRegister()) {
4096 : Register src = g.ToRegister(source);
4097 : Register dst = g.ToRegister(destination);
4098 53100 : __ movq(kScratchRegister, src);
4099 : __ movq(src, dst);
4100 : __ movq(dst, kScratchRegister);
4101 : } else {
4102 : DCHECK(source->IsFPRegister());
4103 : XMMRegister src = g.ToDoubleRegister(source);
4104 : XMMRegister dst = g.ToDoubleRegister(destination);
4105 : __ Movapd(kScratchDoubleReg, src);
4106 : __ Movapd(src, dst);
4107 : __ Movapd(dst, kScratchDoubleReg);
4108 : }
4109 : return;
4110 : }
4111 : case MoveType::kRegisterToStack: {
4112 6767 : if (source->IsRegister()) {
4113 : Register src = g.ToRegister(source);
4114 1583 : __ pushq(src);
4115 : frame_access_state()->IncreaseSPDelta(1);
4116 1583 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4117 1583 : kSystemPointerSize);
4118 : __ movq(src, g.ToOperand(destination));
4119 : frame_access_state()->IncreaseSPDelta(-1);
4120 1583 : __ popq(g.ToOperand(destination));
4121 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4122 1583 : -kSystemPointerSize);
4123 : } else {
4124 : DCHECK(source->IsFPRegister());
4125 : XMMRegister src = g.ToDoubleRegister(source);
4126 5184 : Operand dst = g.ToOperand(destination);
4127 : MachineRepresentation rep =
4128 : LocationOperand::cast(source)->representation();
4129 5184 : if (rep != MachineRepresentation::kSimd128) {
4130 : __ Movsd(kScratchDoubleReg, src);
4131 : __ Movsd(src, dst);
4132 : __ Movsd(dst, kScratchDoubleReg);
4133 : } else {
4134 : __ Movups(kScratchDoubleReg, src);
4135 : __ Movups(src, dst);
4136 : __ Movups(dst, kScratchDoubleReg);
4137 : }
4138 : }
4139 : return;
4140 : }
4141 : case MoveType::kStackToStack: {
4142 3657 : Operand src = g.ToOperand(source);
4143 3657 : Operand dst = g.ToOperand(destination);
4144 : MachineRepresentation rep =
4145 : LocationOperand::cast(source)->representation();
4146 3657 : if (rep != MachineRepresentation::kSimd128) {
4147 : Register tmp = kScratchRegister;
4148 2789 : __ movq(tmp, dst);
4149 2789 : __ pushq(src); // Then use stack to copy src to destination.
4150 2789 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4151 2789 : kSystemPointerSize);
4152 2789 : __ popq(dst);
4153 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4154 2789 : -kSystemPointerSize);
4155 : __ movq(src, tmp);
4156 : } else {
4157 : // Without AVX, misaligned reads and writes will trap. Move using the
4158 : // stack, in two parts.
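 : // Each pushq/popq pair moves kSystemPointerSize bytes, so the
 : // 16-byte slot is copied as two halves; the second half is
 : // addressed with an extra kSystemPointerSize offset.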
4159 868 : __ movups(kScratchDoubleReg, dst); // Save dst in scratch register.
4160 868 : __ pushq(src); // Then use stack to copy src to destination.
4161 868 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4162 868 : kSystemPointerSize);
4163 868 : __ popq(dst);
4164 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4165 868 : -kSystemPointerSize);
4166 868 : __ pushq(g.ToOperand(source, kSystemPointerSize));
4167 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4168 868 : kSystemPointerSize);
4169 868 : __ popq(g.ToOperand(destination, kSystemPointerSize));
4170 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4171 868 : -kSystemPointerSize);
4172 868 : __ movups(src, kScratchDoubleReg);
4173 : }
4174 : return;
4175 : }
4176 : default:
4177 0 : UNREACHABLE();
4178 : break;
4179 : }
4180 : }
4181 :
4182 307 : void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
4183 402357 : for (size_t index = 0; index < target_count; ++index) {
4184 201025 : __ dq(targets[index]);
4185 : }
4186 307 : }
4187 :
4188 : #undef __
4189 :
4190 : } // namespace compiler
4191 : } // namespace internal
4192 120216 : } // namespace v8
|