Line data Source code
1 : // Copyright 2013 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #include "src/compiler/backend/code-generator.h"
6 :
7 : #include <limits>
8 :
9 : #include "src/base/overflowing-math.h"
10 : #include "src/compiler/backend/code-generator-impl.h"
11 : #include "src/compiler/backend/gap-resolver.h"
12 : #include "src/compiler/node-matchers.h"
13 : #include "src/compiler/osr.h"
14 : #include "src/heap/heap-inl.h" // crbug.com/v8/8499
15 : #include "src/macro-assembler.h"
16 : #include "src/objects/smi.h"
17 : #include "src/optimized-compilation-info.h"
18 : #include "src/wasm/wasm-code-manager.h"
19 : #include "src/wasm/wasm-objects.h"
20 : #include "src/x64/assembler-x64.h"
21 :
22 : namespace v8 {
23 : namespace internal {
24 : namespace compiler {
25 :
26 : #define __ tasm()->
27 :
28 : // Adds X64 specific methods for decoding operands.
29 : class X64OperandConverter : public InstructionOperandConverter {
30 : public:
31 : X64OperandConverter(CodeGenerator* gen, Instruction* instr)
32 : : InstructionOperandConverter(gen, instr) {}
33 :
34 : Immediate InputImmediate(size_t index) {
35 5335996 : return ToImmediate(instr_->InputAt(index));
36 : }
37 :
38 1056256 : Operand InputOperand(size_t index, int extra = 0) {
39 2112523 : return ToOperand(instr_->InputAt(index), extra);
40 : }
41 :
42 0 : Operand OutputOperand() { return ToOperand(instr_->Output()); }
43 :
44 4382334 : Immediate ToImmediate(InstructionOperand* operand) {
45 4382334 : Constant constant = ToConstant(operand);
46 4382452 : if (constant.type() == Constant::kFloat64) {
47 : DCHECK_EQ(0, constant.ToFloat64().AsUint64());
48 365963 : return Immediate(0);
49 : }
50 4016489 : if (RelocInfo::IsWasmReference(constant.rmode())) {
51 0 : return Immediate(constant.ToInt32(), constant.rmode());
52 : }
53 4016489 : return Immediate(constant.ToInt32());
54 : }
55 :
56 : Operand ToOperand(InstructionOperand* op, int extra = 0) {
57 : DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
58 15785324 : return SlotToOperand(AllocatedOperand::cast(op)->index(), extra);
59 : }
60 :
61 15798210 : Operand SlotToOperand(int slot_index, int extra = 0) {
62 15798210 : FrameOffset offset = frame_access_state()->GetFrameOffset(slot_index);
63 : return Operand(offset.from_stack_pointer() ? rsp : rbp,
64 31596262 : offset.offset() + extra);
65 : }
66 :
67 : static size_t NextOffset(size_t* offset) {
68 16339771 : size_t i = *offset;
69 29251325 : (*offset)++;
70 : return i;
71 : }
72 :
73 : static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) {
74 : STATIC_ASSERT(0 == static_cast<int>(times_1));
75 : STATIC_ASSERT(1 == static_cast<int>(times_2));
76 : STATIC_ASSERT(2 == static_cast<int>(times_4));
77 : STATIC_ASSERT(3 == static_cast<int>(times_8));
78 1302136 : int scale = static_cast<int>(mode - one);
79 : DCHECK(scale >= 0 && scale < 4);
80 1302136 : return static_cast<ScaleFactor>(scale);
81 : }
82 :
83 16339771 : Operand MemoryOperand(size_t* offset) {
84 16339771 : AddressingMode mode = AddressingModeField::decode(instr_->opcode());
85 16339771 : switch (mode) {
86 : case kMode_MR: {
87 2362025 : Register base = InputRegister(NextOffset(offset));
88 : int32_t disp = 0;
89 2362025 : return Operand(base, disp);
90 : }
91 : case kMode_MRI: {
92 11173551 : Register base = InputRegister(NextOffset(offset));
93 : int32_t disp = InputInt32(NextOffset(offset));
94 11173380 : return Operand(base, disp);
95 : }
96 : case kMode_MR1:
97 : case kMode_MR2:
98 : case kMode_MR4:
99 : case kMode_MR8: {
100 696983 : Register base = InputRegister(NextOffset(offset));
101 696983 : Register index = InputRegister(NextOffset(offset));
102 : ScaleFactor scale = ScaleFor(kMode_MR1, mode);
103 : int32_t disp = 0;
104 696983 : return Operand(base, index, scale, disp);
105 : }
106 : case kMode_MR1I:
107 : case kMode_MR2I:
108 : case kMode_MR4I:
109 : case kMode_MR8I: {
110 455620 : Register base = InputRegister(NextOffset(offset));
111 455620 : Register index = InputRegister(NextOffset(offset));
112 : ScaleFactor scale = ScaleFor(kMode_MR1I, mode);
113 : int32_t disp = InputInt32(NextOffset(offset));
114 455618 : return Operand(base, index, scale, disp);
115 : }
116 : case kMode_M1: {
117 0 : Register base = InputRegister(NextOffset(offset));
118 : int32_t disp = 0;
119 0 : return Operand(base, disp);
120 : }
121 : case kMode_M2:
122 0 : UNREACHABLE(); // Should use kModeMR with more compact encoding instead
123 : return Operand(no_reg, 0);
124 : case kMode_M4:
125 : case kMode_M8: {
126 19753 : Register index = InputRegister(NextOffset(offset));
127 : ScaleFactor scale = ScaleFor(kMode_M1, mode);
128 : int32_t disp = 0;
129 19753 : return Operand(index, scale, disp);
130 : }
131 : case kMode_M1I:
132 : case kMode_M2I:
133 : case kMode_M4I:
134 : case kMode_M8I: {
135 129780 : Register index = InputRegister(NextOffset(offset));
136 : ScaleFactor scale = ScaleFor(kMode_M1I, mode);
137 : int32_t disp = InputInt32(NextOffset(offset));
138 129780 : return Operand(index, scale, disp);
139 : }
140 : case kMode_Root: {
141 1502059 : Register base = kRootRegister;
142 : int32_t disp = InputInt32(NextOffset(offset));
143 1502060 : return Operand(base, disp);
144 : }
145 : case kMode_None:
146 0 : UNREACHABLE();
147 : }
148 0 : UNREACHABLE();
149 : }
150 :
151 : Operand MemoryOperand(size_t first_input = 0) {
152 9239072 : return MemoryOperand(&first_input);
153 : }
154 : };
155 :
156 : namespace {
157 :
158 : bool HasImmediateInput(Instruction* instr, size_t index) {
159 : return instr->InputAt(index)->IsImmediate();
160 : }
161 :
162 0 : class OutOfLineLoadFloat32NaN final : public OutOfLineCode {
163 : public:
164 : OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result)
165 132 : : OutOfLineCode(gen), result_(result) {}
166 :
167 132 : void Generate() final {
168 : __ Xorps(result_, result_);
169 : __ Divss(result_, result_);
170 132 : }
171 :
172 : private:
173 : XMMRegister const result_;
174 : };
175 :
176 0 : class OutOfLineLoadFloat64NaN final : public OutOfLineCode {
177 : public:
178 : OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result)
179 592 : : OutOfLineCode(gen), result_(result) {}
180 :
181 592 : void Generate() final {
182 : __ Xorpd(result_, result_);
183 : __ Divsd(result_, result_);
184 592 : }
185 :
186 : private:
187 : XMMRegister const result_;
188 : };
189 :
190 0 : class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
191 : public:
192 : OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
193 : XMMRegister input, StubCallMode stub_mode,
194 : UnwindingInfoWriter* unwinding_info_writer)
195 : : OutOfLineCode(gen),
196 : result_(result),
197 : input_(input),
198 : stub_mode_(stub_mode),
199 : unwinding_info_writer_(unwinding_info_writer),
200 : isolate_(gen->isolate()),
201 53313 : zone_(gen->zone()) {}
202 :
203 53310 : void Generate() final {
204 53310 : __ subq(rsp, Immediate(kDoubleSize));
205 53312 : unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
206 53312 : kDoubleSize);
207 106626 : __ Movsd(MemOperand(rsp, 0), input_);
208 53312 : if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
209 : // A direct call to a wasm runtime stub defined in this module.
210 : // Just encode the stub index. This will be patched when the code
211 : // is added to the native module and copied into wasm code space.
212 1625 : __ near_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
213 : } else {
214 103374 : __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET);
215 : }
216 106632 : __ movl(result_, MemOperand(rsp, 0));
217 53316 : __ addq(rsp, Immediate(kDoubleSize));
218 53316 : unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
219 53316 : -kDoubleSize);
220 53316 : }
221 :
222 : private:
223 : Register const result_;
224 : XMMRegister const input_;
225 : StubCallMode stub_mode_;
226 : UnwindingInfoWriter* const unwinding_info_writer_;
227 : Isolate* isolate_;
228 : Zone* zone_;
229 : };
230 :
231 0 : class OutOfLineRecordWrite final : public OutOfLineCode {
232 : public:
233 : OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand,
234 : Register value, Register scratch0, Register scratch1,
235 : RecordWriteMode mode, StubCallMode stub_mode)
236 : : OutOfLineCode(gen),
237 : object_(object),
238 : operand_(operand),
239 : value_(value),
240 : scratch0_(scratch0),
241 : scratch1_(scratch1),
242 : mode_(mode),
243 : stub_mode_(stub_mode),
244 319799 : zone_(gen->zone()) {}
245 :
246 319798 : void Generate() final {
247 319798 : if (mode_ > RecordWriteMode::kValueIsPointer) {
248 252921 : __ JumpIfSmi(value_, exit());
249 : }
250 : if (COMPRESS_POINTERS_BOOL) {
251 : __ DecompressTaggedPointer(value_, value_);
252 : }
253 : __ CheckPageFlag(value_, scratch0_,
254 : MemoryChunk::kPointersToHereAreInterestingMask, zero,
255 319799 : exit());
256 319799 : __ leaq(scratch1_, operand_);
257 :
258 : RememberedSetAction const remembered_set_action =
259 319799 : mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
260 319799 : : OMIT_REMEMBERED_SET;
261 : SaveFPRegsMode const save_fp_mode =
262 319799 : frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
263 :
264 319799 : if (mode_ == RecordWriteMode::kValueIsEphemeronKey) {
265 112 : __ CallEphemeronKeyBarrier(object_, scratch1_, save_fp_mode);
266 319687 : } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
267 : // A direct call to a wasm runtime stub defined in this module.
268 : // Just encode the stub index. This will be patched when the code
269 : // is added to the native module and copied into wasm code space.
270 : __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
271 286 : save_fp_mode, wasm::WasmCode::kWasmRecordWrite);
272 : } else {
273 : __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
274 319401 : save_fp_mode);
275 : }
276 319799 : }
277 :
278 : private:
279 : Register const object_;
280 : Operand const operand_;
281 : Register const value_;
282 : Register const scratch0_;
283 : Register const scratch1_;
284 : RecordWriteMode const mode_;
285 : StubCallMode const stub_mode_;
286 : Zone* zone_;
287 : };
288 :
289 0 : class WasmOutOfLineTrap : public OutOfLineCode {
290 : public:
291 : WasmOutOfLineTrap(CodeGenerator* gen, Instruction* instr)
292 365784 : : OutOfLineCode(gen), gen_(gen), instr_(instr) {}
293 :
294 142017 : void Generate() override {
295 142017 : X64OperandConverter i(gen_, instr_);
296 : TrapId trap_id =
297 284085 : static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
298 : GenerateWithTrapId(trap_id);
299 142115 : }
300 :
301 : protected:
302 : CodeGenerator* gen_;
303 :
304 365611 : void GenerateWithTrapId(TrapId trap_id) { GenerateCallToTrap(trap_id); }
305 :
306 : private:
307 365475 : void GenerateCallToTrap(TrapId trap_id) {
308 365475 : if (!gen_->wasm_runtime_exception_support()) {
309 : // We cannot test calls to the runtime in cctest/test-run-wasm.
310 : // Therefore we emit a call to C here instead of a call to the runtime.
311 153836 : __ PrepareCallCFunction(0);
312 153836 : __ CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(),
313 153836 : 0);
314 153836 : __ LeaveFrame(StackFrame::WASM_COMPILED);
315 153836 : auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
316 : size_t pop_size =
317 153836 : call_descriptor->StackParameterCount() * kSystemPointerSize;
318 : // Use rcx as a scratch register, we return anyways immediately.
319 153836 : __ Ret(static_cast<int>(pop_size), rcx);
320 : } else {
321 211719 : gen_->AssembleSourcePosition(instr_);
322 : // A direct call to a wasm runtime stub defined in this module.
323 : // Just encode the stub index. This will be patched when the code
324 : // is added to the native module and copied into wasm code space.
325 211976 : __ near_call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
326 : ReferenceMap* reference_map =
327 211796 : new (gen_->zone()) ReferenceMap(gen_->zone());
328 211600 : gen_->RecordSafepoint(reference_map, Safepoint::kSimple,
329 211600 : Safepoint::kNoLazyDeopt);
330 212115 : __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
331 : }
332 365643 : }
333 :
334 : Instruction* instr_;
335 : };
336 :
337 0 : class WasmProtectedInstructionTrap final : public WasmOutOfLineTrap {
338 : public:
339 : WasmProtectedInstructionTrap(CodeGenerator* gen, int pc, Instruction* instr)
340 223397 : : WasmOutOfLineTrap(gen, instr), pc_(pc) {}
341 :
342 223342 : void Generate() final {
343 223342 : gen_->AddProtectedInstructionLanding(pc_, __ pc_offset());
344 223543 : GenerateWithTrapId(TrapId::kTrapMemOutOfBounds);
345 223570 : }
346 :
347 : private:
348 : int pc_;
349 : };
350 :
351 12520123 : void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen,
352 : InstructionCode opcode, Instruction* instr,
353 : X64OperandConverter& i, int pc) {
354 : const MemoryAccessMode access_mode =
355 12520123 : static_cast<MemoryAccessMode>(MiscField::decode(opcode));
356 12520123 : if (access_mode == kMemoryAccessProtected) {
357 : new (zone) WasmProtectedInstructionTrap(codegen, pc, instr);
358 : }
359 12519801 : }
360 :
361 11855842 : void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
362 : InstructionCode opcode, Instruction* instr,
363 : X64OperandConverter& i) {
364 : const MemoryAccessMode access_mode =
365 11855842 : static_cast<MemoryAccessMode>(MiscField::decode(opcode));
366 11855842 : if (access_mode == kMemoryAccessPoisoned) {
367 : Register value = i.OutputRegister();
368 0 : codegen->tasm()->andq(value, kSpeculationPoisonRegister);
369 : }
370 11855842 : }
371 :
372 : } // namespace
373 :
374 : #define ASSEMBLE_UNOP(asm_instr) \
375 : do { \
376 : if (instr->Output()->IsRegister()) { \
377 : __ asm_instr(i.OutputRegister()); \
378 : } else { \
379 : __ asm_instr(i.OutputOperand()); \
380 : } \
381 : } while (false)
382 :
383 : #define ASSEMBLE_BINOP(asm_instr) \
384 : do { \
385 : if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
386 : size_t index = 1; \
387 : Operand right = i.MemoryOperand(&index); \
388 : __ asm_instr(i.InputRegister(0), right); \
389 : } else { \
390 : if (HasImmediateInput(instr, 1)) { \
391 : if (instr->InputAt(0)->IsRegister()) { \
392 : __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
393 : } else { \
394 : __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \
395 : } \
396 : } else { \
397 : if (instr->InputAt(1)->IsRegister()) { \
398 : __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \
399 : } else { \
400 : __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \
401 : } \
402 : } \
403 : } \
404 : } while (false)
405 :
406 : #define ASSEMBLE_COMPARE(asm_instr) \
407 : do { \
408 : if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
409 : size_t index = 0; \
410 : Operand left = i.MemoryOperand(&index); \
411 : if (HasImmediateInput(instr, index)) { \
412 : __ asm_instr(left, i.InputImmediate(index)); \
413 : } else { \
414 : __ asm_instr(left, i.InputRegister(index)); \
415 : } \
416 : } else { \
417 : if (HasImmediateInput(instr, 1)) { \
418 : if (instr->InputAt(0)->IsRegister()) { \
419 : __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
420 : } else { \
421 : __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \
422 : } \
423 : } else { \
424 : if (instr->InputAt(1)->IsRegister()) { \
425 : __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \
426 : } else { \
427 : __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \
428 : } \
429 : } \
430 : } \
431 : } while (false)
432 :
433 : #define ASSEMBLE_MULT(asm_instr) \
434 : do { \
435 : if (HasImmediateInput(instr, 1)) { \
436 : if (instr->InputAt(0)->IsRegister()) { \
437 : __ asm_instr(i.OutputRegister(), i.InputRegister(0), \
438 : i.InputImmediate(1)); \
439 : } else { \
440 : __ asm_instr(i.OutputRegister(), i.InputOperand(0), \
441 : i.InputImmediate(1)); \
442 : } \
443 : } else { \
444 : if (instr->InputAt(1)->IsRegister()) { \
445 : __ asm_instr(i.OutputRegister(), i.InputRegister(1)); \
446 : } else { \
447 : __ asm_instr(i.OutputRegister(), i.InputOperand(1)); \
448 : } \
449 : } \
450 : } while (false)
451 :
452 : #define ASSEMBLE_SHIFT(asm_instr, width) \
453 : do { \
454 : if (HasImmediateInput(instr, 1)) { \
455 : if (instr->Output()->IsRegister()) { \
456 : __ asm_instr(i.OutputRegister(), Immediate(i.InputInt##width(1))); \
457 : } else { \
458 : __ asm_instr(i.OutputOperand(), Immediate(i.InputInt##width(1))); \
459 : } \
460 : } else { \
461 : if (instr->Output()->IsRegister()) { \
462 : __ asm_instr##_cl(i.OutputRegister()); \
463 : } else { \
464 : __ asm_instr##_cl(i.OutputOperand()); \
465 : } \
466 : } \
467 : } while (false)
468 :
469 : #define ASSEMBLE_MOVX(asm_instr) \
470 : do { \
471 : if (instr->addressing_mode() != kMode_None) { \
472 : __ asm_instr(i.OutputRegister(), i.MemoryOperand()); \
473 : } else if (instr->InputAt(0)->IsRegister()) { \
474 : __ asm_instr(i.OutputRegister(), i.InputRegister(0)); \
475 : } else { \
476 : __ asm_instr(i.OutputRegister(), i.InputOperand(0)); \
477 : } \
478 : } while (false)
479 :
480 : #define ASSEMBLE_SSE_BINOP(asm_instr) \
481 : do { \
482 : if (instr->InputAt(1)->IsFPRegister()) { \
483 : __ asm_instr(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); \
484 : } else { \
485 : __ asm_instr(i.InputDoubleRegister(0), i.InputOperand(1)); \
486 : } \
487 : } while (false)
488 :
489 : #define ASSEMBLE_SSE_UNOP(asm_instr) \
490 : do { \
491 : if (instr->InputAt(0)->IsFPRegister()) { \
492 : __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); \
493 : } else { \
494 : __ asm_instr(i.OutputDoubleRegister(), i.InputOperand(0)); \
495 : } \
496 : } while (false)
497 :
498 : #define ASSEMBLE_AVX_BINOP(asm_instr) \
499 : do { \
500 : CpuFeatureScope avx_scope(tasm(), AVX); \
501 : if (instr->InputAt(1)->IsFPRegister()) { \
502 : __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
503 : i.InputDoubleRegister(1)); \
504 : } else { \
505 : __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
506 : i.InputOperand(1)); \
507 : } \
508 : } while (false)
509 :
510 : #define ASSEMBLE_IEEE754_BINOP(name) \
511 : do { \
512 : __ PrepareCallCFunction(2); \
513 : __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \
514 : } while (false)
515 :
516 : #define ASSEMBLE_IEEE754_UNOP(name) \
517 : do { \
518 : __ PrepareCallCFunction(1); \
519 : __ CallCFunction(ExternalReference::ieee754_##name##_function(), 1); \
520 : } while (false)
521 :
522 : #define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
523 : do { \
524 : Label binop; \
525 : __ bind(&binop); \
526 : __ mov_inst(rax, i.MemoryOperand(1)); \
527 : __ movl(i.TempRegister(0), rax); \
528 : __ bin_inst(i.TempRegister(0), i.InputRegister(0)); \
529 : __ lock(); \
530 : __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0)); \
531 : __ j(not_equal, &binop); \
532 : } while (false)
533 :
534 : #define ASSEMBLE_ATOMIC64_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
535 : do { \
536 : Label binop; \
537 : __ bind(&binop); \
538 : __ mov_inst(rax, i.MemoryOperand(1)); \
539 : __ movq(i.TempRegister(0), rax); \
540 : __ bin_inst(i.TempRegister(0), i.InputRegister(0)); \
541 : __ lock(); \
542 : __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0)); \
543 : __ j(not_equal, &binop); \
544 : } while (false)
545 :
546 : #define ASSEMBLE_SIMD_INSTR(opcode, dst_operand, index) \
547 : do { \
548 : if (instr->InputAt(index)->IsSimd128Register()) { \
549 : __ opcode(dst_operand, i.InputSimd128Register(index)); \
550 : } else { \
551 : __ opcode(dst_operand, i.InputOperand(index)); \
552 : } \
553 : } while (false)
554 :
555 : #define ASSEMBLE_SIMD_IMM_INSTR(opcode, dst_operand, index, imm) \
556 : do { \
557 : if (instr->InputAt(index)->IsSimd128Register()) { \
558 : __ opcode(dst_operand, i.InputSimd128Register(index), imm); \
559 : } else { \
560 : __ opcode(dst_operand, i.InputOperand(index), imm); \
561 : } \
562 : } while (false)
563 :
564 : #define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode) \
565 : do { \
566 : XMMRegister dst = i.OutputSimd128Register(); \
567 : DCHECK_EQ(dst, i.InputSimd128Register(0)); \
568 : byte input_index = instr->InputCount() == 2 ? 1 : 0; \
569 : ASSEMBLE_SIMD_INSTR(opcode, dst, input_index); \
570 : } while (false)
571 :
572 : #define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, SSELevel, imm) \
573 : do { \
574 : CpuFeatureScope sse_scope(tasm(), SSELevel); \
575 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); \
576 : __ opcode(i.OutputSimd128Register(), i.InputSimd128Register(1), imm); \
577 : } while (false)
578 :
579 2637246 : void CodeGenerator::AssembleDeconstructFrame() {
580 2637246 : unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
581 2637517 : __ movq(rsp, rbp);
582 2638544 : __ popq(rbp);
583 2638260 : }
584 :
585 119850 : void CodeGenerator::AssemblePrepareTailCall() {
586 119850 : if (frame_access_state()->has_frame()) {
587 137125 : __ movq(rbp, MemOperand(rbp, 0));
588 : }
589 : frame_access_state()->SetFrameAccessToSP();
590 119851 : }
591 :
592 1288 : void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
593 : Register scratch1,
594 : Register scratch2,
595 : Register scratch3) {
596 : DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
597 1288 : Label done;
598 :
599 : // Check if current frame is an arguments adaptor frame.
600 2576 : __ cmpq(Operand(rbp, CommonFrameConstants::kContextOrFrameTypeOffset),
601 1288 : Immediate(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
602 1288 : __ j(not_equal, &done, Label::kNear);
603 :
604 : // Load arguments count from current arguments adaptor frame (note, it
605 : // does not include receiver).
606 1288 : Register caller_args_count_reg = scratch1;
607 2576 : __ SmiUntag(caller_args_count_reg,
608 1288 : Operand(rbp, ArgumentsAdaptorFrameConstants::kLengthOffset));
609 :
610 : ParameterCount callee_args_count(args_reg);
611 : __ PrepareForTailCall(callee_args_count, caller_args_count_reg, scratch2,
612 1288 : scratch3);
613 1288 : __ bind(&done);
614 1288 : }
615 :
616 : namespace {
617 :
618 271393 : void AdjustStackPointerForTailCall(Assembler* assembler,
619 : FrameAccessState* state,
620 : int new_slot_above_sp,
621 : bool allow_shrinkage = true) {
622 : int current_sp_offset = state->GetSPToFPSlotCount() +
623 271393 : StandardFrameConstants::kFixedSlotCountAboveFp;
624 271393 : int stack_slot_delta = new_slot_above_sp - current_sp_offset;
625 271393 : if (stack_slot_delta > 0) {
626 780 : assembler->subq(rsp, Immediate(stack_slot_delta * kSystemPointerSize));
627 : state->IncreaseSPDelta(stack_slot_delta);
628 270613 : } else if (allow_shrinkage && stack_slot_delta < 0) {
629 68215 : assembler->addq(rsp, Immediate(-stack_slot_delta * kSystemPointerSize));
630 : state->IncreaseSPDelta(stack_slot_delta);
631 : }
632 271393 : }
633 :
634 2096 : void SetupShuffleMaskOnStack(TurboAssembler* assembler, uint32_t* mask) {
635 2096 : int64_t shuffle_mask = (mask[2]) | (static_cast<uint64_t>(mask[3]) << 32);
636 2096 : assembler->movq(kScratchRegister, shuffle_mask);
637 2096 : assembler->Push(kScratchRegister);
638 2096 : shuffle_mask = (mask[0]) | (static_cast<uint64_t>(mask[1]) << 32);
639 : assembler->movq(kScratchRegister, shuffle_mask);
640 2096 : assembler->Push(kScratchRegister);
641 2096 : }
642 :
643 : } // namespace
644 :
645 119862 : void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
646 : int first_unused_stack_slot) {
647 119862 : CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
648 : ZoneVector<MoveOperands*> pushes(zone());
649 119862 : GetPushCompatibleMoves(instr, flags, &pushes);
650 :
651 132903 : if (!pushes.empty() &&
652 26080 : (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
653 : first_unused_stack_slot)) {
654 : X64OperandConverter g(this, instr);
655 44708 : for (auto move : pushes) {
656 : LocationOperand destination_location(
657 : LocationOperand::cast(move->destination()));
658 31668 : InstructionOperand source(move->source());
659 : AdjustStackPointerForTailCall(tasm(), frame_access_state(),
660 31668 : destination_location.index());
661 31668 : if (source.IsStackSlot()) {
662 : LocationOperand source_location(LocationOperand::cast(source));
663 13052 : __ Push(g.SlotToOperand(source_location.index()));
664 18616 : } else if (source.IsRegister()) {
665 : LocationOperand source_location(LocationOperand::cast(source));
666 18616 : __ Push(source_location.GetRegister());
667 0 : } else if (source.IsImmediate()) {
668 0 : __ Push(Immediate(ImmediateOperand::cast(source).inline_value()));
669 : } else {
670 : // Pushes of non-scalar data types is not supported.
671 0 : UNIMPLEMENTED();
672 : }
673 : frame_access_state()->IncreaseSPDelta(1);
674 : move->Eliminate();
675 : }
676 : }
677 : AdjustStackPointerForTailCall(tasm(), frame_access_state(),
678 119863 : first_unused_stack_slot, false);
679 119862 : }
680 :
681 119863 : void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
682 : int first_unused_stack_slot) {
683 : AdjustStackPointerForTailCall(tasm(), frame_access_state(),
684 119863 : first_unused_stack_slot);
685 119863 : }
686 :
687 : // Check that {kJavaScriptCallCodeStartRegister} is correct.
688 114 : void CodeGenerator::AssembleCodeStartRegisterCheck() {
689 114 : __ ComputeCodeStartAddress(rbx);
690 114 : __ cmpq(rbx, kJavaScriptCallCodeStartRegister);
691 114 : __ Assert(equal, AbortReason::kWrongFunctionCodeStart);
692 114 : }
693 :
694 : // Check if the code object is marked for deoptimization. If it is, then it
695 : // jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need
696 : // to:
697 : // 1. read from memory the word that contains that bit, which can be found in
698 : // the flags in the referenced {CodeDataContainer} object;
699 : // 2. test kMarkedForDeoptimizationBit in those flags; and
700 : // 3. if it is not zero then it jumps to the builtin.
701 463882 : void CodeGenerator::BailoutIfDeoptimized() {
702 : int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
703 927762 : __ LoadTaggedPointerField(rbx,
704 463886 : Operand(kJavaScriptCallCodeStartRegister, offset));
705 463890 : __ testl(FieldOperand(rbx, CodeDataContainer::kKindSpecificFlagsOffset),
706 : Immediate(1 << Code::kMarkedForDeoptimizationBit));
707 463880 : __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
708 463888 : RelocInfo::CODE_TARGET, not_zero);
709 463885 : }
710 :
711 0 : void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
712 : // Set a mask which has all bits set in the normal case, but has all
713 : // bits cleared if we are speculatively executing the wrong PC.
714 0 : __ ComputeCodeStartAddress(rbx);
715 0 : __ xorq(kSpeculationPoisonRegister, kSpeculationPoisonRegister);
716 : __ cmpq(kJavaScriptCallCodeStartRegister, rbx);
717 : __ movq(rbx, Immediate(-1));
718 0 : __ cmovq(equal, kSpeculationPoisonRegister, rbx);
719 0 : }
720 :
721 0 : void CodeGenerator::AssembleRegisterArgumentPoisoning() {
722 0 : __ andq(kJSFunctionRegister, kSpeculationPoisonRegister);
723 : __ andq(kContextRegister, kSpeculationPoisonRegister);
724 : __ andq(rsp, kSpeculationPoisonRegister);
725 0 : }
726 :
727 : // Assembles an instruction after register allocation, producing machine code.
728 68646744 : CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
729 : Instruction* instr) {
730 : X64OperandConverter i(this, instr);
731 : InstructionCode opcode = instr->opcode();
732 68646744 : ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
733 68646744 : switch (arch_opcode) {
734 : case kArchCallCodeObject: {
735 4827589 : if (HasImmediateInput(instr, 0)) {
736 4451090 : Handle<Code> code = i.InputCode(0);
737 4451090 : __ Call(code, RelocInfo::CODE_TARGET);
738 : } else {
739 376508 : Register reg = i.InputRegister(0);
740 : DCHECK_IMPLIES(
741 : HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
742 : reg == kJavaScriptCallCodeStartRegister);
743 376508 : __ LoadCodeObjectEntry(reg, reg);
744 376513 : if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
745 0 : __ RetpolineCall(reg);
746 : } else {
747 376513 : __ call(reg);
748 : }
749 : }
750 4827621 : RecordCallPosition(instr);
751 : frame_access_state()->ClearSPDelta();
752 : break;
753 : }
754 : case kArchCallBuiltinPointer: {
755 : DCHECK(!HasImmediateInput(instr, 0));
756 3608 : Register builtin_pointer = i.InputRegister(0);
757 3608 : __ CallBuiltinPointer(builtin_pointer);
758 3608 : RecordCallPosition(instr);
759 : frame_access_state()->ClearSPDelta();
760 : break;
761 : }
762 : case kArchCallWasmFunction: {
763 1181915 : if (HasImmediateInput(instr, 0)) {
764 192442 : Constant constant = i.ToConstant(instr->InputAt(0));
765 192491 : Address wasm_code = static_cast<Address>(constant.ToInt64());
766 192491 : if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
767 192499 : __ near_call(wasm_code, constant.rmode());
768 : } else {
769 0 : if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
770 0 : __ RetpolineCall(wasm_code, constant.rmode());
771 : } else {
772 0 : __ Call(wasm_code, constant.rmode());
773 : }
774 : }
775 : } else {
776 989473 : Register reg = i.InputRegister(0);
777 989473 : if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
778 0 : __ RetpolineCall(reg);
779 : } else {
780 989473 : __ call(reg);
781 : }
782 : }
783 1181915 : RecordCallPosition(instr);
784 : frame_access_state()->ClearSPDelta();
785 : break;
786 : }
787 : case kArchTailCallCodeObjectFromJSFunction:
788 : case kArchTailCallCodeObject: {
789 36864 : if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) {
790 : AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
791 : i.TempRegister(0), i.TempRegister(1),
792 1288 : i.TempRegister(2));
793 : }
794 36864 : if (HasImmediateInput(instr, 0)) {
795 31240 : Handle<Code> code = i.InputCode(0);
796 31240 : __ Jump(code, RelocInfo::CODE_TARGET);
797 : } else {
798 5624 : Register reg = i.InputRegister(0);
799 : DCHECK_IMPLIES(
800 : HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
801 : reg == kJavaScriptCallCodeStartRegister);
802 5624 : __ LoadCodeObjectEntry(reg, reg);
803 5624 : if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
804 0 : __ RetpolineJump(reg);
805 : } else {
806 5624 : __ jmp(reg);
807 : }
808 : }
809 : unwinding_info_writer_.MarkBlockWillExit();
810 : frame_access_state()->ClearSPDelta();
811 36864 : frame_access_state()->SetFrameAccessToDefault();
812 36864 : break;
813 : }
814 : case kArchTailCallWasm: {
815 219 : if (HasImmediateInput(instr, 0)) {
816 129 : Constant constant = i.ToConstant(instr->InputAt(0));
817 : Address wasm_code = static_cast<Address>(constant.ToInt64());
818 129 : if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
819 129 : __ near_jmp(wasm_code, constant.rmode());
820 : } else {
821 : __ Move(kScratchRegister, wasm_code, constant.rmode());
822 0 : __ jmp(kScratchRegister);
823 : }
824 : } else {
825 90 : Register reg = i.InputRegister(0);
826 90 : if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
827 0 : __ RetpolineJump(reg);
828 : } else {
829 90 : __ jmp(reg);
830 : }
831 : }
832 : unwinding_info_writer_.MarkBlockWillExit();
833 : frame_access_state()->ClearSPDelta();
834 219 : frame_access_state()->SetFrameAccessToDefault();
835 219 : break;
836 : }
837 : case kArchTailCallAddress: {
838 82768 : CHECK(!HasImmediateInput(instr, 0));
839 82768 : Register reg = i.InputRegister(0);
840 : DCHECK_IMPLIES(
841 : HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
842 : reg == kJavaScriptCallCodeStartRegister);
843 82768 : if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
844 0 : __ RetpolineJump(reg);
845 : } else {
846 82768 : __ jmp(reg);
847 : }
848 : unwinding_info_writer_.MarkBlockWillExit();
849 : frame_access_state()->ClearSPDelta();
850 82768 : frame_access_state()->SetFrameAccessToDefault();
851 : break;
852 : }
853 : case kArchCallJSFunction: {
854 : Register func = i.InputRegister(0);
855 23863 : if (FLAG_debug_code) {
856 : // Check the function's context matches the context argument.
857 8 : __ cmp_tagged(rsi, FieldOperand(func, JSFunction::kContextOffset));
858 8 : __ Assert(equal, AbortReason::kWrongFunctionContext);
859 : }
860 : static_assert(kJavaScriptCallCodeStartRegister == rcx, "ABI mismatch");
861 : __ LoadTaggedPointerField(rcx,
862 23863 : FieldOperand(func, JSFunction::kCodeOffset));
863 23863 : __ CallCodeObject(rcx);
864 : frame_access_state()->ClearSPDelta();
865 23863 : RecordCallPosition(instr);
866 : break;
867 : }
868 : case kArchPrepareCallCFunction: {
869 : // Frame alignment requires using FP-relative frame addressing.
870 : frame_access_state()->SetFrameAccessToFP();
871 25882 : int const num_parameters = MiscField::decode(instr->opcode());
872 25882 : __ PrepareCallCFunction(num_parameters);
873 25882 : break;
874 : }
875 : case kArchSaveCallerRegisters: {
876 : fp_mode_ =
877 900 : static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
878 : DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
879 : // kReturnRegister0 should have been saved before entering the stub.
880 900 : int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
881 : DCHECK(IsAligned(bytes, kSystemPointerSize));
882 : DCHECK_EQ(0, frame_access_state()->sp_delta());
883 900 : frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
884 : DCHECK(!caller_registers_saved_);
885 900 : caller_registers_saved_ = true;
886 900 : break;
887 : }
888 : case kArchRestoreCallerRegisters: {
889 : DCHECK(fp_mode_ ==
890 : static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
891 : DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
892 : // Don't overwrite the returned value.
893 1800 : int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
894 900 : frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
895 : DCHECK_EQ(0, frame_access_state()->sp_delta());
896 : DCHECK(caller_registers_saved_);
897 900 : caller_registers_saved_ = false;
898 900 : break;
899 : }
900 : case kArchPrepareTailCall:
901 119850 : AssemblePrepareTailCall();
902 119850 : break;
903 : case kArchCallCFunction: {
904 : int const num_parameters = MiscField::decode(instr->opcode());
905 25882 : if (HasImmediateInput(instr, 0)) {
906 24742 : ExternalReference ref = i.InputExternalReference(0);
907 24742 : __ CallCFunction(ref, num_parameters);
908 : } else {
909 1140 : Register func = i.InputRegister(0);
910 1140 : __ CallCFunction(func, num_parameters);
911 : }
912 25882 : frame_access_state()->SetFrameAccessToDefault();
913 : // Ideally, we should decrement SP delta to match the change of stack
914 : // pointer in CallCFunction. However, for certain architectures (e.g.
915 : // ARM), there may be more strict alignment requirement, causing old SP
916 : // to be saved on the stack. In those cases, we can not calculate the SP
917 : // delta statically.
918 : frame_access_state()->ClearSPDelta();
919 25882 : if (caller_registers_saved_) {
920 : // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
921 : // Here, we assume the sequence to be:
922 : // kArchSaveCallerRegisters;
923 : // kArchCallCFunction;
924 : // kArchRestoreCallerRegisters;
925 : int bytes =
926 1800 : __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
927 900 : frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
928 : }
929 : // TODO(tebbi): Do we need an lfence here?
930 : break;
931 : }
932 : case kArchJmp:
933 4959087 : AssembleArchJump(i.InputRpo(0));
934 4959183 : break;
935 : case kArchBinarySearchSwitch:
936 34090 : AssembleArchBinarySearchSwitch(instr);
937 34091 : break;
938 : case kArchLookupSwitch:
939 0 : AssembleArchLookupSwitch(instr);
940 0 : break;
941 : case kArchTableSwitch:
942 315 : AssembleArchTableSwitch(instr);
943 315 : break;
944 : case kArchComment:
945 4 : __ RecordComment(reinterpret_cast<const char*>(i.InputInt64(0)));
946 4 : break;
947 : case kArchDebugAbort:
948 : DCHECK(i.InputRegister(0) == rdx);
949 160 : if (!frame_access_state()->has_frame()) {
950 : // We don't actually want to generate a pile of code for this, so just
951 : // claim there is a stack frame, without generating one.
952 16 : FrameScope scope(tasm(), StackFrame::NONE);
953 16 : __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
954 16 : RelocInfo::CODE_TARGET);
955 : } else {
956 144 : __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
957 144 : RelocInfo::CODE_TARGET);
958 : }
959 160 : __ int3();
960 : unwinding_info_writer_.MarkBlockWillExit();
961 : break;
962 : case kArchDebugBreak:
963 256023 : __ int3();
964 256023 : break;
965 : case kArchThrowTerminator:
966 : unwinding_info_writer_.MarkBlockWillExit();
967 : break;
968 : case kArchNop:
969 : // don't emit code for nops.
970 : break;
971 : case kArchDeoptimize: {
972 : int deopt_state_id =
973 44536 : BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
974 : CodeGenResult result =
975 44536 : AssembleDeoptimizerCall(deopt_state_id, current_source_position_);
976 44536 : if (result != kSuccess) return result;
977 : unwinding_info_writer_.MarkBlockWillExit();
978 : break;
979 : }
980 : case kArchRet:
981 2988809 : AssembleReturn(instr->InputAt(0));
982 2988464 : break;
983 : case kArchStackPointer:
984 0 : __ movq(i.OutputRegister(), rsp);
985 : break;
986 : case kArchFramePointer:
987 32822 : __ movq(i.OutputRegister(), rbp);
988 : break;
989 : case kArchParentFramePointer:
990 58132 : if (frame_access_state()->has_frame()) {
991 80148 : __ movq(i.OutputRegister(), Operand(rbp, 0));
992 : } else {
993 31416 : __ movq(i.OutputRegister(), rbp);
994 : }
995 : break;
996 : case kArchTruncateDoubleToI: {
997 : auto result = i.OutputRegister();
998 : auto input = i.InputDoubleRegister(0);
999 : auto ool = new (zone()) OutOfLineTruncateDoubleToI(
1000 : this, result, input, DetermineStubCallMode(),
1001 106625 : &unwinding_info_writer_);
1002 : // We use Cvttsd2siq instead of Cvttsd2si due to performance reasons. The
1003 : // use of Cvttsd2siq requires the movl below to avoid sign extension.
1004 53310 : __ Cvttsd2siq(result, input);
1005 53314 : __ cmpq(result, Immediate(1));
1006 53313 : __ j(overflow, ool->entry());
1007 53314 : __ bind(ool->exit());
1008 : __ movl(result, result);
1009 : break;
1010 : }
1011 : case kArchStoreWithWriteBarrier: {
1012 : RecordWriteMode mode =
1013 : static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
1014 : Register object = i.InputRegister(0);
1015 319798 : size_t index = 0;
1016 319798 : Operand operand = i.MemoryOperand(&index);
1017 319798 : Register value = i.InputRegister(index);
1018 : Register scratch0 = i.TempRegister(0);
1019 : Register scratch1 = i.TempRegister(1);
1020 : auto ool = new (zone())
1021 : OutOfLineRecordWrite(this, object, operand, value, scratch0, scratch1,
1022 639597 : mode, DetermineStubCallMode());
1023 319799 : __ StoreTaggedField(operand, value);
1024 : if (COMPRESS_POINTERS_BOOL) {
1025 : __ DecompressTaggedPointer(object, object);
1026 : }
1027 : __ CheckPageFlag(object, scratch0,
1028 : MemoryChunk::kPointersFromHereAreInterestingMask,
1029 319799 : not_zero, ool->entry());
1030 319799 : __ bind(ool->exit());
1031 : break;
1032 : }
1033 : case kArchWordPoisonOnSpeculation:
1034 : DCHECK_EQ(i.OutputRegister(), i.InputRegister(0));
1035 0 : __ andq(i.InputRegister(0), kSpeculationPoisonRegister);
1036 : break;
1037 : case kLFence:
1038 0 : __ lfence();
1039 0 : break;
1040 : case kArchStackSlot: {
1041 : FrameOffset offset =
1042 367610 : frame_access_state()->GetFrameOffset(i.InputInt32(0));
1043 367611 : Register base = offset.from_stack_pointer() ? rsp : rbp;
1044 1102838 : __ leaq(i.OutputRegister(), Operand(base, offset.offset()));
1045 : break;
1046 : }
1047 : case kIeee754Float64Acos:
1048 116 : ASSEMBLE_IEEE754_UNOP(acos);
1049 116 : break;
1050 : case kIeee754Float64Acosh:
1051 116 : ASSEMBLE_IEEE754_UNOP(acosh);
1052 116 : break;
1053 : case kIeee754Float64Asin:
1054 116 : ASSEMBLE_IEEE754_UNOP(asin);
1055 116 : break;
1056 : case kIeee754Float64Asinh:
1057 116 : ASSEMBLE_IEEE754_UNOP(asinh);
1058 116 : break;
1059 : case kIeee754Float64Atan:
1060 133 : ASSEMBLE_IEEE754_UNOP(atan);
1061 133 : break;
1062 : case kIeee754Float64Atanh:
1063 116 : ASSEMBLE_IEEE754_UNOP(atanh);
1064 116 : break;
1065 : case kIeee754Float64Atan2:
1066 130 : ASSEMBLE_IEEE754_BINOP(atan2);
1067 130 : break;
1068 : case kIeee754Float64Cbrt:
1069 116 : ASSEMBLE_IEEE754_UNOP(cbrt);
1070 116 : break;
1071 : case kIeee754Float64Cos:
1072 271 : ASSEMBLE_IEEE754_UNOP(cos);
1073 271 : break;
1074 : case kIeee754Float64Cosh:
1075 123 : ASSEMBLE_IEEE754_UNOP(cosh);
1076 123 : break;
1077 : case kIeee754Float64Exp:
1078 148 : ASSEMBLE_IEEE754_UNOP(exp);
1079 148 : break;
1080 : case kIeee754Float64Expm1:
1081 123 : ASSEMBLE_IEEE754_UNOP(expm1);
1082 123 : break;
1083 : case kIeee754Float64Log:
1084 252 : ASSEMBLE_IEEE754_UNOP(log);
1085 252 : break;
1086 : case kIeee754Float64Log1p:
1087 116 : ASSEMBLE_IEEE754_UNOP(log1p);
1088 116 : break;
1089 : case kIeee754Float64Log2:
1090 116 : ASSEMBLE_IEEE754_UNOP(log2);
1091 116 : break;
1092 : case kIeee754Float64Log10:
1093 116 : ASSEMBLE_IEEE754_UNOP(log10);
1094 116 : break;
1095 : case kIeee754Float64Pow:
1096 330 : ASSEMBLE_IEEE754_BINOP(pow);
1097 330 : break;
1098 : case kIeee754Float64Sin:
1099 268 : ASSEMBLE_IEEE754_UNOP(sin);
1100 268 : break;
1101 : case kIeee754Float64Sinh:
1102 123 : ASSEMBLE_IEEE754_UNOP(sinh);
1103 123 : break;
1104 : case kIeee754Float64Tan:
1105 168 : ASSEMBLE_IEEE754_UNOP(tan);
1106 168 : break;
1107 : case kIeee754Float64Tanh:
1108 123 : ASSEMBLE_IEEE754_UNOP(tanh);
1109 123 : break;
1110 : case kX64Add32:
1111 292929 : ASSEMBLE_BINOP(addl);
1112 : break;
1113 : case kX64Add:
1114 306923 : ASSEMBLE_BINOP(addq);
1115 : break;
1116 : case kX64Sub32:
1117 182006 : ASSEMBLE_BINOP(subl);
1118 : break;
1119 : case kX64Sub:
1120 221018 : ASSEMBLE_BINOP(subq);
1121 : break;
1122 : case kX64And32:
1123 719880 : ASSEMBLE_BINOP(andl);
1124 : break;
1125 : case kX64And:
1126 926747 : ASSEMBLE_BINOP(andq);
1127 : break;
1128 : case kX64Cmp8:
1129 36197 : ASSEMBLE_COMPARE(cmpb);
1130 : break;
1131 : case kX64Cmp16:
1132 1300298 : ASSEMBLE_COMPARE(cmpw);
1133 : break;
1134 : case kX64Cmp32:
1135 4697284 : ASSEMBLE_COMPARE(cmpl);
1136 : break;
1137 : case kX64Cmp:
1138 8557002 : ASSEMBLE_COMPARE(cmpq);
1139 : break;
1140 : case kX64Test8:
1141 320413 : ASSEMBLE_COMPARE(testb);
1142 : break;
1143 : case kX64Test16:
1144 91504 : ASSEMBLE_COMPARE(testw);
1145 : break;
1146 : case kX64Test32:
1147 481587 : ASSEMBLE_COMPARE(testl);
1148 : break;
1149 : case kX64Test:
1150 2675786 : ASSEMBLE_COMPARE(testq);
1151 : break;
1152 : case kX64Imul32:
1153 156589 : ASSEMBLE_MULT(imull);
1154 : break;
1155 : case kX64Imul:
1156 61636 : ASSEMBLE_MULT(imulq);
1157 : break;
1158 : case kX64ImulHigh32:
1159 4196 : if (instr->InputAt(1)->IsRegister()) {
1160 4196 : __ imull(i.InputRegister(1));
1161 : } else {
1162 0 : __ imull(i.InputOperand(1));
1163 : }
1164 : break;
1165 : case kX64UmulHigh32:
1166 1428 : if (instr->InputAt(1)->IsRegister()) {
1167 1428 : __ mull(i.InputRegister(1));
1168 : } else {
1169 0 : __ mull(i.InputOperand(1));
1170 : }
1171 : break;
1172 : case kX64Idiv32:
1173 31993 : __ cdq();
1174 : __ idivl(i.InputRegister(1));
1175 : break;
1176 : case kX64Idiv:
1177 2732 : __ cqo();
1178 : __ idivq(i.InputRegister(1));
1179 : break;
1180 : case kX64Udiv32:
1181 29121 : __ xorl(rdx, rdx);
1182 : __ divl(i.InputRegister(1));
1183 : break;
1184 : case kX64Udiv:
1185 1768 : __ xorq(rdx, rdx);
1186 : __ divq(i.InputRegister(1));
1187 : break;
1188 : case kX64Not:
1189 88 : ASSEMBLE_UNOP(notq);
1190 : break;
1191 : case kX64Not32:
1192 5556 : ASSEMBLE_UNOP(notl);
1193 : break;
1194 : case kX64Neg:
1195 17454 : ASSEMBLE_UNOP(negq);
1196 : break;
1197 : case kX64Neg32:
1198 12222 : ASSEMBLE_UNOP(negl);
1199 : break;
1200 : case kX64Or32:
1201 315297 : ASSEMBLE_BINOP(orl);
1202 : break;
1203 : case kX64Or:
1204 343464 : ASSEMBLE_BINOP(orq);
1205 : break;
1206 : case kX64Xor32:
1207 77169 : ASSEMBLE_BINOP(xorl);
1208 : break;
1209 : case kX64Xor:
1210 3272 : ASSEMBLE_BINOP(xorq);
1211 : break;
1212 : case kX64Shl32:
1213 117985 : ASSEMBLE_SHIFT(shll, 5);
1214 : break;
1215 : case kX64Shl:
1216 1393730 : ASSEMBLE_SHIFT(shlq, 6);
1217 : break;
1218 : case kX64Shr32:
1219 392893 : ASSEMBLE_SHIFT(shrl, 5);
1220 : break;
1221 : case kX64Shr:
1222 1435726 : ASSEMBLE_SHIFT(shrq, 6);
1223 : break;
1224 : case kX64Sar32:
1225 98806 : ASSEMBLE_SHIFT(sarl, 5);
1226 : break;
1227 : case kX64Sar:
1228 714480 : ASSEMBLE_SHIFT(sarq, 6);
1229 : break;
1230 : case kX64Ror32:
1231 110780 : ASSEMBLE_SHIFT(rorl, 5);
1232 : break;
1233 : case kX64Ror:
1234 338 : ASSEMBLE_SHIFT(rorq, 6);
1235 : break;
1236 : case kX64Lzcnt:
1237 34 : if (instr->InputAt(0)->IsRegister()) {
1238 34 : __ Lzcntq(i.OutputRegister(), i.InputRegister(0));
1239 : } else {
1240 0 : __ Lzcntq(i.OutputRegister(), i.InputOperand(0));
1241 : }
1242 : break;
1243 : case kX64Lzcnt32:
1244 666 : if (instr->InputAt(0)->IsRegister()) {
1245 626 : __ Lzcntl(i.OutputRegister(), i.InputRegister(0));
1246 : } else {
1247 80 : __ Lzcntl(i.OutputRegister(), i.InputOperand(0));
1248 : }
1249 : break;
1250 : case kX64Tzcnt:
1251 34 : if (instr->InputAt(0)->IsRegister()) {
1252 34 : __ Tzcntq(i.OutputRegister(), i.InputRegister(0));
1253 : } else {
1254 0 : __ Tzcntq(i.OutputRegister(), i.InputOperand(0));
1255 : }
1256 : break;
1257 : case kX64Tzcnt32:
1258 332 : if (instr->InputAt(0)->IsRegister()) {
1259 332 : __ Tzcntl(i.OutputRegister(), i.InputRegister(0));
1260 : } else {
1261 0 : __ Tzcntl(i.OutputRegister(), i.InputOperand(0));
1262 : }
1263 : break;
1264 : case kX64Popcnt:
1265 42 : if (instr->InputAt(0)->IsRegister()) {
1266 42 : __ Popcntq(i.OutputRegister(), i.InputRegister(0));
1267 : } else {
1268 0 : __ Popcntq(i.OutputRegister(), i.InputOperand(0));
1269 : }
1270 : break;
1271 : case kX64Popcnt32:
1272 80 : if (instr->InputAt(0)->IsRegister()) {
1273 80 : __ Popcntl(i.OutputRegister(), i.InputRegister(0));
1274 : } else {
1275 0 : __ Popcntl(i.OutputRegister(), i.InputOperand(0));
1276 : }
1277 : break;
1278 : case kX64Bswap:
1279 12 : __ bswapq(i.OutputRegister());
1280 12 : break;
1281 : case kX64Bswap32:
1282 44 : __ bswapl(i.OutputRegister());
1283 44 : break;
1284 : case kSSEFloat32Cmp:
1285 0 : ASSEMBLE_SSE_BINOP(Ucomiss);
1286 : break;
1287 : case kSSEFloat32Add:
1288 0 : ASSEMBLE_SSE_BINOP(addss);
1289 : break;
1290 : case kSSEFloat32Sub:
1291 0 : ASSEMBLE_SSE_BINOP(subss);
1292 : break;
1293 : case kSSEFloat32Mul:
1294 0 : ASSEMBLE_SSE_BINOP(mulss);
1295 : break;
1296 : case kSSEFloat32Div:
1297 0 : ASSEMBLE_SSE_BINOP(divss);
1298 : // Don't delete this mov. It may improve performance on some CPUs,
1299 : // when there is a (v)mulss depending on the result.
1300 0 : __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1301 0 : break;
1302 : case kSSEFloat32Abs: {
1303 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1304 0 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1305 0 : __ psrlq(kScratchDoubleReg, 33);
1306 0 : __ andps(i.OutputDoubleRegister(), kScratchDoubleReg);
1307 0 : break;
1308 : }
1309 : case kSSEFloat32Neg: {
1310 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1311 0 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1312 0 : __ psllq(kScratchDoubleReg, 31);
1313 0 : __ xorps(i.OutputDoubleRegister(), kScratchDoubleReg);
1314 0 : break;
1315 : }
1316 : case kSSEFloat32Sqrt:
1317 364 : ASSEMBLE_SSE_UNOP(sqrtss);
1318 : break;
1319 : case kSSEFloat32ToFloat64:
1320 40918 : ASSEMBLE_SSE_UNOP(Cvtss2sd);
1321 : break;
1322 : case kSSEFloat32Round: {
1323 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
1324 : RoundingMode const mode =
1325 : static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1326 : __ Roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1327 : break;
1328 : }
1329 : case kSSEFloat32ToInt32:
1330 348 : if (instr->InputAt(0)->IsFPRegister()) {
1331 348 : __ Cvttss2si(i.OutputRegister(), i.InputDoubleRegister(0));
1332 : } else {
1333 0 : __ Cvttss2si(i.OutputRegister(), i.InputOperand(0));
1334 : }
1335 : break;
1336 : case kSSEFloat32ToUint32: {
1337 56 : if (instr->InputAt(0)->IsFPRegister()) {
1338 56 : __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1339 : } else {
1340 0 : __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
1341 : }
1342 : break;
1343 : }
1344 : case kSSEFloat64Cmp:
1345 1846 : ASSEMBLE_SSE_BINOP(Ucomisd);
1346 : break;
1347 : case kSSEFloat64Add:
1348 632 : ASSEMBLE_SSE_BINOP(addsd);
1349 : break;
1350 : case kSSEFloat64Sub:
1351 468 : ASSEMBLE_SSE_BINOP(subsd);
1352 : break;
1353 : case kSSEFloat64Mul:
1354 56 : ASSEMBLE_SSE_BINOP(mulsd);
1355 : break;
1356 : case kSSEFloat64Div:
1357 52 : ASSEMBLE_SSE_BINOP(divsd);
1358 : // Don't delete this mov. It may improve performance on some CPUs,
1359 : // when there is a (v)mulsd depending on the result.
1360 : __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1361 : break;
1362 : case kSSEFloat64Mod: {
1363 1577 : __ subq(rsp, Immediate(kDoubleSize));
1364 1577 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1365 1577 : kDoubleSize);
1366 : // Move values to st(0) and st(1).
1367 3154 : __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(1));
1368 1577 : __ fld_d(Operand(rsp, 0));
1369 3154 : __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
1370 1577 : __ fld_d(Operand(rsp, 0));
1371 : // Loop while fprem isn't done.
1372 1577 : Label mod_loop;
1373 1577 : __ bind(&mod_loop);
 1374 :         // This instruction traps on all kinds of inputs, but we are assuming the
1375 : // floating point control word is set to ignore them all.
1376 1577 : __ fprem();
 1377 :         // The following 2 instructions implicitly use rax.
1378 1577 : __ fnstsw_ax();
1379 1577 : if (CpuFeatures::IsSupported(SAHF)) {
1380 : CpuFeatureScope sahf_scope(tasm(), SAHF);
1381 1545 : __ sahf();
1382 : } else {
1383 : __ shrl(rax, Immediate(8));
1384 : __ andl(rax, Immediate(0xFF));
1385 32 : __ pushq(rax);
1386 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1387 32 : kSystemPointerSize);
1388 32 : __ popfq();
1389 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1390 32 : -kSystemPointerSize);
1391 : }
1392 1577 : __ j(parity_even, &mod_loop);
1393 : // Move output to stack and clean up.
1394 1577 : __ fstp(1);
1395 1577 : __ fstp_d(Operand(rsp, 0));
1396 3154 : __ Movsd(i.OutputDoubleRegister(), Operand(rsp, 0));
1397 : __ addq(rsp, Immediate(kDoubleSize));
1398 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1399 1577 : -kDoubleSize);
1400 : break;
1401 : }
1402 : case kSSEFloat32Max: {
1403 66 : Label compare_swap, done_compare;
1404 66 : if (instr->InputAt(1)->IsFPRegister()) {
1405 : __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1406 : } else {
1407 0 : __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1408 : }
1409 : auto ool =
1410 : new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
1411 66 : __ j(parity_even, ool->entry());
1412 66 : __ j(above, &done_compare, Label::kNear);
1413 66 : __ j(below, &compare_swap, Label::kNear);
1414 : __ Movmskps(kScratchRegister, i.InputDoubleRegister(0));
1415 : __ testl(kScratchRegister, Immediate(1));
1416 66 : __ j(zero, &done_compare, Label::kNear);
1417 66 : __ bind(&compare_swap);
1418 66 : if (instr->InputAt(1)->IsFPRegister()) {
1419 : __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1420 : } else {
1421 0 : __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
1422 : }
1423 66 : __ bind(&done_compare);
1424 66 : __ bind(ool->exit());
1425 : break;
1426 : }
1427 : case kSSEFloat32Min: {
1428 66 : Label compare_swap, done_compare;
1429 66 : if (instr->InputAt(1)->IsFPRegister()) {
1430 : __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1431 : } else {
1432 0 : __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1433 : }
1434 : auto ool =
1435 : new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
1436 66 : __ j(parity_even, ool->entry());
1437 66 : __ j(below, &done_compare, Label::kNear);
1438 66 : __ j(above, &compare_swap, Label::kNear);
1439 66 : if (instr->InputAt(1)->IsFPRegister()) {
1440 : __ Movmskps(kScratchRegister, i.InputDoubleRegister(1));
1441 : } else {
1442 0 : __ Movss(kScratchDoubleReg, i.InputOperand(1));
1443 : __ Movmskps(kScratchRegister, kScratchDoubleReg);
1444 : }
1445 : __ testl(kScratchRegister, Immediate(1));
1446 66 : __ j(zero, &done_compare, Label::kNear);
1447 66 : __ bind(&compare_swap);
1448 66 : if (instr->InputAt(1)->IsFPRegister()) {
1449 : __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1450 : } else {
1451 0 : __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
1452 : }
1453 66 : __ bind(&done_compare);
1454 66 : __ bind(ool->exit());
1455 : break;
1456 : }
1457 : case kSSEFloat64Max: {
1458 252 : Label compare_swap, done_compare;
1459 252 : if (instr->InputAt(1)->IsFPRegister()) {
1460 : __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1461 : } else {
1462 0 : __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1463 : }
1464 : auto ool =
1465 : new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
1466 252 : __ j(parity_even, ool->entry());
1467 252 : __ j(above, &done_compare, Label::kNear);
1468 252 : __ j(below, &compare_swap, Label::kNear);
1469 : __ Movmskpd(kScratchRegister, i.InputDoubleRegister(0));
1470 : __ testl(kScratchRegister, Immediate(1));
1471 252 : __ j(zero, &done_compare, Label::kNear);
1472 252 : __ bind(&compare_swap);
1473 252 : if (instr->InputAt(1)->IsFPRegister()) {
1474 : __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1475 : } else {
1476 0 : __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1477 : }
1478 252 : __ bind(&done_compare);
1479 252 : __ bind(ool->exit());
1480 : break;
1481 : }
1482 : case kSSEFloat64Min: {
1483 340 : Label compare_swap, done_compare;
1484 340 : if (instr->InputAt(1)->IsFPRegister()) {
1485 : __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1486 : } else {
1487 0 : __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1488 : }
1489 : auto ool =
1490 : new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
1491 340 : __ j(parity_even, ool->entry());
1492 340 : __ j(below, &done_compare, Label::kNear);
1493 340 : __ j(above, &compare_swap, Label::kNear);
1494 340 : if (instr->InputAt(1)->IsFPRegister()) {
1495 : __ Movmskpd(kScratchRegister, i.InputDoubleRegister(1));
1496 : } else {
1497 0 : __ Movsd(kScratchDoubleReg, i.InputOperand(1));
1498 : __ Movmskpd(kScratchRegister, kScratchDoubleReg);
1499 : }
1500 : __ testl(kScratchRegister, Immediate(1));
1501 340 : __ j(zero, &done_compare, Label::kNear);
1502 340 : __ bind(&compare_swap);
1503 340 : if (instr->InputAt(1)->IsFPRegister()) {
1504 : __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1505 : } else {
1506 0 : __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1507 : }
1508 340 : __ bind(&done_compare);
1509 340 : __ bind(ool->exit());
1510 : break;
1511 : }
1512 : case kSSEFloat64Abs: {
1513 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1514 6 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1515 6 : __ psrlq(kScratchDoubleReg, 1);
1516 6 : __ andpd(i.OutputDoubleRegister(), kScratchDoubleReg);
1517 6 : break;
1518 : }
1519 : case kSSEFloat64Neg: {
1520 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1521 76 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1522 76 : __ psllq(kScratchDoubleReg, 63);
1523 76 : __ xorpd(i.OutputDoubleRegister(), kScratchDoubleReg);
1524 76 : break;
1525 : }
1526 : case kSSEFloat64Sqrt:
1527 415 : ASSEMBLE_SSE_UNOP(Sqrtsd);
1528 : break;
1529 : case kSSEFloat64Round: {
1530 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
1531 : RoundingMode const mode =
1532 : static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1533 : __ Roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1534 : break;
1535 : }
1536 : case kSSEFloat64ToFloat32:
1537 36010 : ASSEMBLE_SSE_UNOP(Cvtsd2ss);
1538 : break;
1539 : case kSSEFloat64ToInt32:
1540 128086 : if (instr->InputAt(0)->IsFPRegister()) {
1541 107818 : __ Cvttsd2si(i.OutputRegister(), i.InputDoubleRegister(0));
1542 : } else {
1543 40536 : __ Cvttsd2si(i.OutputRegister(), i.InputOperand(0));
1544 : }
1545 : break;
1546 : case kSSEFloat64ToUint32: {
1547 702 : if (instr->InputAt(0)->IsFPRegister()) {
1548 702 : __ Cvttsd2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1549 : } else {
1550 0 : __ Cvttsd2siq(i.OutputRegister(), i.InputOperand(0));
1551 : }
1552 1404 : if (MiscField::decode(instr->opcode())) {
1553 642 : __ AssertZeroExtended(i.OutputRegister());
1554 : }
1555 : break;
1556 : }
1557 : case kSSEFloat32ToInt64:
1558 52 : if (instr->InputAt(0)->IsFPRegister()) {
1559 52 : __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1560 : } else {
1561 0 : __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
1562 : }
1563 52 : if (instr->OutputCount() > 1) {
1564 48 : __ Set(i.OutputRegister(1), 1);
1565 48 : Label done;
1566 48 : Label fail;
1567 : __ Move(kScratchDoubleReg, static_cast<float>(INT64_MIN));
1568 48 : if (instr->InputAt(0)->IsFPRegister()) {
1569 : __ Ucomiss(kScratchDoubleReg, i.InputDoubleRegister(0));
1570 : } else {
1571 0 : __ Ucomiss(kScratchDoubleReg, i.InputOperand(0));
1572 : }
1573 : // If the input is NaN, then the conversion fails.
1574 48 : __ j(parity_even, &fail);
1575 : // If the input is INT64_MIN, then the conversion succeeds.
1576 48 : __ j(equal, &done);
1577 : __ cmpq(i.OutputRegister(0), Immediate(1));
1578 : // If the conversion results in INT64_MIN, but the input was not
1579 : // INT64_MIN, then the conversion fails.
1580 48 : __ j(no_overflow, &done);
1581 48 : __ bind(&fail);
1582 48 : __ Set(i.OutputRegister(1), 0);
1583 48 : __ bind(&done);
1584 : }
1585 : break;
1586 : case kSSEFloat64ToInt64:
1587 1145 : if (instr->InputAt(0)->IsFPRegister()) {
1588 1144 : __ Cvttsd2siq(i.OutputRegister(0), i.InputDoubleRegister(0));
1589 : } else {
1590 2 : __ Cvttsd2siq(i.OutputRegister(0), i.InputOperand(0));
1591 : }
1592 1147 : if (instr->OutputCount() > 1) {
1593 1016 : __ Set(i.OutputRegister(1), 1);
1594 1021 : Label done;
1595 1021 : Label fail;
1596 : __ Move(kScratchDoubleReg, static_cast<double>(INT64_MIN));
1597 1009 : if (instr->InputAt(0)->IsFPRegister()) {
1598 : __ Ucomisd(kScratchDoubleReg, i.InputDoubleRegister(0));
1599 : } else {
1600 0 : __ Ucomisd(kScratchDoubleReg, i.InputOperand(0));
1601 : }
1602 : // If the input is NaN, then the conversion fails.
1603 1022 : __ j(parity_even, &fail);
1604 : // If the input is INT64_MIN, then the conversion succeeds.
1605 1020 : __ j(equal, &done);
1606 : __ cmpq(i.OutputRegister(0), Immediate(1));
1607 : // If the conversion results in INT64_MIN, but the input was not
1608 : // INT64_MIN, then the conversion fails.
1609 1007 : __ j(no_overflow, &done);
1610 1010 : __ bind(&fail);
1611 1017 : __ Set(i.OutputRegister(1), 0);
1612 1013 : __ bind(&done);
1613 : }
1614 : break;
1615 : case kSSEFloat32ToUint64: {
1616 52 : Label fail;
1617 100 : if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
1618 52 : if (instr->InputAt(0)->IsFPRegister()) {
1619 52 : __ Cvttss2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
1620 : } else {
1621 0 : __ Cvttss2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
1622 : }
1623 100 : if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
1624 52 : __ bind(&fail);
1625 : break;
1626 : }
1627 : case kSSEFloat64ToUint64: {
1628 2972 : Label fail;
1629 3028 : if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
1630 2972 : if (instr->InputAt(0)->IsFPRegister()) {
1631 2972 : __ Cvttsd2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
1632 : } else {
1633 0 : __ Cvttsd2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
1634 : }
1635 3028 : if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
1636 2972 : __ bind(&fail);
1637 : break;
1638 : }
1639 : case kSSEInt32ToFloat64:
1640 378786 : if (instr->InputAt(0)->IsRegister()) {
1641 374901 : __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1642 : } else {
1643 7769 : __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1644 : }
1645 : break;
1646 : case kSSEInt32ToFloat32:
1647 984 : if (instr->InputAt(0)->IsRegister()) {
1648 976 : __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1649 : } else {
1650 16 : __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1651 : }
1652 : break;
1653 : case kSSEInt64ToFloat32:
1654 47 : if (instr->InputAt(0)->IsRegister()) {
1655 47 : __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1656 : } else {
1657 0 : __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1658 : }
1659 : break;
1660 : case kSSEInt64ToFloat64:
1661 3486 : if (instr->InputAt(0)->IsRegister()) {
1662 1486 : __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1663 : } else {
1664 4000 : __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1665 : }
1666 : break;
1667 : case kSSEUint64ToFloat32:
1668 32 : if (instr->InputAt(0)->IsRegister()) {
1669 32 : __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1670 : } else {
1671 0 : __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1672 : }
1673 : break;
1674 : case kSSEUint64ToFloat64:
1675 3608 : if (instr->InputAt(0)->IsRegister()) {
1676 2376 : __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1677 : } else {
1678 2464 : __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1679 : }
1680 : break;
1681 : case kSSEUint32ToFloat64:
1682 9970 : if (instr->InputAt(0)->IsRegister()) {
1683 392 : __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1684 : } else {
1685 19156 : __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1686 : }
1687 : break;
1688 : case kSSEUint32ToFloat32:
1689 88 : if (instr->InputAt(0)->IsRegister()) {
1690 88 : __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1691 : } else {
1692 0 : __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1693 : }
1694 : break;
1695 : case kSSEFloat64ExtractLowWord32:
1696 116 : if (instr->InputAt(0)->IsFPStackSlot()) {
1697 0 : __ movl(i.OutputRegister(), i.InputOperand(0));
1698 : } else {
1699 : __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
1700 : }
1701 : break;
1702 : case kSSEFloat64ExtractHighWord32:
1703 101570 : if (instr->InputAt(0)->IsFPStackSlot()) {
1704 120164 : __ movl(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2));
1705 : } else {
1706 41488 : __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1);
1707 : }
1708 : break;
1709 : case kSSEFloat64InsertLowWord32:
1710 4 : if (instr->InputAt(1)->IsRegister()) {
1711 4 : __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 0);
1712 : } else {
1713 0 : __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
1714 : }
1715 : break;
1716 : case kSSEFloat64InsertHighWord32:
1717 116 : if (instr->InputAt(1)->IsRegister()) {
1718 116 : __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 1);
1719 : } else {
1720 0 : __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
1721 : }
1722 : break;
1723 : case kSSEFloat64LoadLowWord32:
1724 112 : if (instr->InputAt(0)->IsRegister()) {
1725 : __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
1726 : } else {
1727 0 : __ Movd(i.OutputDoubleRegister(), i.InputOperand(0));
1728 : }
1729 : break;
1730 : case kAVXFloat32Cmp: {
1731 : CpuFeatureScope avx_scope(tasm(), AVX);
1732 1539 : if (instr->InputAt(1)->IsFPRegister()) {
1733 1516 : __ vucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1734 : } else {
1735 46 : __ vucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1736 : }
1737 : break;
1738 : }
1739 : case kAVXFloat32Add:
1740 3618 : ASSEMBLE_AVX_BINOP(vaddss);
1741 : break;
1742 : case kAVXFloat32Sub:
1743 5242 : ASSEMBLE_AVX_BINOP(vsubss);
1744 : break;
1745 : case kAVXFloat32Mul:
1746 1778 : ASSEMBLE_AVX_BINOP(vmulss);
1747 : break;
1748 : case kAVXFloat32Div:
1749 770 : ASSEMBLE_AVX_BINOP(vdivss);
1750 : // Don't delete this mov. It may improve performance on some CPUs,
1751 : // when there is a (v)mulss depending on the result.
1752 : __ Movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1753 : break;
1754 : case kAVXFloat64Cmp: {
1755 : CpuFeatureScope avx_scope(tasm(), AVX);
1756 261924 : if (instr->InputAt(1)->IsFPRegister()) {
1757 241295 : __ vucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1758 : } else {
1759 41258 : __ vucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1760 : }
1761 : break;
1762 : }
1763 : case kAVXFloat64Add:
1764 158970 : ASSEMBLE_AVX_BINOP(vaddsd);
1765 : break;
1766 : case kAVXFloat64Sub:
1767 31170 : ASSEMBLE_AVX_BINOP(vsubsd);
1768 : break;
1769 : case kAVXFloat64Mul:
1770 23718 : ASSEMBLE_AVX_BINOP(vmulsd);
1771 : break;
1772 : case kAVXFloat64Div:
1773 24074 : ASSEMBLE_AVX_BINOP(vdivsd);
1774 : // Don't delete this mov. It may improve performance on some CPUs,
1775 : // when there is a (v)mulsd depending on the result.
1776 : __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1777 : break;
1778 : case kAVXFloat32Abs: {
1779 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1780 : CpuFeatureScope avx_scope(tasm(), AVX);
1781 66 : __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
1782 : __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 33);
1783 66 : if (instr->InputAt(0)->IsFPRegister()) {
1784 : __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg,
1785 : i.InputDoubleRegister(0));
1786 : } else {
1787 0 : __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg,
1788 : i.InputOperand(0));
1789 : }
1790 : break;
1791 : }
1792 : case kAVXFloat32Neg: {
1793 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1794 : CpuFeatureScope avx_scope(tasm(), AVX);
1795 175 : __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
1796 : __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 31);
1797 175 : if (instr->InputAt(0)->IsFPRegister()) {
1798 : __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg,
1799 : i.InputDoubleRegister(0));
1800 : } else {
1801 0 : __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg,
1802 : i.InputOperand(0));
1803 : }
1804 : break;
1805 : }
1806 : case kAVXFloat64Abs: {
1807 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1808 : CpuFeatureScope avx_scope(tasm(), AVX);
1809 623 : __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
1810 : __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 1);
1811 623 : if (instr->InputAt(0)->IsFPRegister()) {
1812 : __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg,
1813 : i.InputDoubleRegister(0));
1814 : } else {
1815 0 : __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg,
1816 : i.InputOperand(0));
1817 : }
1818 : break;
1819 : }
1820 : case kAVXFloat64Neg: {
1821 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1822 : CpuFeatureScope avx_scope(tasm(), AVX);
1823 9651 : __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
1824 : __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 63);
1825 9651 : if (instr->InputAt(0)->IsFPRegister()) {
1826 : __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg,
1827 : i.InputDoubleRegister(0));
1828 : } else {
1829 82 : __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg,
1830 : i.InputOperand(0));
1831 : }
1832 : break;
1833 : }
1834 : case kSSEFloat64SilenceNaN:
1835 : __ Xorpd(kScratchDoubleReg, kScratchDoubleReg);
1836 : __ Subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
1837 : break;
1838 : case kX64Movsxbl:
1839 45811 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1840 92241 : ASSEMBLE_MOVX(movsxbl);
1841 45811 : __ AssertZeroExtended(i.OutputRegister());
1842 45811 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1843 45811 : break;
1844 : case kX64Movzxbl:
1845 175409 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1846 351958 : ASSEMBLE_MOVX(movzxbl);
1847 175411 : __ AssertZeroExtended(i.OutputRegister());
1848 175411 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1849 175411 : break;
1850 : case kX64Movsxbq:
1851 13514 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1852 27039 : ASSEMBLE_MOVX(movsxbq);
1853 13514 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1854 13514 : break;
1855 : case kX64Movzxbq:
1856 14020 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1857 28040 : ASSEMBLE_MOVX(movzxbq);
1858 14020 : __ AssertZeroExtended(i.OutputRegister());
1859 14020 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1860 14020 : break;
1861 : case kX64Movb: {
1862 78645 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1863 78644 : size_t index = 0;
1864 78644 : Operand operand = i.MemoryOperand(&index);
1865 157292 : if (HasImmediateInput(instr, index)) {
1866 13482 : __ movb(operand, Immediate(i.InputInt8(index)));
1867 : } else {
1868 71905 : __ movb(operand, i.InputRegister(index));
1869 : }
1870 78645 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1871 : break;
1872 : }
1873 : case kX64Movsxwl:
1874 10506 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1875 21494 : ASSEMBLE_MOVX(movsxwl);
1876 10507 : __ AssertZeroExtended(i.OutputRegister());
1877 10506 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1878 10506 : break;
1879 : case kX64Movzxwl:
1880 160717 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1881 327897 : ASSEMBLE_MOVX(movzxwl);
1882 160719 : __ AssertZeroExtended(i.OutputRegister());
1883 160719 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1884 160719 : break;
1885 : case kX64Movsxwq:
1886 9027 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1887 18058 : ASSEMBLE_MOVX(movsxwq);
1888 : break;
1889 : case kX64Movzxwq:
1890 672 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1891 1344 : ASSEMBLE_MOVX(movzxwq);
1892 672 : __ AssertZeroExtended(i.OutputRegister());
1893 672 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1894 672 : break;
1895 : case kX64Movw: {
1896 14014 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1897 14014 : size_t index = 0;
1898 14014 : Operand operand = i.MemoryOperand(&index);
1899 28030 : if (HasImmediateInput(instr, index)) {
1900 1830 : __ movw(operand, Immediate(i.InputInt16(index)));
1901 : } else {
1902 13100 : __ movw(operand, i.InputRegister(index));
1903 : }
1904 14015 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1905 : break;
1906 : }
1907 : case kX64Movl:
1908 2485652 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1909 2485360 : if (instr->HasOutput()) {
1910 961008 : if (instr->addressing_mode() == kMode_None) {
1911 52799 : if (instr->InputAt(0)->IsRegister()) {
1912 49218 : __ movl(i.OutputRegister(), i.InputRegister(0));
1913 : } else {
1914 7163 : __ movl(i.OutputRegister(), i.InputOperand(0));
1915 : }
1916 : } else {
1917 908228 : __ movl(i.OutputRegister(), i.MemoryOperand());
1918 : }
1919 961062 : __ AssertZeroExtended(i.OutputRegister());
1920 : } else {
1921 1524352 : size_t index = 0;
1922 1524352 : Operand operand = i.MemoryOperand(&index);
1923 3048898 : if (HasImmediateInput(instr, index)) {
1924 479445 : __ movl(operand, i.InputImmediate(index));
1925 : } else {
1926 1045007 : __ movl(operand, i.InputRegister(index));
1927 : }
1928 : }
1929 2485490 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1930 2485529 : break;
1931 : case kX64Movsxlq:
1932 231102 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1933 547416 : ASSEMBLE_MOVX(movsxlq);
1934 231102 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1935 231102 : break;
1936 : case kX64MovqDecompressTaggedSigned: {
1937 0 : CHECK(instr->HasOutput());
1938 0 : __ DecompressTaggedSigned(i.OutputRegister(), i.MemoryOperand());
1939 0 : break;
1940 : }
1941 : case kX64MovqDecompressTaggedPointer: {
1942 0 : CHECK(instr->HasOutput());
1943 0 : __ DecompressTaggedPointer(i.OutputRegister(), i.MemoryOperand());
1944 0 : break;
1945 : }
1946 : case kX64MovqDecompressAnyTagged: {
1947 0 : CHECK(instr->HasOutput());
1948 0 : __ DecompressAnyTagged(i.OutputRegister(), i.MemoryOperand());
1949 0 : break;
1950 : }
1951 : case kX64MovqCompressTagged: {
1952 0 : CHECK(!instr->HasOutput());
1953 0 : size_t index = 0;
1954 0 : Operand operand = i.MemoryOperand(&index);
1955 0 : if (HasImmediateInput(instr, index)) {
1956 0 : __ StoreTaggedField(operand, i.InputImmediate(index));
1957 : } else {
1958 0 : __ StoreTaggedField(operand, i.InputRegister(index));
1959 : }
1960 : break;
1961 : }
1962 : case kX64DecompressSigned: {
1963 0 : CHECK(instr->HasOutput());
1964 0 : ASSEMBLE_MOVX(movsxlq);
1965 : break;
1966 : }
1967 : case kX64DecompressPointer: {
1968 0 : CHECK(instr->HasOutput());
1969 0 : ASSEMBLE_MOVX(movsxlq);
1970 0 : __ addq(i.OutputRegister(), kRootRegister);
1971 : break;
1972 : }
1973 : case kX64DecompressAny: {
1974 0 : CHECK(instr->HasOutput());
1975 0 : ASSEMBLE_MOVX(movsxlq);
1976 : // TODO(solanes): Do branchful compute?
1977 : // Branchlessly compute |masked_root|:
1978 : STATIC_ASSERT((kSmiTagSize == 1) && (kSmiTag < 32));
1979 : Register masked_root = kScratchRegister;
1980 0 : __ movl(masked_root, i.OutputRegister());
1981 : __ andl(masked_root, Immediate(kSmiTagMask));
1982 : __ negq(masked_root);
1983 : __ andq(masked_root, kRootRegister);
1984 : // Now this add operation will either leave the value unchanged if it is a
1985 : // smi or add the isolate root if it is a heap object.
1986 : __ addq(i.OutputRegister(), masked_root);
1987 : break;
1988 : }
1989 : // TODO(solanes): Combine into one Compress? They seem to be identical.
1990 : // TODO(solanes): We might get away with doing a no-op in these three cases.
1991 : // The movl instruction is the conservative way for the moment.
1992 : case kX64CompressSigned: {
1993 0 : ASSEMBLE_MOVX(movl);
1994 : break;
1995 : }
1996 : case kX64CompressPointer: {
1997 0 : ASSEMBLE_MOVX(movl);
1998 : break;
1999 : }
2000 : case kX64CompressAny: {
2001 0 : ASSEMBLE_MOVX(movl);
2002 : break;
2003 : }
2004 : case kX64Movq:
2005 8624346 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
2006 8624053 : if (instr->HasOutput()) {
2007 5785970 : __ movq(i.OutputRegister(), i.MemoryOperand());
2008 : } else {
2009 2839578 : size_t index = 0;
2010 2839578 : Operand operand = i.MemoryOperand(&index);
2011 5679166 : if (HasImmediateInput(instr, index)) {
2012 21854 : __ movq(operand, i.InputImmediate(index));
2013 : } else {
2014 2817729 : __ movq(operand, i.InputRegister(index));
2015 : }
2016 : }
2017 8626541 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2018 8626053 : break;
2019 : case kX64Movss:
2020 28865 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
2021 28868 : if (instr->HasOutput()) {
2022 15711 : __ movss(i.OutputDoubleRegister(), i.MemoryOperand());
2023 : } else {
2024 13158 : size_t index = 0;
2025 13158 : Operand operand = i.MemoryOperand(&index);
2026 26316 : __ movss(operand, i.InputDoubleRegister(index));
2027 : }
2028 : break;
2029 : case kX64Movsd: {
2030 619130 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
2031 619151 : if (instr->HasOutput()) {
2032 : const MemoryAccessMode access_mode =
2033 : static_cast<MemoryAccessMode>(MiscField::decode(opcode));
2034 426427 : if (access_mode == kMemoryAccessPoisoned) {
2035 : // If we have to poison the loaded value, we load into a general
2036 : // purpose register first, mask it with the poison, and move the
2037 : // value from the general purpose register into the double register.
2038 0 : __ movq(kScratchRegister, i.MemoryOperand());
2039 : __ andq(kScratchRegister, kSpeculationPoisonRegister);
2040 : __ Movq(i.OutputDoubleRegister(), kScratchRegister);
2041 : } else {
2042 : __ Movsd(i.OutputDoubleRegister(), i.MemoryOperand());
2043 : }
2044 : } else {
2045 192724 : size_t index = 0;
2046 192724 : Operand operand = i.MemoryOperand(&index);
2047 192726 : __ Movsd(operand, i.InputDoubleRegister(index));
2048 : }
2049 : break;
2050 : }
2051 : case kX64Movdqu: {
2052 : CpuFeatureScope sse_scope(tasm(), SSSE3);
2053 9500 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
2054 9500 : if (instr->HasOutput()) {
2055 5420 : __ movdqu(i.OutputSimd128Register(), i.MemoryOperand());
2056 : } else {
2057 4080 : size_t index = 0;
2058 4080 : Operand operand = i.MemoryOperand(&index);
2059 8160 : __ movdqu(operand, i.InputSimd128Register(index));
2060 : }
2061 : break;
2062 : }
2063 : case kX64BitcastFI:
2064 51108 : if (instr->InputAt(0)->IsFPStackSlot()) {
2065 0 : __ movl(i.OutputRegister(), i.InputOperand(0));
2066 : } else {
2067 : __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
2068 : }
2069 : break;
2070 : case kX64BitcastDL:
2071 50784 : if (instr->InputAt(0)->IsFPStackSlot()) {
2072 0 : __ movq(i.OutputRegister(), i.InputOperand(0));
2073 : } else {
2074 : __ Movq(i.OutputRegister(), i.InputDoubleRegister(0));
2075 : }
2076 : break;
2077 : case kX64BitcastIF:
2078 326 : if (instr->InputAt(0)->IsRegister()) {
2079 : __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
2080 : } else {
2081 0 : __ movss(i.OutputDoubleRegister(), i.InputOperand(0));
2082 : }
2083 : break;
2084 : case kX64BitcastLD:
2085 214 : if (instr->InputAt(0)->IsRegister()) {
2086 : __ Movq(i.OutputDoubleRegister(), i.InputRegister(0));
2087 : } else {
2088 0 : __ Movsd(i.OutputDoubleRegister(), i.InputOperand(0));
2089 : }
2090 : break;
2091 : case kX64Lea32: {
2092 : AddressingMode mode = AddressingModeField::decode(instr->opcode());
2093 : // Shorten "leal" to "addl", "subl" or "shll" if the register allocation
2094 : // and addressing mode just happens to work out. The "addl"/"subl" forms
2095 : // in these cases are faster based on measurements.
2096 328545 : if (i.InputRegister(0) == i.OutputRegister()) {
2097 153013 : if (mode == kMode_MRI) {
2098 : int32_t constant_summand = i.InputInt32(1);
2099 : DCHECK_NE(0, constant_summand);
2100 76389 : if (constant_summand > 0) {
2101 55208 : __ addl(i.OutputRegister(), Immediate(constant_summand));
2102 : } else {
2103 21181 : __ subl(i.OutputRegister(),
2104 : Immediate(base::NegateWithWraparound(constant_summand)));
2105 : }
2106 76616 : } else if (mode == kMode_MR1) {
2107 16605 : if (i.InputRegister(1) == i.OutputRegister()) {
2108 4287 : __ shll(i.OutputRegister(), Immediate(1));
2109 : } else {
2110 12318 : __ addl(i.OutputRegister(), i.InputRegister(1));
2111 : }
2112 60011 : } else if (mode == kMode_M2) {
2113 0 : __ shll(i.OutputRegister(), Immediate(1));
2114 60011 : } else if (mode == kMode_M4) {
2115 2932 : __ shll(i.OutputRegister(), Immediate(2));
2116 57079 : } else if (mode == kMode_M8) {
2117 122 : __ shll(i.OutputRegister(), Immediate(3));
2118 : } else {
2119 56958 : __ leal(i.OutputRegister(), i.MemoryOperand());
2120 : }
2121 201042 : } else if (mode == kMode_MR1 &&
2122 : i.InputRegister(1) == i.OutputRegister()) {
2123 16925 : __ addl(i.OutputRegister(), i.InputRegister(0));
2124 : } else {
2125 158603 : __ leal(i.OutputRegister(), i.MemoryOperand());
2126 : }
2127 328574 : __ AssertZeroExtended(i.OutputRegister());
2128 328597 : break;
2129 : }
2130 : case kX64Lea: {
2131 : AddressingMode mode = AddressingModeField::decode(instr->opcode());
2132 : // Shorten "leaq" to "addq", "subq" or "shlq" if the register allocation
2133 : // and addressing mode just happens to work out. The "addq"/"subq" forms
2134 : // in these cases are faster based on measurements.
2135 1936110 : if (i.InputRegister(0) == i.OutputRegister()) {
2136 536152 : if (mode == kMode_MRI) {
2137 : int32_t constant_summand = i.InputInt32(1);
2138 434887 : if (constant_summand > 0) {
2139 352433 : __ addq(i.OutputRegister(), Immediate(constant_summand));
2140 82454 : } else if (constant_summand < 0) {
2141 164850 : __ subq(i.OutputRegister(), Immediate(-constant_summand));
2142 : }
2143 101269 : } else if (mode == kMode_MR1) {
2144 50474 : if (i.InputRegister(1) == i.OutputRegister()) {
2145 2714 : __ shlq(i.OutputRegister(), Immediate(1));
2146 : } else {
2147 47760 : __ addq(i.OutputRegister(), i.InputRegister(1));
2148 : }
2149 50795 : } else if (mode == kMode_M2) {
2150 0 : __ shlq(i.OutputRegister(), Immediate(1));
2151 50795 : } else if (mode == kMode_M4) {
2152 336 : __ shlq(i.OutputRegister(), Immediate(2));
2153 50459 : } else if (mode == kMode_M8) {
2154 12792 : __ shlq(i.OutputRegister(), Immediate(3));
2155 : } else {
2156 37667 : __ leaq(i.OutputRegister(), i.MemoryOperand());
2157 : }
2158 1640110 : } else if (mode == kMode_MR1 &&
2159 : i.InputRegister(1) == i.OutputRegister()) {
2160 168912 : __ addq(i.OutputRegister(), i.InputRegister(0));
2161 : } else {
2162 1231065 : __ leaq(i.OutputRegister(), i.MemoryOperand());
2163 : }
2164 : break;
2165 : }
2166 : case kX64Dec32:
2167 0 : __ decl(i.OutputRegister());
2168 : break;
2169 : case kX64Inc32:
2170 0 : __ incl(i.OutputRegister());
2171 : break;
2172 : case kX64Push:
2173 3543038 : if (AddressingModeField::decode(instr->opcode()) != kMode_None) {
2174 18687 : size_t index = 0;
2175 18687 : Operand operand = i.MemoryOperand(&index);
2176 18690 : __ pushq(operand);
2177 : frame_access_state()->IncreaseSPDelta(1);
2178 18689 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2179 18689 : kSystemPointerSize);
2180 3524351 : } else if (HasImmediateInput(instr, 0)) {
2181 524111 : __ pushq(i.InputImmediate(0));
2182 : frame_access_state()->IncreaseSPDelta(1);
2183 524111 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2184 524111 : kSystemPointerSize);
2185 3000240 : } else if (instr->InputAt(0)->IsRegister()) {
2186 2249153 : __ pushq(i.InputRegister(0));
2187 : frame_access_state()->IncreaseSPDelta(1);
2188 2249160 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2189 2249160 : kSystemPointerSize);
2190 1490244 : } else if (instr->InputAt(0)->IsFloatRegister() ||
2191 : instr->InputAt(0)->IsDoubleRegister()) {
2192 : // TODO(titzer): use another machine instruction?
2193 22822 : __ subq(rsp, Immediate(kDoubleSize));
2194 : frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize);
2195 22822 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2196 22822 : kDoubleSize);
2197 45644 : __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
2198 728265 : } else if (instr->InputAt(0)->IsSimd128Register()) {
2199 : // TODO(titzer): use another machine instruction?
2200 96 : __ subq(rsp, Immediate(kSimd128Size));
2201 : frame_access_state()->IncreaseSPDelta(kSimd128Size /
2202 : kSystemPointerSize);
2203 96 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2204 96 : kSimd128Size);
2205 192 : __ Movups(Operand(rsp, 0), i.InputSimd128Register(0));
2206 748026 : } else if (instr->InputAt(0)->IsStackSlot() ||
2207 741594 : instr->InputAt(0)->IsFloatStackSlot() ||
2208 : instr->InputAt(0)->IsDoubleStackSlot()) {
2209 727881 : __ pushq(i.InputOperand(0));
2210 : frame_access_state()->IncreaseSPDelta(1);
2211 727885 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2212 727885 : kSystemPointerSize);
2213 : } else {
2214 : DCHECK(instr->InputAt(0)->IsSimd128StackSlot());
2215 288 : __ Movups(kScratchDoubleReg, i.InputOperand(0));
2216 : // TODO(titzer): use another machine instruction?
2217 : __ subq(rsp, Immediate(kSimd128Size));
2218 : frame_access_state()->IncreaseSPDelta(kSimd128Size /
2219 : kSystemPointerSize);
2220 288 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2221 288 : kSimd128Size);
2222 576 : __ Movups(Operand(rsp, 0), kScratchDoubleReg);
2223 : }
2224 : break;
2225 : case kX64Poke: {
2226 : int slot = MiscField::decode(instr->opcode());
2227 3392 : if (HasImmediateInput(instr, 0)) {
2228 2264 : __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputImmediate(0));
2229 : } else {
2230 4520 : __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputRegister(0));
2231 : }
2232 : break;
2233 : }
2234 : case kX64Peek: {
2235 : int reverse_slot = i.InputInt32(0);
2236 : int offset =
2237 5067 : FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
2238 5067 : if (instr->OutputAt(0)->IsFPRegister()) {
2239 : LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
2240 2528 : if (op->representation() == MachineRepresentation::kFloat64) {
2241 2528 : __ Movsd(i.OutputDoubleRegister(), Operand(rbp, offset));
2242 : } else {
2243 : DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
2244 2528 : __ Movss(i.OutputFloatRegister(), Operand(rbp, offset));
2245 : }
2246 : } else {
2247 7617 : __ movq(i.OutputRegister(), Operand(rbp, offset));
2248 : }
2249 : break;
2250 : }
2251 : // TODO(gdeepti): Get rid of redundant moves for F32x4Splat/Extract below
2252 : case kX64F32x4Splat: {
2253 140 : XMMRegister dst = i.OutputSimd128Register();
2254 140 : if (instr->InputAt(0)->IsFPRegister()) {
2255 140 : __ movss(dst, i.InputDoubleRegister(0));
2256 : } else {
2257 0 : __ movss(dst, i.InputOperand(0));
2258 : }
2259 140 : __ shufps(dst, dst, 0x0);
2260 : break;
2261 : }
2262 : case kX64F32x4ExtractLane: {
2263 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2264 64 : __ extractps(kScratchRegister, i.InputSimd128Register(0), i.InputInt8(1));
2265 32 : __ movd(i.OutputDoubleRegister(), kScratchRegister);
2266 : break;
2267 : }
2268 : case kX64F32x4ReplaceLane: {
2269 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2270 : // The insertps instruction uses imm8[5:4] to indicate the lane
2271 : // that needs to be replaced.
2272 32 : byte select = i.InputInt8(1) << 4 & 0x30;
2273 32 : if (instr->InputAt(2)->IsFPRegister()) {
2274 64 : __ insertps(i.OutputSimd128Register(), i.InputDoubleRegister(2),
2275 32 : select);
2276 : } else {
2277 0 : __ insertps(i.OutputSimd128Register(), i.InputOperand(2), select);
2278 : }
2279 : break;
2280 : }
2281 : case kX64F32x4SConvertI32x4: {
2282 4 : __ cvtdq2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2283 4 : break;
2284 : }
2285 : case kX64F32x4UConvertI32x4: {
2286 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2287 : DCHECK_NE(i.OutputSimd128Register(), kScratchDoubleReg);
2288 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2289 : XMMRegister dst = i.OutputSimd128Register();
2290 4 : __ pxor(kScratchDoubleReg, kScratchDoubleReg); // zeros
2291 4 : __ pblendw(kScratchDoubleReg, dst, 0x55); // get lo 16 bits
2292 : __ psubd(dst, kScratchDoubleReg); // get hi 16 bits
2293 4 : __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly
2294 4 : __ psrld(dst, 1); // divide by 2 to get in unsigned range
2295 4 : __ cvtdq2ps(dst, dst); // convert hi exactly
2296 4 : __ addps(dst, dst); // double hi, exactly
2297 4 : __ addps(dst, kScratchDoubleReg); // add hi and lo, may round.
2298 : break;
2299 : }
2300 : case kX64F32x4Abs: {
2301 : XMMRegister dst = i.OutputSimd128Register();
2302 : XMMRegister src = i.InputSimd128Register(0);
2303 4 : if (dst == src) {
2304 4 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2305 4 : __ psrld(kScratchDoubleReg, 1);
2306 4 : __ andps(i.OutputSimd128Register(), kScratchDoubleReg);
2307 : } else {
2308 0 : __ pcmpeqd(dst, dst);
2309 0 : __ psrld(dst, 1);
2310 0 : __ andps(dst, i.InputSimd128Register(0));
2311 : }
2312 : break;
2313 : }
2314 : case kX64F32x4Neg: {
2315 : XMMRegister dst = i.OutputSimd128Register();
2316 : XMMRegister src = i.InputSimd128Register(0);
2317 4 : if (dst == src) {
2318 4 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2319 4 : __ pslld(kScratchDoubleReg, 31);
2320 4 : __ xorps(i.OutputSimd128Register(), kScratchDoubleReg);
2321 : } else {
2322 0 : __ pcmpeqd(dst, dst);
2323 0 : __ pslld(dst, 31);
2324 0 : __ xorps(dst, i.InputSimd128Register(0));
2325 : }
2326 : break;
2327 : }
2328 : case kX64F32x4RecipApprox: {
2329 4 : __ rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2330 4 : break;
2331 : }
2332 : case kX64F32x4RecipSqrtApprox: {
2333 4 : __ rsqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2334 4 : break;
2335 : }
2336 : case kX64F32x4Add: {
2337 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2338 12 : __ addps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2339 12 : break;
2340 : }
2341 : case kX64F32x4AddHoriz: {
2342 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2343 : CpuFeatureScope sse_scope(tasm(), SSE3);
2344 4 : __ haddps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2345 : break;
2346 : }
2347 : case kX64F32x4Sub: {
2348 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2349 4 : __ subps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2350 4 : break;
2351 : }
2352 : case kX64F32x4Mul: {
2353 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2354 4 : __ mulps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2355 4 : break;
2356 : }
2357 : case kX64F32x4Min: {
2358 4 : XMMRegister src1 = i.InputSimd128Register(1),
2359 4 : dst = i.OutputSimd128Register();
2360 : DCHECK_EQ(dst, i.InputSimd128Register(0));
2361 : // The minps instruction doesn't propagate NaNs and +0's in its first
2362 : // operand. Perform minps in both orders, merge the resuls, and adjust.
2363 4 : __ movaps(kScratchDoubleReg, src1);
2364 4 : __ minps(kScratchDoubleReg, dst);
2365 4 : __ minps(dst, src1);
2366 : // propagate -0's and NaNs, which may be non-canonical.
2367 4 : __ orps(kScratchDoubleReg, dst);
2368 : // Canonicalize NaNs by quieting and clearing the payload.
2369 4 : __ cmpps(dst, kScratchDoubleReg, 3);
2370 4 : __ orps(kScratchDoubleReg, dst);
2371 4 : __ psrld(dst, 10);
2372 4 : __ andnps(dst, kScratchDoubleReg);
2373 : break;
2374 : }
2375 : case kX64F32x4Max: {
2376 4 : XMMRegister src1 = i.InputSimd128Register(1),
2377 4 : dst = i.OutputSimd128Register();
2378 : DCHECK_EQ(dst, i.InputSimd128Register(0));
2379 : // The maxps instruction doesn't propagate NaNs and +0's in its first
2380 : // operand. Perform maxps in both orders, merge the resuls, and adjust.
2381 4 : __ movaps(kScratchDoubleReg, src1);
2382 4 : __ maxps(kScratchDoubleReg, dst);
2383 4 : __ maxps(dst, src1);
2384 : // Find discrepancies.
2385 4 : __ xorps(dst, kScratchDoubleReg);
2386 : // Propagate NaNs, which may be non-canonical.
2387 4 : __ orps(kScratchDoubleReg, dst);
2388 : // Propagate sign discrepancy and (subtle) quiet NaNs.
2389 4 : __ subps(kScratchDoubleReg, dst);
2390 : // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
2391 4 : __ cmpps(dst, kScratchDoubleReg, 3);
2392 4 : __ psrld(dst, 10);
2393 4 : __ andnps(dst, kScratchDoubleReg);
2394 : break;
2395 : }
2396 : case kX64F32x4Eq: {
2397 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2398 4 : __ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x0);
2399 4 : break;
2400 : }
2401 : case kX64F32x4Ne: {
2402 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2403 4 : __ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x4);
2404 4 : break;
2405 : }
2406 : case kX64F32x4Lt: {
2407 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2408 8 : __ cmpltps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2409 : break;
2410 : }
2411 : case kX64F32x4Le: {
2412 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2413 8 : __ cmpleps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2414 : break;
2415 : }
2416 : case kX64I32x4Splat: {
2417 1088 : XMMRegister dst = i.OutputSimd128Register();
2418 1088 : if (instr->InputAt(0)->IsRegister()) {
2419 1088 : __ movd(dst, i.InputRegister(0));
2420 : } else {
2421 0 : __ movd(dst, i.InputOperand(0));
2422 : }
2423 1088 : __ pshufd(dst, dst, 0x0);
2424 : break;
2425 : }
2426 : case kX64I32x4ExtractLane: {
2427 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2428 3814 : __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
2429 : break;
2430 : }
2431 : case kX64I32x4ReplaceLane: {
2432 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2433 1784 : if (instr->InputAt(2)->IsRegister()) {
2434 296 : __ Pinsrd(i.OutputSimd128Register(), i.InputRegister(2),
2435 296 : i.InputInt8(1));
2436 : } else {
2437 2976 : __ Pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
2438 : }
2439 : break;
2440 : }
2441 : case kX64I32x4SConvertF32x4: {
2442 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2443 : XMMRegister dst = i.OutputSimd128Register();
2444 : // NAN->0
2445 4 : __ movaps(kScratchDoubleReg, dst);
2446 : __ cmpeqps(kScratchDoubleReg, kScratchDoubleReg);
2447 : __ pand(dst, kScratchDoubleReg);
2448 : // Set top bit if >= 0 (but not -0.0!)
2449 : __ pxor(kScratchDoubleReg, dst);
2450 : // Convert
2451 4 : __ cvttps2dq(dst, dst);
2452 : // Set top bit if >=0 is now < 0
2453 : __ pand(kScratchDoubleReg, dst);
2454 4 : __ psrad(kScratchDoubleReg, 31);
2455 : // Set positive overflow lanes to 0x7FFFFFFF
2456 : __ pxor(dst, kScratchDoubleReg);
2457 : break;
2458 : }
2459 : case kX64I32x4SConvertI16x8Low: {
2460 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2461 4 : __ pmovsxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2462 : break;
2463 : }
2464 : case kX64I32x4SConvertI16x8High: {
2465 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2466 : XMMRegister dst = i.OutputSimd128Register();
2467 4 : __ palignr(dst, i.InputSimd128Register(0), 8);
2468 : __ pmovsxwd(dst, dst);
2469 : break;
2470 : }
2471 : case kX64I32x4Neg: {
2472 : CpuFeatureScope sse_scope(tasm(), SSSE3);
2473 : XMMRegister dst = i.OutputSimd128Register();
2474 : XMMRegister src = i.InputSimd128Register(0);
2475 4 : if (dst == src) {
2476 4 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2477 : __ psignd(dst, kScratchDoubleReg);
2478 : } else {
2479 0 : __ pxor(dst, dst);
2480 : __ psubd(dst, src);
2481 : }
2482 : break;
2483 : }
2484 : case kX64I32x4Shl: {
2485 248 : __ pslld(i.OutputSimd128Register(), i.InputInt8(1));
2486 124 : break;
2487 : }
2488 : case kX64I32x4ShrS: {
2489 248 : __ psrad(i.OutputSimd128Register(), i.InputInt8(1));
2490 124 : break;
2491 : }
2492 : case kX64I32x4Add: {
2493 12 : __ paddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2494 : break;
2495 : }
2496 : case kX64I32x4AddHoriz: {
2497 : CpuFeatureScope sse_scope(tasm(), SSSE3);
2498 4 : __ phaddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2499 : break;
2500 : }
2501 : case kX64I32x4Sub: {
2502 4 : __ psubd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2503 : break;
2504 : }
2505 : case kX64I32x4Mul: {
2506 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2507 4 : __ pmulld(i.OutputSimd128Register(), i.InputSimd128Register(1));
2508 : break;
2509 : }
2510 : case kX64I32x4MinS: {
2511 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2512 4 : __ pminsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2513 : break;
2514 : }
2515 : case kX64I32x4MaxS: {
2516 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2517 4 : __ pmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2518 : break;
2519 : }
2520 : case kX64I32x4Eq: {
2521 12 : __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2522 : break;
2523 : }
2524 : case kX64I32x4Ne: {
2525 16 : __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2526 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2527 : __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
2528 : break;
2529 : }
2530 : case kX64I32x4GtS: {
2531 8 : __ pcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2532 : break;
2533 : }
2534 : case kX64I32x4GeS: {
2535 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2536 : XMMRegister dst = i.OutputSimd128Register();
2537 : XMMRegister src = i.InputSimd128Register(1);
2538 8 : __ pminsd(dst, src);
2539 : __ pcmpeqd(dst, src);
2540 : break;
2541 : }
2542 : case kX64I32x4UConvertF32x4: {
2543 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2544 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2545 : XMMRegister dst = i.OutputSimd128Register();
2546 : XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
2547 : // NAN->0, negative->0
2548 4 : __ pxor(kScratchDoubleReg, kScratchDoubleReg);
2549 4 : __ maxps(dst, kScratchDoubleReg);
2550 : // scratch: float representation of max_signed
2551 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2552 4 : __ psrld(kScratchDoubleReg, 1); // 0x7fffffff
2553 4 : __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // 0x4f000000
2554 : // tmp: convert (src-max_signed).
2555 : // Positive overflow lanes -> 0x7FFFFFFF
2556 : // Negative lanes -> 0
2557 4 : __ movaps(tmp, dst);
2558 4 : __ subps(tmp, kScratchDoubleReg);
2559 : __ cmpleps(kScratchDoubleReg, tmp);
2560 4 : __ cvttps2dq(tmp, tmp);
2561 : __ pxor(tmp, kScratchDoubleReg);
2562 : __ pxor(kScratchDoubleReg, kScratchDoubleReg);
2563 : __ pmaxsd(tmp, kScratchDoubleReg);
2564 : // convert. Overflow lanes above max_signed will be 0x80000000
2565 4 : __ cvttps2dq(dst, dst);
2566 : // Add (src-max_signed) for overflow lanes.
2567 : __ paddd(dst, tmp);
2568 : break;
2569 : }
2570 : case kX64I32x4UConvertI16x8Low: {
2571 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2572 4 : __ pmovzxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2573 : break;
2574 : }
2575 : case kX64I32x4UConvertI16x8High: {
2576 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2577 : XMMRegister dst = i.OutputSimd128Register();
2578 4 : __ palignr(dst, i.InputSimd128Register(0), 8);
2579 : __ pmovzxwd(dst, dst);
2580 : break;
2581 : }
2582 : case kX64I32x4ShrU: {
2583 248 : __ psrld(i.OutputSimd128Register(), i.InputInt8(1));
2584 124 : break;
2585 : }
2586 : case kX64I32x4MinU: {
2587 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2588 4 : __ pminud(i.OutputSimd128Register(), i.InputSimd128Register(1));
2589 : break;
2590 : }
2591 : case kX64I32x4MaxU: {
2592 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2593 4 : __ pmaxud(i.OutputSimd128Register(), i.InputSimd128Register(1));
2594 : break;
2595 : }
2596 : case kX64I32x4GtU: {
2597 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2598 : XMMRegister dst = i.OutputSimd128Register();
2599 : XMMRegister src = i.InputSimd128Register(1);
2600 8 : __ pmaxud(dst, src);
2601 : __ pcmpeqd(dst, src);
2602 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2603 : __ pxor(dst, kScratchDoubleReg);
2604 : break;
2605 : }
2606 : case kX64I32x4GeU: {
2607 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2608 : XMMRegister dst = i.OutputSimd128Register();
2609 : XMMRegister src = i.InputSimd128Register(1);
2610 8 : __ pminud(dst, src);
2611 : __ pcmpeqd(dst, src);
2612 : break;
2613 : }
2614 : case kX64S128Zero: {
2615 22 : XMMRegister dst = i.OutputSimd128Register();
2616 22 : __ xorps(dst, dst);
2617 : break;
2618 : }
2619 : case kX64I16x8Splat: {
2620 436 : XMMRegister dst = i.OutputSimd128Register();
2621 436 : if (instr->InputAt(0)->IsRegister()) {
2622 436 : __ movd(dst, i.InputRegister(0));
2623 : } else {
2624 0 : __ movd(dst, i.InputOperand(0));
2625 : }
2626 436 : __ pshuflw(dst, dst, 0x0);
2627 436 : __ pshufd(dst, dst, 0x0);
2628 : break;
2629 : }
2630 : case kX64I16x8ExtractLane: {
2631 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2632 48 : Register dst = i.OutputRegister();
2633 96 : __ pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
2634 48 : __ movsxwl(dst, dst);
2635 : break;
2636 : }
2637 : case kX64I16x8ReplaceLane: {
2638 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2639 52 : if (instr->InputAt(2)->IsRegister()) {
2640 104 : __ pinsrw(i.OutputSimd128Register(), i.InputRegister(2),
2641 52 : i.InputInt8(1));
2642 : } else {
2643 0 : __ pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
2644 : }
2645 : break;
2646 : }
2647 : case kX64I16x8SConvertI8x16Low: {
2648 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2649 4 : __ pmovsxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
2650 : break;
2651 : }
2652 : case kX64I16x8SConvertI8x16High: {
2653 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2654 : XMMRegister dst = i.OutputSimd128Register();
2655 4 : __ palignr(dst, i.InputSimd128Register(0), 8);
2656 : __ pmovsxbw(dst, dst);
2657 : break;
2658 : }
2659 : case kX64I16x8Neg: {
2660 : CpuFeatureScope sse_scope(tasm(), SSSE3);
2661 : XMMRegister dst = i.OutputSimd128Register();
2662 : XMMRegister src = i.InputSimd128Register(0);
2663 4 : if (dst == src) {
2664 4 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2665 : __ psignw(dst, kScratchDoubleReg);
2666 : } else {
2667 0 : __ pxor(dst, dst);
2668 : __ psubw(dst, src);
2669 : }
2670 : break;
2671 : }
2672 : case kX64I16x8Shl: {
2673 120 : __ psllw(i.OutputSimd128Register(), i.InputInt8(1));
2674 60 : break;
2675 : }
2676 : case kX64I16x8ShrS: {
2677 120 : __ psraw(i.OutputSimd128Register(), i.InputInt8(1));
2678 60 : break;
2679 : }
2680 : case kX64I16x8SConvertI32x4: {
2681 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2682 4 : __ packssdw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2683 : break;
2684 : }
2685 : case kX64I16x8Add: {
2686 4 : __ paddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2687 : break;
2688 : }
2689 : case kX64I16x8AddSaturateS: {
2690 4 : __ paddsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2691 : break;
2692 : }
2693 : case kX64I16x8AddHoriz: {
2694 : CpuFeatureScope sse_scope(tasm(), SSSE3);
2695 4 : __ phaddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2696 : break;
2697 : }
2698 : case kX64I16x8Sub: {
2699 4 : __ psubw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2700 : break;
2701 : }
2702 : case kX64I16x8SubSaturateS: {
2703 4 : __ psubsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2704 : break;
2705 : }
2706 : case kX64I16x8Mul: {
2707 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2708 4 : __ pmullw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2709 : break;
2710 : }
2711 : case kX64I16x8MinS: {
2712 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2713 4 : __ pminsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2714 : break;
2715 : }
2716 : case kX64I16x8MaxS: {
2717 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2718 4 : __ pmaxsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2719 : break;
2720 : }
2721 : case kX64I16x8Eq: {
2722 12 : __ pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2723 : break;
2724 : }
2725 : case kX64I16x8Ne: {
2726 16 : __ pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2727 : __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2728 : __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
2729 : break;
2730 : }
2731 : case kX64I16x8GtS: {
2732 8 : __ pcmpgtw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2733 : break;
2734 : }
2735 : case kX64I16x8GeS: {
2736 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2737 : XMMRegister dst = i.OutputSimd128Register();
2738 : XMMRegister src = i.InputSimd128Register(1);
2739 8 : __ pminsw(dst, src);
2740 : __ pcmpeqw(dst, src);
2741 : break;
2742 : }
2743 : case kX64I16x8UConvertI8x16Low: {
2744 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2745 4 : __ pmovzxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
2746 : break;
2747 : }
2748 : case kX64I16x8UConvertI8x16High: {
2749 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2750 : XMMRegister dst = i.OutputSimd128Register();
2751 4 : __ palignr(dst, i.InputSimd128Register(0), 8);
2752 : __ pmovzxbw(dst, dst);
2753 : break;
2754 : }
2755 : case kX64I16x8ShrU: {
2756 120 : __ psrlw(i.OutputSimd128Register(), i.InputInt8(1));
2757 60 : break;
2758 : }
2759 : case kX64I16x8UConvertI32x4: {
2760 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2761 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2762 : XMMRegister dst = i.OutputSimd128Register();
2763 : // Change negative lanes to 0x7FFFFFFF
2764 4 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2765 4 : __ psrld(kScratchDoubleReg, 1);
2766 : __ pminud(dst, kScratchDoubleReg);
2767 : __ pminud(kScratchDoubleReg, i.InputSimd128Register(1));
2768 : __ packusdw(dst, kScratchDoubleReg);
2769 : break;
2770 : }
2771 : case kX64I16x8AddSaturateU: {
2772 4 : __ paddusw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2773 : break;
2774 : }
2775 : case kX64I16x8SubSaturateU: {
2776 4 : __ psubusw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2777 : break;
2778 : }
2779 : case kX64I16x8MinU: {
2780 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2781 4 : __ pminuw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2782 : break;
2783 : }
2784 : case kX64I16x8MaxU: {
2785 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2786 4 : __ pmaxuw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2787 : break;
2788 : }
2789 : case kX64I16x8GtU: {
2790 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2791 : XMMRegister dst = i.OutputSimd128Register();
2792 : XMMRegister src = i.InputSimd128Register(1);
2793 16 : __ pmaxuw(dst, src);
2794 : __ pcmpeqw(dst, src);
2795 : __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2796 : __ pxor(dst, kScratchDoubleReg);
2797 : break;
2798 : }
2799 : case kX64I16x8GeU: {
2800 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2801 : XMMRegister dst = i.OutputSimd128Register();
2802 : XMMRegister src = i.InputSimd128Register(1);
2803 16 : __ pminuw(dst, src);
2804 : __ pcmpeqw(dst, src);
2805 : break;
2806 : }
2807 : case kX64I8x16Splat: {
2808 : CpuFeatureScope sse_scope(tasm(), SSSE3);
2809 : XMMRegister dst = i.OutputSimd128Register();
2810 320 : if (instr->InputAt(0)->IsRegister()) {
2811 320 : __ movd(dst, i.InputRegister(0));
2812 : } else {
2813 0 : __ movd(dst, i.InputOperand(0));
2814 : }
2815 320 : __ xorps(kScratchDoubleReg, kScratchDoubleReg);
2816 : __ pshufb(dst, kScratchDoubleReg);
2817 : break;
2818 : }
2819 : case kX64I8x16ExtractLane: {
2820 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2821 48 : Register dst = i.OutputRegister();
2822 96 : __ pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
2823 48 : __ movsxbl(dst, dst);
2824 : break;
2825 : }
2826 : case kX64I8x16ReplaceLane: {
2827 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2828 84 : if (instr->InputAt(2)->IsRegister()) {
2829 168 : __ pinsrb(i.OutputSimd128Register(), i.InputRegister(2),
2830 84 : i.InputInt8(1));
2831 : } else {
2832 0 : __ pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
2833 : }
2834 : break;
2835 : }
2836 : case kX64I8x16SConvertI16x8: {
2837 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2838 4 : __ packsswb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2839 : break;
2840 : }
2841 : case kX64I8x16Neg: {
2842 : CpuFeatureScope sse_scope(tasm(), SSSE3);
2843 : XMMRegister dst = i.OutputSimd128Register();
2844 : XMMRegister src = i.InputSimd128Register(0);
2845 4 : if (dst == src) {
2846 4 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2847 : __ psignb(dst, kScratchDoubleReg);
2848 : } else {
2849 0 : __ pxor(dst, dst);
2850 : __ psubb(dst, src);
2851 : }
2852 : break;
2853 : }
2854 : case kX64I8x16Shl: {
2855 : XMMRegister dst = i.OutputSimd128Register();
2856 : DCHECK_EQ(dst, i.InputSimd128Register(0));
2857 28 : int8_t shift = i.InputInt8(1) & 0x7;
2858 28 : if (shift < 4) {
2859 : // For small shifts, doubling is faster.
2860 60 : for (int i = 0; i < shift; ++i) {
2861 24 : __ paddb(dst, dst);
2862 : }
2863 : } else {
2864 : // Mask off the unwanted bits before word-shifting.
2865 16 : __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2866 16 : __ psrlw(kScratchDoubleReg, 8 + shift);
2867 : __ packuswb(kScratchDoubleReg, kScratchDoubleReg);
2868 : __ pand(dst, kScratchDoubleReg);
2869 16 : __ psllw(dst, shift);
2870 : }
2871 : break;
2872 : }
2873 : case kX64I8x16ShrS: {
2874 : XMMRegister dst = i.OutputSimd128Register();
2875 : XMMRegister src = i.InputSimd128Register(0);
2876 28 : int8_t shift = i.InputInt8(1) & 0x7;
2877 : // Unpack the bytes into words, do arithmetic shifts, and repack.
2878 28 : __ punpckhbw(kScratchDoubleReg, src);
2879 : __ punpcklbw(dst, src);
2880 28 : __ psraw(kScratchDoubleReg, 8 + shift);
2881 28 : __ psraw(dst, 8 + shift);
2882 : __ packsswb(dst, kScratchDoubleReg);
2883 : break;
2884 : }
2885 : case kX64I8x16Add: {
2886 4 : __ paddb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2887 : break;
2888 : }
2889 : case kX64I8x16AddSaturateS: {
2890 4 : __ paddsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2891 : break;
2892 : }
2893 : case kX64I8x16Sub: {
2894 4 : __ psubb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2895 : break;
2896 : }
2897 : case kX64I8x16SubSaturateS: {
2898 4 : __ psubsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2899 : break;
2900 : }
2901 : case kX64I8x16Mul: {
2902 : XMMRegister dst = i.OutputSimd128Register();
2903 : DCHECK_EQ(dst, i.InputSimd128Register(0));
2904 : XMMRegister right = i.InputSimd128Register(1);
2905 : XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
2906 : // I16x8 view of I8x16
2907 : // left = AAaa AAaa ... AAaa AAaa
2908 : // right= BBbb BBbb ... BBbb BBbb
2909 : // t = 00AA 00AA ... 00AA 00AA
2910 : // s = 00BB 00BB ... 00BB 00BB
2911 4 : __ movaps(tmp, dst);
2912 4 : __ movaps(kScratchDoubleReg, right);
2913 4 : __ psrlw(tmp, 8);
2914 4 : __ psrlw(kScratchDoubleReg, 8);
2915 : // dst = left * 256
2916 4 : __ psllw(dst, 8);
2917 : // t = I16x8Mul(t, s)
2918 : // => __PP __PP ... __PP __PP
2919 : __ pmullw(tmp, kScratchDoubleReg);
2920 : // dst = I16x8Mul(left * 256, right)
2921 : // => pp__ pp__ ... pp__ pp__
2922 : __ pmullw(dst, right);
2923 : // t = I16x8Shl(t, 8)
2924 : // => PP00 PP00 ... PP00 PP00
2925 4 : __ psllw(tmp, 8);
2926 : // dst = I16x8Shr(dst, 8)
2927 : // => 00pp 00pp ... 00pp 00pp
2928 4 : __ psrlw(dst, 8);
2929 : // dst = I16x8Or(dst, t)
2930 : // => PPpp PPpp ... PPpp PPpp
2931 : __ por(dst, tmp);
2932 : break;
2933 : }
2934 : case kX64I8x16MinS: {
2935 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2936 4 : __ pminsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2937 : break;
2938 : }
2939 : case kX64I8x16MaxS: {
2940 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2941 4 : __ pmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2942 : break;
2943 : }
2944 : case kX64I8x16Eq: {
2945 12 : __ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2946 : break;
2947 : }
2948 : case kX64I8x16Ne: {
2949 16 : __ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2950 : __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
2951 : __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
2952 : break;
2953 : }
2954 : case kX64I8x16GtS: {
2955 8 : __ pcmpgtb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2956 : break;
2957 : }
2958 : case kX64I8x16GeS: {
2959 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2960 : XMMRegister dst = i.OutputSimd128Register();
2961 : XMMRegister src = i.InputSimd128Register(1);
2962 8 : __ pminsb(dst, src);
2963 : __ pcmpeqb(dst, src);
2964 : break;
2965 : }
2966 : case kX64I8x16UConvertI16x8: {
2967 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2968 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2969 : XMMRegister dst = i.OutputSimd128Register();
2970 : // Change negative lanes to 0x7FFF
2971 4 : __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2972 4 : __ psrlw(kScratchDoubleReg, 1);
2973 : __ pminuw(dst, kScratchDoubleReg);
2974 : __ pminuw(kScratchDoubleReg, i.InputSimd128Register(1));
2975 : __ packuswb(dst, kScratchDoubleReg);
2976 : break;
2977 : }
2978 : case kX64I8x16ShrU: {
2979 : XMMRegister dst = i.OutputSimd128Register();
2980 : XMMRegister src = i.InputSimd128Register(0);
2981 28 : int8_t shift = i.InputInt8(1) & 0x7;
2982 : // Unpack the bytes into words, do logical shifts, and repack.
2983 28 : __ punpckhbw(kScratchDoubleReg, src);
2984 : __ punpcklbw(dst, src);
2985 28 : __ psrlw(kScratchDoubleReg, 8 + shift);
2986 28 : __ psrlw(dst, 8 + shift);
2987 : __ packuswb(dst, kScratchDoubleReg);
2988 : break;
2989 : }
2990 : case kX64I8x16AddSaturateU: {
2991 4 : __ paddusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2992 : break;
2993 : }
2994 : case kX64I8x16SubSaturateU: {
2995 4 : __ psubusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2996 : break;
2997 : }
2998 : case kX64I8x16MinU: {
2999 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
3000 4 : __ pminub(i.OutputSimd128Register(), i.InputSimd128Register(1));
3001 : break;
3002 : }
3003 : case kX64I8x16MaxU: {
3004 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
3005 4 : __ pmaxub(i.OutputSimd128Register(), i.InputSimd128Register(1));
3006 : break;
3007 : }
3008 : case kX64I8x16GtU: {
3009 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
3010 : XMMRegister dst = i.OutputSimd128Register();
3011 : XMMRegister src = i.InputSimd128Register(1);
3012 16 : __ pmaxub(dst, src);
3013 : __ pcmpeqb(dst, src);
3014 : __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
3015 : __ pxor(dst, kScratchDoubleReg);
3016 : break;
3017 : }
3018 : case kX64I8x16GeU: {
3019 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
3020 : XMMRegister dst = i.OutputSimd128Register();
3021 : XMMRegister src = i.InputSimd128Register(1);
3022 16 : __ pminub(dst, src);
3023 : __ pcmpeqb(dst, src);
3024 : break;
3025 : }
3026 : case kX64S128And: {
3027 4 : __ pand(i.OutputSimd128Register(), i.InputSimd128Register(1));
3028 : break;
3029 : }
3030 : case kX64S128Or: {
3031 4 : __ por(i.OutputSimd128Register(), i.InputSimd128Register(1));
3032 : break;
3033 : }
3034 : case kX64S128Xor: {
3035 4 : __ pxor(i.OutputSimd128Register(), i.InputSimd128Register(1));
3036 : break;
3037 : }
3038 : case kX64S128Not: {
3039 : XMMRegister dst = i.OutputSimd128Register();
3040 : XMMRegister src = i.InputSimd128Register(0);
3041 4 : if (dst == src) {
3042 4 : __ movaps(kScratchDoubleReg, dst);
3043 : __ pcmpeqd(dst, dst);
3044 : __ pxor(dst, kScratchDoubleReg);
3045 : } else {
3046 0 : __ pcmpeqd(dst, dst);
3047 : __ pxor(dst, src);
3048 : }
3049 :
3050 : break;
3051 : }
3052 : case kX64S128Select: {
3053 : // Mask used here is stored in dst.
3054 28 : XMMRegister dst = i.OutputSimd128Register();
3055 28 : __ movaps(kScratchDoubleReg, i.InputSimd128Register(1));
3056 28 : __ xorps(kScratchDoubleReg, i.InputSimd128Register(2));
3057 28 : __ andps(dst, kScratchDoubleReg);
3058 28 : __ xorps(dst, i.InputSimd128Register(2));
3059 : break;
3060 : }
3061 : case kX64S8x16Shuffle: {
3062 : XMMRegister dst = i.OutputSimd128Register();
3063 : Register tmp = i.TempRegister(0);
3064 : // Prepare 16 byte aligned buffer for shuffle control mask
3065 1400 : __ movq(tmp, rsp);
3066 : __ andq(rsp, Immediate(-16));
3067 1400 : if (instr->InputCount() == 5) { // only one input operand
3068 704 : uint32_t mask[4] = {};
3069 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3070 6336 : for (int j = 4; j > 0; j--) {
3071 5632 : mask[j - 1] = i.InputUint32(j);
3072 : }
3073 :
3074 704 : SetupShuffleMaskOnStack(tasm(), mask);
3075 1408 : __ pshufb(dst, Operand(rsp, 0));
3076 : } else { // two input operands
3077 : DCHECK_EQ(6, instr->InputCount());
3078 1392 : ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 0);
3079 696 : uint32_t mask[4] = {};
3080 6264 : for (int j = 5; j > 1; j--) {
3081 2784 : uint32_t lanes = i.InputUint32(j);
3082 25056 : for (int k = 0; k < 32; k += 8) {
3083 11136 : uint8_t lane = lanes >> k;
3084 11136 : mask[j - 2] |= (lane < kSimd128Size ? lane : 0x80) << k;
3085 : }
3086 : }
3087 696 : SetupShuffleMaskOnStack(tasm(), mask);
3088 1392 : __ pshufb(kScratchDoubleReg, Operand(rsp, 0));
3089 696 : uint32_t mask1[4] = {};
3090 696 : if (instr->InputAt(1)->IsSimd128Register()) {
3091 : XMMRegister src1 = i.InputSimd128Register(1);
3092 696 : if (src1 != dst) __ movups(dst, src1);
3093 : } else {
3094 0 : __ movups(dst, i.InputOperand(1));
3095 : }
3096 6264 : for (int j = 5; j > 1; j--) {
3097 2784 : uint32_t lanes = i.InputUint32(j);
3098 25056 : for (int k = 0; k < 32; k += 8) {
3099 11136 : uint8_t lane = lanes >> k;
3100 11136 : mask1[j - 2] |= (lane >= kSimd128Size ? (lane & 0x0F) : 0x80) << k;
3101 : }
3102 : }
3103 696 : SetupShuffleMaskOnStack(tasm(), mask1);
3104 1392 : __ pshufb(dst, Operand(rsp, 0));
3105 : __ por(dst, kScratchDoubleReg);
3106 : }
3107 : __ movq(rsp, tmp);
3108 : break;
3109 : }
3110 : case kX64S32x4Swizzle: {
3111 : DCHECK_EQ(2, instr->InputCount());
3112 924 : ASSEMBLE_SIMD_IMM_INSTR(pshufd, i.OutputSimd128Register(), 0,
3113 : i.InputInt8(1));
3114 : break;
3115 : }
3116 : case kX64S32x4Shuffle: {
3117 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
3118 : DCHECK_EQ(4, instr->InputCount()); // Swizzles should be handled above.
3119 : int8_t shuffle = i.InputInt8(2);
3120 : DCHECK_NE(0xe4, shuffle); // A simple blend should be handled below.
3121 768 : ASSEMBLE_SIMD_IMM_INSTR(pshufd, kScratchDoubleReg, 1, shuffle);
3122 768 : ASSEMBLE_SIMD_IMM_INSTR(pshufd, i.OutputSimd128Register(), 0, shuffle);
3123 768 : __ pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputInt8(3));
3124 : break;
3125 : }
3126 : case kX64S16x8Blend: {
3127 112 : ASSEMBLE_SIMD_IMM_SHUFFLE(pblendw, SSE4_1, i.InputInt8(2));
3128 56 : break;
3129 : }
3130 : case kX64S16x8HalfShuffle1: {
3131 240 : XMMRegister dst = i.OutputSimd128Register();
3132 720 : ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, i.InputInt8(1));
3133 240 : __ pshufhw(dst, dst, i.InputInt8(2));
3134 : break;
3135 : }
3136 : case kX64S16x8HalfShuffle2: {
3137 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
3138 164 : XMMRegister dst = i.OutputSimd128Register();
3139 492 : ASSEMBLE_SIMD_IMM_INSTR(pshuflw, kScratchDoubleReg, 1, i.InputInt8(2));
3140 164 : __ pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputInt8(3));
3141 492 : ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, i.InputInt8(2));
3142 164 : __ pshufhw(dst, dst, i.InputInt8(3));
3143 164 : __ pblendw(dst, kScratchDoubleReg, i.InputInt8(4));
3144 : break;
3145 : }
3146 : case kX64S8x16Alignr: {
3147 480 : ASSEMBLE_SIMD_IMM_SHUFFLE(palignr, SSSE3, i.InputInt8(2));
3148 240 : break;
3149 : }
3150 : case kX64S16x8Dup: {
3151 124 : XMMRegister dst = i.OutputSimd128Register();
3152 124 : int8_t lane = i.InputInt8(1) & 0x7;
3153 124 : int8_t lane4 = lane & 0x3;
3154 124 : int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
3155 124 : if (lane < 4) {
3156 248 : ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, half_dup);
3157 124 : __ pshufd(dst, dst, 0);
3158 : } else {
3159 0 : ASSEMBLE_SIMD_IMM_INSTR(pshufhw, dst, 0, half_dup);
3160 0 : __ pshufd(dst, dst, 0xaa);
3161 : }
3162 : break;
3163 : }
3164 : case kX64S8x16Dup: {
3165 : XMMRegister dst = i.OutputSimd128Register();
3166 264 : int8_t lane = i.InputInt8(1) & 0xf;
3167 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3168 264 : if (lane < 8) {
3169 264 : __ punpcklbw(dst, dst);
3170 : } else {
3171 0 : __ punpckhbw(dst, dst);
3172 : }
3173 264 : lane &= 0x7;
3174 264 : int8_t lane4 = lane & 0x3;
3175 264 : int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
3176 264 : if (lane < 4) {
3177 232 : __ pshuflw(dst, dst, half_dup);
3178 232 : __ pshufd(dst, dst, 0);
3179 : } else {
3180 32 : __ pshufhw(dst, dst, half_dup);
3181 32 : __ pshufd(dst, dst, 0xaa);
3182 : }
3183 : break;
3184 : }
3185 : case kX64S64x2UnpackHigh:
3186 0 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhqdq);
3187 : break;
3188 : case kX64S32x4UnpackHigh:
3189 276 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhdq);
3190 : break;
3191 : case kX64S16x8UnpackHigh:
3192 288 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhwd);
3193 : break;
3194 : case kX64S8x16UnpackHigh:
3195 216 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhbw);
3196 : break;
3197 : case kX64S64x2UnpackLow:
3198 0 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklqdq);
3199 : break;
3200 : case kX64S32x4UnpackLow:
3201 264 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckldq);
3202 : break;
3203 : case kX64S16x8UnpackLow:
3204 348 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklwd);
3205 : break;
3206 : case kX64S8x16UnpackLow:
3207 300 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklbw);
3208 : break;
3209 : case kX64S16x8UnzipHigh: {
3210 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
3211 : XMMRegister dst = i.OutputSimd128Register();
3212 : XMMRegister src2 = dst;
3213 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3214 76 : if (instr->InputCount() == 2) {
3215 136 : ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
3216 68 : __ psrld(kScratchDoubleReg, 16);
3217 : src2 = kScratchDoubleReg;
3218 : }
3219 76 : __ psrld(dst, 16);
3220 : __ packusdw(dst, src2);
3221 : break;
3222 : }
3223 : case kX64S16x8UnzipLow: {
3224 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
3225 : XMMRegister dst = i.OutputSimd128Register();
3226 : XMMRegister src2 = dst;
3227 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3228 48 : __ pxor(kScratchDoubleReg, kScratchDoubleReg);
3229 48 : if (instr->InputCount() == 2) {
3230 80 : ASSEMBLE_SIMD_IMM_INSTR(pblendw, kScratchDoubleReg, 1, 0x55);
3231 : src2 = kScratchDoubleReg;
3232 : }
3233 48 : __ pblendw(dst, kScratchDoubleReg, 0xaa);
3234 : __ packusdw(dst, src2);
3235 : break;
3236 : }
3237 : case kX64S8x16UnzipHigh: {
3238 : XMMRegister dst = i.OutputSimd128Register();
3239 : XMMRegister src2 = dst;
3240 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3241 112 : if (instr->InputCount() == 2) {
3242 208 : ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
3243 104 : __ psrlw(kScratchDoubleReg, 8);
3244 : src2 = kScratchDoubleReg;
3245 : }
3246 112 : __ psrlw(dst, 8);
3247 : __ packuswb(dst, src2);
3248 : break;
3249 : }
3250 : case kX64S8x16UnzipLow: {
3251 : XMMRegister dst = i.OutputSimd128Register();
3252 : XMMRegister src2 = dst;
3253 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3254 72 : if (instr->InputCount() == 2) {
3255 128 : ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
3256 64 : __ psllw(kScratchDoubleReg, 8);
3257 64 : __ psrlw(kScratchDoubleReg, 8);
3258 : src2 = kScratchDoubleReg;
3259 : }
3260 72 : __ psllw(dst, 8);
3261 72 : __ psrlw(dst, 8);
3262 : __ packuswb(dst, src2);
3263 : break;
3264 : }
3265 : case kX64S8x16TransposeLow: {
3266 : XMMRegister dst = i.OutputSimd128Register();
3267 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3268 80 : __ psllw(dst, 8);
3269 80 : if (instr->InputCount() == 1) {
3270 8 : __ movups(kScratchDoubleReg, dst);
3271 : } else {
3272 : DCHECK_EQ(2, instr->InputCount());
3273 144 : ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
3274 72 : __ psllw(kScratchDoubleReg, 8);
3275 : }
3276 80 : __ psrlw(dst, 8);
3277 : __ por(dst, kScratchDoubleReg);
3278 : break;
3279 : }
3280 : case kX64S8x16TransposeHigh: {
3281 : XMMRegister dst = i.OutputSimd128Register();
3282 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3283 96 : __ psrlw(dst, 8);
3284 96 : if (instr->InputCount() == 1) {
3285 8 : __ movups(kScratchDoubleReg, dst);
3286 : } else {
3287 : DCHECK_EQ(2, instr->InputCount());
3288 176 : ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
3289 88 : __ psrlw(kScratchDoubleReg, 8);
3290 : }
3291 96 : __ psllw(kScratchDoubleReg, 8);
3292 : __ por(dst, kScratchDoubleReg);
3293 : break;
3294 : }
3295 : case kX64S8x8Reverse:
3296 : case kX64S8x4Reverse:
3297 : case kX64S8x2Reverse: {
3298 : DCHECK_EQ(1, instr->InputCount());
3299 : XMMRegister dst = i.OutputSimd128Register();
3300 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3301 336 : if (arch_opcode != kX64S8x2Reverse) {
3302 : // First shuffle words into position.
3303 228 : int8_t shuffle_mask = arch_opcode == kX64S8x4Reverse ? 0xB1 : 0x1B;
3304 228 : __ pshuflw(dst, dst, shuffle_mask);
3305 228 : __ pshufhw(dst, dst, shuffle_mask);
3306 : }
3307 336 : __ movaps(kScratchDoubleReg, dst);
3308 336 : __ psrlw(kScratchDoubleReg, 8);
3309 336 : __ psllw(dst, 8);
3310 : __ por(dst, kScratchDoubleReg);
3311 : break;
3312 : }
3313 : case kX64S1x4AnyTrue:
3314 : case kX64S1x8AnyTrue:
3315 : case kX64S1x16AnyTrue: {
3316 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
3317 : Register dst = i.OutputRegister();
3318 : XMMRegister src = i.InputSimd128Register(0);
3319 : Register tmp = i.TempRegister(0);
3320 60 : __ xorq(tmp, tmp);
3321 : __ movq(dst, Immediate(1));
3322 : __ ptest(src, src);
3323 60 : __ cmovq(zero, dst, tmp);
3324 : break;
3325 : }
3326 : case kX64S1x4AllTrue:
3327 : case kX64S1x8AllTrue:
3328 : case kX64S1x16AllTrue: {
3329 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
3330 : Register dst = i.OutputRegister();
3331 : XMMRegister src = i.InputSimd128Register(0);
3332 : Register tmp = i.TempRegister(0);
3333 60 : __ movq(tmp, Immediate(1));
3334 : __ xorq(dst, dst);
3335 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3336 : __ pxor(kScratchDoubleReg, src);
3337 : __ ptest(kScratchDoubleReg, kScratchDoubleReg);
3338 60 : __ cmovq(zero, dst, tmp);
3339 : break;
3340 : }
3341 : case kX64StackCheck:
3342 562627 : __ CompareRoot(rsp, RootIndex::kStackLimit);
3343 562635 : break;
3344 : case kWord32AtomicExchangeInt8: {
3345 817 : __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
3346 817 : __ movsxbl(i.InputRegister(0), i.InputRegister(0));
3347 817 : break;
3348 : }
3349 : case kWord32AtomicExchangeUint8: {
3350 743 : __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
3351 : __ movzxbl(i.InputRegister(0), i.InputRegister(0));
3352 : break;
3353 : }
3354 : case kWord32AtomicExchangeInt16: {
3355 875 : __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
3356 877 : __ movsxwl(i.InputRegister(0), i.InputRegister(0));
3357 877 : break;
3358 : }
3359 : case kWord32AtomicExchangeUint16: {
3360 708 : __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
3361 : __ movzxwl(i.InputRegister(0), i.InputRegister(0));
3362 : break;
3363 : }
3364 : case kWord32AtomicExchangeWord32: {
3365 1178 : __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
3366 : break;
3367 : }
3368 : case kWord32AtomicCompareExchangeInt8: {
3369 112 : __ lock();
3370 112 : __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
3371 112 : __ movsxbl(rax, rax);
3372 112 : break;
3373 : }
3374 : case kWord32AtomicCompareExchangeUint8: {
3375 132 : __ lock();
3376 132 : __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
3377 : __ movzxbl(rax, rax);
3378 : break;
3379 : }
3380 : case kWord32AtomicCompareExchangeInt16: {
3381 112 : __ lock();
3382 112 : __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
3383 112 : __ movsxwl(rax, rax);
3384 112 : break;
3385 : }
3386 : case kWord32AtomicCompareExchangeUint16: {
3387 133 : __ lock();
3388 133 : __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
3389 : __ movzxwl(rax, rax);
3390 : break;
3391 : }
3392 : case kWord32AtomicCompareExchangeWord32: {
3393 263 : __ lock();
3394 : __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
3395 : break;
3396 : }
3397 : #define ATOMIC_BINOP_CASE(op, inst) \
3398 : case kWord32Atomic##op##Int8: \
3399 : ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
3400 : __ movsxbl(rax, rax); \
3401 : break; \
3402 : case kWord32Atomic##op##Uint8: \
3403 : ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
3404 : __ movzxbl(rax, rax); \
3405 : break; \
3406 : case kWord32Atomic##op##Int16: \
3407 : ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
3408 : __ movsxwl(rax, rax); \
3409 : break; \
3410 : case kWord32Atomic##op##Uint16: \
3411 : ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
3412 : __ movzxwl(rax, rax); \
3413 : break; \
3414 : case kWord32Atomic##op##Word32: \
3415 : ASSEMBLE_ATOMIC_BINOP(inst, movl, cmpxchgl); \
3416 : break;
3417 8556 : ATOMIC_BINOP_CASE(Add, addl)
3418 8531 : ATOMIC_BINOP_CASE(Sub, subl)
3419 7283 : ATOMIC_BINOP_CASE(And, andl)
3420 8879 : ATOMIC_BINOP_CASE(Or, orl)
3421 8748 : ATOMIC_BINOP_CASE(Xor, xorl)
3422 : #undef ATOMIC_BINOP_CASE
3423 : case kX64Word64AtomicExchangeUint8: {
3424 565 : __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
3425 : __ movzxbq(i.InputRegister(0), i.InputRegister(0));
3426 : break;
3427 : }
3428 : case kX64Word64AtomicExchangeUint16: {
3429 599 : __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
3430 : __ movzxwq(i.InputRegister(0), i.InputRegister(0));
3431 : break;
3432 : }
3433 : case kX64Word64AtomicExchangeUint32: {
3434 490 : __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
3435 : break;
3436 : }
3437 : case kX64Word64AtomicExchangeUint64: {
3438 676 : __ xchgq(i.InputRegister(0), i.MemoryOperand(1));
3439 : break;
3440 : }
3441 : case kX64Word64AtomicCompareExchangeUint8: {
3442 18 : __ lock();
3443 18 : __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
3444 : __ movzxbq(rax, rax);
3445 : break;
3446 : }
3447 : case kX64Word64AtomicCompareExchangeUint16: {
3448 27 : __ lock();
3449 27 : __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
3450 : __ movzxwq(rax, rax);
3451 : break;
3452 : }
3453 : case kX64Word64AtomicCompareExchangeUint32: {
3454 29 : __ lock();
3455 : __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
3456 : break;
3457 : }
3458 : case kX64Word64AtomicCompareExchangeUint64: {
3459 276 : __ lock();
3460 : __ cmpxchgq(i.MemoryOperand(2), i.InputRegister(1));
3461 : break;
3462 : }
3463 : #define ATOMIC64_BINOP_CASE(op, inst) \
3464 : case kX64Word64Atomic##op##Uint8: \
3465 : ASSEMBLE_ATOMIC64_BINOP(inst, movb, cmpxchgb); \
3466 : __ movzxbq(rax, rax); \
3467 : break; \
3468 : case kX64Word64Atomic##op##Uint16: \
3469 : ASSEMBLE_ATOMIC64_BINOP(inst, movw, cmpxchgw); \
3470 : __ movzxwq(rax, rax); \
3471 : break; \
3472 : case kX64Word64Atomic##op##Uint32: \
3473 : ASSEMBLE_ATOMIC64_BINOP(inst, movl, cmpxchgl); \
3474 : break; \
3475 : case kX64Word64Atomic##op##Uint64: \
3476 : ASSEMBLE_ATOMIC64_BINOP(inst, movq, cmpxchgq); \
3477 : break;
3478 3892 : ATOMIC64_BINOP_CASE(Add, addq)
3479 2997 : ATOMIC64_BINOP_CASE(Sub, subq)
3480 4054 : ATOMIC64_BINOP_CASE(And, andq)
3481 4164 : ATOMIC64_BINOP_CASE(Or, orq)
3482 3070 : ATOMIC64_BINOP_CASE(Xor, xorq)
3483 : #undef ATOMIC64_BINOP_CASE
3484 : case kWord32AtomicLoadInt8:
3485 : case kWord32AtomicLoadUint8:
3486 : case kWord32AtomicLoadInt16:
3487 : case kWord32AtomicLoadUint16:
3488 : case kWord32AtomicLoadWord32:
3489 : case kWord32AtomicStoreWord8:
3490 : case kWord32AtomicStoreWord16:
3491 : case kWord32AtomicStoreWord32:
3492 : case kX64Word64AtomicLoadUint8:
3493 : case kX64Word64AtomicLoadUint16:
3494 : case kX64Word64AtomicLoadUint32:
3495 : case kX64Word64AtomicLoadUint64:
3496 : case kX64Word64AtomicStoreWord8:
3497 : case kX64Word64AtomicStoreWord16:
3498 : case kX64Word64AtomicStoreWord32:
3499 : case kX64Word64AtomicStoreWord64:
3500 0 : UNREACHABLE(); // Won't be generated by instruction selector.
3501 : break;
3502 : }
3503 : return kSuccess;
}  // NOLINT(readability/fn_size)
3505 :
3506 : #undef ASSEMBLE_UNOP
3507 : #undef ASSEMBLE_BINOP
3508 : #undef ASSEMBLE_COMPARE
3509 : #undef ASSEMBLE_MULT
3510 : #undef ASSEMBLE_SHIFT
3511 : #undef ASSEMBLE_MOVX
3512 : #undef ASSEMBLE_SSE_BINOP
3513 : #undef ASSEMBLE_SSE_UNOP
3514 : #undef ASSEMBLE_AVX_BINOP
3515 : #undef ASSEMBLE_IEEE754_BINOP
3516 : #undef ASSEMBLE_IEEE754_UNOP
3517 : #undef ASSEMBLE_ATOMIC_BINOP
3518 : #undef ASSEMBLE_ATOMIC64_BINOP
3519 : #undef ASSEMBLE_SIMD_INSTR
3520 : #undef ASSEMBLE_SIMD_IMM_INSTR
3521 : #undef ASSEMBLE_SIMD_PUNPCK_SHUFFLE
3522 : #undef ASSEMBLE_SIMD_IMM_SHUFFLE
3523 :
3524 : namespace {
3525 :
3526 6195604 : Condition FlagsConditionToCondition(FlagsCondition condition) {
3527 6195604 : switch (condition) {
3528 : case kUnorderedEqual:
3529 : case kEqual:
3530 : return equal;
3531 : case kUnorderedNotEqual:
3532 : case kNotEqual:
3533 1450383 : return not_equal;
3534 : case kSignedLessThan:
3535 176519 : return less;
3536 : case kSignedGreaterThanOrEqual:
3537 59288 : return greater_equal;
3538 : case kSignedLessThanOrEqual:
3539 66213 : return less_equal;
3540 : case kSignedGreaterThan:
3541 67172 : return greater;
3542 : case kUnsignedLessThan:
3543 164001 : return below;
3544 : case kUnsignedGreaterThanOrEqual:
3545 378916 : return above_equal;
3546 : case kUnsignedLessThanOrEqual:
3547 945163 : return below_equal;
3548 : case kUnsignedGreaterThan:
3549 120214 : return above;
3550 : case kOverflow:
3551 179939 : return overflow;
3552 : case kNotOverflow:
3553 1008 : return no_overflow;
3554 : default:
3555 : break;
3556 : }
3557 0 : UNREACHABLE();
3558 : }
3559 :
3560 : } // namespace
3561 :
3562 : // Assembles branches after this instruction.
3563 5349189 : void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
3564 : Label::Distance flabel_distance =
3565 5349189 : branch->fallthru ? Label::kNear : Label::kFar;
3566 5349189 : Label* tlabel = branch->true_label;
3567 5349189 : Label* flabel = branch->false_label;
3568 5349189 : if (branch->condition == kUnorderedEqual) {
3569 57915 : __ j(parity_even, flabel, flabel_distance);
3570 5291274 : } else if (branch->condition == kUnorderedNotEqual) {
3571 104680 : __ j(parity_even, tlabel);
3572 : }
3573 5349189 : __ j(FlagsConditionToCondition(branch->condition), tlabel);
3574 :
3575 5349222 : if (!branch->fallthru) __ jmp(flabel, flabel_distance);
3576 5349222 : }
3577 :
// Conditionally zeroes the speculation poison register based on the current
// flags: when the negation of |condition| holds (i.e. the branch outcome the
// following code assumes did not actually occur), the poison register is
// cmov'ed to zero so poisoned loads yield zero under misspeculation.
void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
                                            Instruction* instr) {
  // TODO(jarin) Handle float comparisons (kUnordered[Not]Equal).
  if (condition == kUnorderedEqual || condition == kUnorderedNotEqual) {
    return;
  }

  // Poison on the *negated* condition: the cmov below must fire exactly when
  // the assumed branch direction was wrong.
  condition = NegateFlagsCondition(condition);
  __ movl(kScratchRegister, Immediate(0));
  __ cmovq(FlagsConditionToCondition(condition), kSpeculationPoisonRegister,
           kScratchRegister);
}
3590 :
// Like AssembleArchBranch, but the true target is a deoptimization exit.
// Additionally implements --deopt-every-n-times stress mode: a global
// counter is decremented on every pass, and when it hits zero the deopt
// branch (tlabel) is taken unconditionally and the counter is reset.
void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
                                            BranchInfo* branch) {
  Label::Distance flabel_distance =
      branch->fallthru ? Label::kNear : Label::kFar;
  Label* tlabel = branch->true_label;
  Label* flabel = branch->false_label;
  Label nodeopt;
  if (branch->condition == kUnorderedEqual) {
    // NaN: take the false (no-deopt) path.
    __ j(parity_even, flabel, flabel_distance);
  } else if (branch->condition == kUnorderedNotEqual) {
    // NaN: take the true (deopt) path.
    __ j(parity_even, tlabel);
  }
  __ j(FlagsConditionToCondition(branch->condition), tlabel);

  if (FLAG_deopt_every_n_times > 0) {
    ExternalReference counter =
        ExternalReference::stress_deopt_count(isolate());

    // Preserve the flags and rax across the counter update; load_rax /
    // store_rax below clobber rax.
    __ pushfq();
    __ pushq(rax);
    __ load_rax(counter);
    __ decl(rax);
    __ j(not_zero, &nodeopt);

    // Counter reached zero: reset it and force the deopt branch.
    __ Set(rax, FLAG_deopt_every_n_times);
    __ store_rax(counter);
    __ popq(rax);
    __ popfq();
    __ jmp(tlabel);

    // Counter still positive: persist the decremented value and fall through.
    __ bind(&nodeopt);
    __ store_rax(counter);
    __ popq(rax);
    __ popfq();
  }

  if (!branch->fallthru) {
    __ jmp(flabel, flabel_distance);
  }
}
3631 :
3632 5035280 : void CodeGenerator::AssembleArchJump(RpoNumber target) {
3633 8252469 : if (!IsNextInAssemblyOrder(target)) __ jmp(GetLabel(target));
3634 5035331 : }
3635 :
3636 142022 : void CodeGenerator::AssembleArchTrap(Instruction* instr,
3637 : FlagsCondition condition) {
3638 : auto ool = new (zone()) WasmOutOfLineTrap(this, instr);
3639 : Label* tlabel = ool->entry();
3640 141934 : Label end;
3641 141934 : if (condition == kUnorderedEqual) {
3642 0 : __ j(parity_even, &end);
3643 141934 : } else if (condition == kUnorderedNotEqual) {
3644 307 : __ j(parity_even, tlabel);
3645 : }
3646 141934 : __ j(FlagsConditionToCondition(condition), tlabel);
3647 142080 : __ bind(&end);
3648 142164 : }
3649 :
3650 : // Assembles boolean materializations after this instruction.
3651 376429 : void CodeGenerator::AssembleArchBoolean(Instruction* instr,
3652 : FlagsCondition condition) {
3653 : X64OperandConverter i(this, instr);
3654 376429 : Label done;
3655 :
3656 : // Materialize a full 64-bit 1 or 0 value. The result register is always the
3657 : // last output of the instruction.
3658 376429 : Label check;
3659 : DCHECK_NE(0u, instr->OutputCount());
3660 376429 : Register reg = i.OutputRegister(instr->OutputCount() - 1);
3661 376429 : if (condition == kUnorderedEqual) {
3662 2920 : __ j(parity_odd, &check, Label::kNear);
3663 : __ movl(reg, Immediate(0));
3664 2920 : __ jmp(&done, Label::kNear);
3665 373509 : } else if (condition == kUnorderedNotEqual) {
3666 2492 : __ j(parity_odd, &check, Label::kNear);
3667 : __ movl(reg, Immediate(1));
3668 2492 : __ jmp(&done, Label::kNear);
3669 : }
3670 376429 : __ bind(&check);
3671 376433 : __ setcc(FlagsConditionToCondition(condition), reg);
3672 : __ movzxbl(reg, reg);
3673 376430 : __ bind(&done);
3674 376434 : }
3675 :
3676 34090 : void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
3677 : X64OperandConverter i(this, instr);
3678 34090 : Register input = i.InputRegister(0);
3679 : std::vector<std::pair<int32_t, Label*>> cases;
3680 419084 : for (size_t index = 2; index < instr->InputCount(); index += 2) {
3681 384992 : cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
3682 : }
3683 : AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
3684 34092 : cases.data() + cases.size());
3685 34091 : }
3686 :
3687 0 : void CodeGenerator::AssembleArchLookupSwitch(Instruction* instr) {
3688 : X64OperandConverter i(this, instr);
3689 : Register input = i.InputRegister(0);
3690 0 : for (size_t index = 2; index < instr->InputCount(); index += 2) {
3691 0 : __ cmpl(input, Immediate(i.InputInt32(index + 0)));
3692 0 : __ j(equal, GetLabel(i.InputRpo(index + 1)));
3693 : }
3694 0 : AssembleArchJump(i.InputRpo(1));
3695 0 : }
3696 :
3697 315 : void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
3698 : X64OperandConverter i(this, instr);
3699 : Register input = i.InputRegister(0);
3700 315 : int32_t const case_count = static_cast<int32_t>(instr->InputCount() - 2);
3701 315 : Label** cases = zone()->NewArray<Label*>(case_count);
3702 399611 : for (int32_t index = 0; index < case_count; ++index) {
3703 399296 : cases[index] = GetLabel(i.InputRpo(index + 2));
3704 : }
3705 315 : Label* const table = AddJumpTable(cases, case_count);
3706 315 : __ cmpl(input, Immediate(case_count));
3707 315 : __ j(above_equal, GetLabel(i.InputRpo(1)));
3708 630 : __ leaq(kScratchRegister, Operand(table));
3709 315 : __ jmp(Operand(kScratchRegister, input, times_8, 0));
3710 315 : }
3711 :
namespace {

// Byte size of one XMM register spill slot (128 bits), used when saving
// and restoring callee-saved XMM registers.
static const int kQuadWordSize = 16;

}  // namespace
3717 :
3718 2629955 : void CodeGenerator::FinishFrame(Frame* frame) {
3719 : auto call_descriptor = linkage()->GetIncomingDescriptor();
3720 :
3721 : const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
3722 2629955 : if (saves_fp != 0) {
3723 : frame->AlignSavedCalleeRegisterSlots();
3724 0 : if (saves_fp != 0) { // Save callee-saved XMM registers.
3725 : const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
3726 0 : frame->AllocateSavedCalleeRegisterSlots(
3727 0 : saves_fp_count * (kQuadWordSize / kSystemPointerSize));
3728 : }
3729 : }
3730 : const RegList saves = call_descriptor->CalleeSavedRegisters();
3731 2629955 : if (saves != 0) { // Save callee-saved registers.
3732 : int count = 0;
3733 29846157 : for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
3734 14470864 : if (((1 << i) & saves)) {
3735 4522145 : ++count;
3736 : }
3737 : }
3738 : frame->AllocateSavedCalleeRegisterSlots(count);
3739 : }
3740 2629955 : }
3741 :
// Emits the frame-construction prologue: pushes the appropriate frame
// marker (plain C frame, JS frame, or stub/wasm frame), performs the wasm
// stack-overflow check for large frames, allocates the remaining stack
// slots, and saves callee-saved XMM and general-purpose registers. The
// slot counts must agree with what FinishFrame reserved.
void CodeGenerator::AssembleConstructFrame() {
  auto call_descriptor = linkage()->GetIncomingDescriptor();
  if (frame_access_state()->has_frame()) {
    // Remember where frame construction starts for the unwinding info.
    int pc_base = __ pc_offset();

    if (call_descriptor->IsCFunctionCall()) {
      // Standard C frame: push rbp, establish the new frame pointer.
      __ pushq(rbp);
      __ movq(rbp, rsp);
    } else if (call_descriptor->IsJSFunctionCall()) {
      __ Prologue();
      if (call_descriptor->PushArgumentCount()) {
        __ pushq(kJavaScriptCallArgCountRegister);
      }
    } else {
      __ StubPrologue(info()->GetOutputStackFrameType());
      if (call_descriptor->IsWasmFunctionCall()) {
        __ pushq(kWasmInstanceRegister);
      } else if (call_descriptor->IsWasmImportWrapper()) {
        // WASM import wrappers are passed a tuple in the place of the instance.
        // Unpack the tuple into the instance and the target callable.
        // This must be done here in the codegen because it cannot be expressed
        // properly in the graph.
        __ LoadTaggedPointerField(
            kJSFunctionRegister,
            FieldOperand(kWasmInstanceRegister, Tuple2::kValue2Offset));
        __ LoadTaggedPointerField(
            kWasmInstanceRegister,
            FieldOperand(kWasmInstanceRegister, Tuple2::kValue1Offset));
        __ pushq(kWasmInstanceRegister);
      }
    }

    unwinding_info_writer_.MarkFrameConstructed(pc_base);
  }
  // Slots still to be allocated beyond the fixed frame.
  int required_slots = frame()->GetTotalFrameSlotCount() -
                       call_descriptor->CalculateFixedFrameSize();

  if (info()->is_osr()) {
    // TurboFan OSR-compiled functions cannot be entered directly.
    __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);

    // Unoptimized code jumps directly to this entrypoint while the unoptimized
    // frame is still on the stack. Optimized code uses OSR values directly from
    // the unoptimized frame. Thus, all that needs to be done is to allocate the
    // remaining stack slots.
    if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
    osr_pc_offset_ = __ pc_offset();
    required_slots -= static_cast<int>(osr_helper()->UnoptimizedFrameSlots());
    ResetSpeculationPoison();
  }

  const RegList saves = call_descriptor->CalleeSavedRegisters();
  const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();

  if (required_slots > 0) {
    DCHECK(frame_access_state()->has_frame());
    if (info()->IsWasm() && required_slots > 128) {
      // For WebAssembly functions with big frames we have to do the stack
      // overflow check before we construct the frame. Otherwise we may not
      // have enough space on the stack to call the runtime for the stack
      // overflow.
      Label done;

      // If the frame is bigger than the stack, we throw the stack overflow
      // exception unconditionally. Thereby we can avoid the integer overflow
      // check in the condition code.
      if (required_slots * kSystemPointerSize < FLAG_stack_size * 1024) {
        __ movq(kScratchRegister,
                FieldOperand(kWasmInstanceRegister,
                             WasmInstanceObject::kRealStackLimitAddressOffset));
        __ movq(kScratchRegister, Operand(kScratchRegister, 0));
        __ addq(kScratchRegister,
                Immediate(required_slots * kSystemPointerSize));
        __ cmpq(rsp, kScratchRegister);
        __ j(above_equal, &done);
      }

      __ near_call(wasm::WasmCode::kWasmStackOverflow,
                   RelocInfo::WASM_STUB_CALL);
      // The trap call needs a safepoint so the GC can walk this frame.
      ReferenceMap* reference_map = new (zone()) ReferenceMap(zone());
      RecordSafepoint(reference_map, Safepoint::kSimple,
                      Safepoint::kNoLazyDeopt);
      __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
      __ bind(&done);
    }

    // Skip callee-saved and return slots, which are created below.
    required_slots -= base::bits::CountPopulation(saves);
    required_slots -= base::bits::CountPopulation(saves_fp) *
                      (kQuadWordSize / kSystemPointerSize);
    required_slots -= frame()->GetReturnSlotCount();
    if (required_slots > 0) {
      __ subq(rsp, Immediate(required_slots * kSystemPointerSize));
    }
  }

  if (saves_fp != 0) {  // Save callee-saved XMM registers.
    const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
    const int stack_size = saves_fp_count * kQuadWordSize;
    // Adjust the stack pointer.
    __ subq(rsp, Immediate(stack_size));
    // Store the registers on the stack.
    int slot_idx = 0;
    for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
      if (!((1 << i) & saves_fp)) continue;
      __ movdqu(Operand(rsp, kQuadWordSize * slot_idx),
                XMMRegister::from_code(i));
      slot_idx++;
    }
  }

  if (saves != 0) {  // Save callee-saved registers.
    // Pushed from highest code to lowest; AssembleReturn pops in the
    // opposite order.
    for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
      if (!((1 << i) & saves)) continue;
      __ pushq(Register::from_code(i));
    }
  }

  // Allocate return slots (located after callee-saved).
  if (frame()->GetReturnSlotCount() > 0) {
    __ subq(rsp, Immediate(frame()->GetReturnSlotCount() * kSystemPointerSize));
  }
}
3865 :
// Emits the epilogue: restores callee-saved GP and XMM registers, tears
// down the frame (canonicalizing JS return sites to a shared label), and
// returns while popping either a constant or a register-supplied number of
// stack arguments.
void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
  auto call_descriptor = linkage()->GetIncomingDescriptor();

  // Restore registers.
  const RegList saves = call_descriptor->CalleeSavedRegisters();
  if (saves != 0) {
    const int returns = frame()->GetReturnSlotCount();
    if (returns != 0) {
      // Drop the return slots that sit below the saved registers.
      __ addq(rsp, Immediate(returns * kSystemPointerSize));
    }
    // Pop in ascending register-code order — the reverse of the push order
    // used in AssembleConstructFrame.
    for (int i = 0; i < Register::kNumRegisters; i++) {
      if (!((1 << i) & saves)) continue;
      __ popq(Register::from_code(i));
    }
  }
  const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
  if (saves_fp != 0) {
    const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
    const int stack_size = saves_fp_count * kQuadWordSize;
    // Load the registers from the stack.
    int slot_idx = 0;
    for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
      if (!((1 << i) & saves_fp)) continue;
      __ movdqu(XMMRegister::from_code(i),
                Operand(rsp, kQuadWordSize * slot_idx));
      slot_idx++;
    }
    // Adjust the stack pointer.
    __ addq(rsp, Immediate(stack_size));
  }

  unwinding_info_writer_.MarkBlockWillExit();

  // Might need rcx for scratch if pop_size is too big or if there is a variable
  // pop count.
  DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rcx.bit());
  DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rdx.bit());
  size_t pop_size = call_descriptor->StackParameterCount() * kSystemPointerSize;
  X64OperandConverter g(this, nullptr);
  if (call_descriptor->IsCFunctionCall()) {
    AssembleDeconstructFrame();
  } else if (frame_access_state()->has_frame()) {
    if (pop->IsImmediate() && g.ToConstant(pop).ToInt32() == 0) {
      // Canonicalize JSFunction return sites for now.
      if (return_label_.is_bound()) {
        // A shared return sequence already exists; jump to it.
        __ jmp(&return_label_);
        return;
      } else {
        __ bind(&return_label_);
        AssembleDeconstructFrame();
      }
    } else {
      AssembleDeconstructFrame();
    }
  }

  if (pop->IsImmediate()) {
    // Constant pop count: fold the argument bytes into the ret itself.
    pop_size += g.ToConstant(pop).ToInt32() * kSystemPointerSize;
    CHECK_LT(pop_size, static_cast<size_t>(std::numeric_limits<int>::max()));
    __ Ret(static_cast<int>(pop_size), rcx);
  } else {
    // Dynamic pop count: pop the return address into a scratch register,
    // drop the arguments via lea, then jump back through the scratch.
    Register pop_reg = g.ToRegister(pop);
    Register scratch_reg = pop_reg == rcx ? rdx : rcx;
    __ popq(scratch_reg);
    __ leaq(rsp, Operand(rsp, pop_reg, times_8, static_cast<int>(pop_size)));
    __ jmp(scratch_reg);
  }
}
3934 :
// Called when assembly is complete; flushes any pending constant-pool
// entries into the instruction stream.
void CodeGenerator::FinishCode() { tasm()->PatchConstPool(); }
3936 :
// Emits one component of a parallel move: register, stack slot, or
// constant source into a register or stack slot destination. Memory-to-
// memory transfers go through kScratchRegister / kScratchDoubleReg.
void CodeGenerator::AssembleMove(InstructionOperand* source,
                                 InstructionOperand* destination) {
  X64OperandConverter g(this, nullptr);
  // Helper function to write the given constant to the dst register.
  auto MoveConstantToRegister = [&](Register dst, Constant src) {
    switch (src.type()) {
      case Constant::kInt32: {
        if (RelocInfo::IsWasmReference(src.rmode())) {
          // Wasm references need a full 64-bit patchable immediate.
          __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));
        } else {
          int32_t value = src.ToInt32();
          if (value == 0) {
            // xor is shorter than mov-with-zero-immediate.
            __ xorl(dst, dst);
          } else {
            __ movl(dst, Immediate(value));
          }
        }
        break;
      }
      case Constant::kInt64:
        if (RelocInfo::IsWasmReference(src.rmode())) {
          __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));
        } else {
          __ Set(dst, src.ToInt64());
        }
        break;
      case Constant::kFloat32:
        __ MoveNumber(dst, src.ToFloat32());
        break;
      case Constant::kFloat64:
        __ MoveNumber(dst, src.ToFloat64().value());
        break;
      case Constant::kExternalReference:
        __ Move(dst, src.ToExternalReference());
        break;
      case Constant::kHeapObject: {
        Handle<HeapObject> src_object = src.ToHeapObject();
        RootIndex index;
        // Prefer loading from the roots table over embedding the handle.
        if (IsMaterializableFromRoot(src_object, &index)) {
          __ LoadRoot(dst, index);
        } else {
          __ Move(dst, src_object);
        }
        break;
      }
      case Constant::kDelayedStringConstant: {
        const StringConstantBase* src_constant = src.ToDelayedStringConstant();
        __ MoveStringConstant(dst, src_constant);
        break;
      }
      case Constant::kRpoNumber:
        UNREACHABLE();  // TODO(dcarney): load of labels on x64.
        break;
    }
  };
  // Helper function to write the given constant to the stack.
  auto MoveConstantToSlot = [&](Operand dst, Constant src) {
    // Small integer constants can be stored directly; everything else is
    // materialized in the scratch register first.
    if (!RelocInfo::IsWasmReference(src.rmode())) {
      switch (src.type()) {
        case Constant::kInt32:
          __ movq(dst, Immediate(src.ToInt32()));
          return;
        case Constant::kInt64:
          __ Set(dst, src.ToInt64());
          return;
        default:
          break;
      }
    }
    MoveConstantToRegister(kScratchRegister, src);
    __ movq(dst, kScratchRegister);
  };
  // Dispatch on the source and destination operand kinds.
  switch (MoveType::InferMove(source, destination)) {
    case MoveType::kRegisterToRegister:
      if (source->IsRegister()) {
        __ movq(g.ToRegister(destination), g.ToRegister(source));
      } else {
        DCHECK(source->IsFPRegister());
        __ Movapd(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
      }
      return;
    case MoveType::kRegisterToStack: {
      Operand dst = g.ToOperand(destination);
      if (source->IsRegister()) {
        __ movq(dst, g.ToRegister(source));
      } else {
        DCHECK(source->IsFPRegister());
        XMMRegister src = g.ToDoubleRegister(source);
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        // SIMD values need the full 128-bit (possibly unaligned) move.
        if (rep != MachineRepresentation::kSimd128) {
          __ Movsd(dst, src);
        } else {
          __ Movups(dst, src);
        }
      }
      return;
    }
    case MoveType::kStackToRegister: {
      Operand src = g.ToOperand(source);
      if (source->IsStackSlot()) {
        __ movq(g.ToRegister(destination), src);
      } else {
        DCHECK(source->IsFPStackSlot());
        XMMRegister dst = g.ToDoubleRegister(destination);
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        if (rep != MachineRepresentation::kSimd128) {
          __ Movsd(dst, src);
        } else {
          __ Movups(dst, src);
        }
      }
      return;
    }
    case MoveType::kStackToStack: {
      Operand src = g.ToOperand(source);
      Operand dst = g.ToOperand(destination);
      if (source->IsStackSlot()) {
        // Spill on demand to use a temporary register for memory-to-memory
        // moves.
        __ movq(kScratchRegister, src);
        __ movq(dst, kScratchRegister);
      } else {
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        if (rep != MachineRepresentation::kSimd128) {
          __ Movsd(kScratchDoubleReg, src);
          __ Movsd(dst, kScratchDoubleReg);
        } else {
          DCHECK(source->IsSimd128StackSlot());
          __ Movups(kScratchDoubleReg, src);
          __ Movups(dst, kScratchDoubleReg);
        }
      }
      return;
    }
    case MoveType::kConstantToRegister: {
      Constant src = g.ToConstant(source);
      if (destination->IsRegister()) {
        MoveConstantToRegister(g.ToRegister(destination), src);
      } else {
        DCHECK(destination->IsFPRegister());
        XMMRegister dst = g.ToDoubleRegister(destination);
        if (src.type() == Constant::kFloat32) {
          // TODO(turbofan): Can we do better here?
          __ Move(dst, bit_cast<uint32_t>(src.ToFloat32()));
        } else {
          DCHECK_EQ(src.type(), Constant::kFloat64);
          __ Move(dst, src.ToFloat64().AsUint64());
        }
      }
      return;
    }
    case MoveType::kConstantToStack: {
      Constant src = g.ToConstant(source);
      Operand dst = g.ToOperand(destination);
      if (destination->IsStackSlot()) {
        MoveConstantToSlot(dst, src);
      } else {
        DCHECK(destination->IsFPStackSlot());
        if (src.type() == Constant::kFloat32) {
          __ movl(dst, Immediate(bit_cast<uint32_t>(src.ToFloat32())));
        } else {
          DCHECK_EQ(src.type(), Constant::kFloat64);
          __ movq(kScratchRegister, src.ToFloat64().AsUint64());
          __ movq(dst, kScratchRegister);
        }
      }
      return;
    }
  }
  UNREACHABLE();
}
4112 :
// Swaps two operands in place. Register-register swaps go through the
// scratch registers; swaps involving stack slots use push/pop so no extra
// GP register is needed (the unwinding writer is notified of each
// temporary stack-pointer adjustment).
void CodeGenerator::AssembleSwap(InstructionOperand* source,
                                 InstructionOperand* destination) {
  X64OperandConverter g(this, nullptr);
  // Dispatch on the source and destination operand kinds. Not all
  // combinations are possible.
  switch (MoveType::InferSwap(source, destination)) {
    case MoveType::kRegisterToRegister: {
      if (source->IsRegister()) {
        Register src = g.ToRegister(source);
        Register dst = g.ToRegister(destination);
        // Three-move swap via the scratch register.
        __ movq(kScratchRegister, src);
        __ movq(src, dst);
        __ movq(dst, kScratchRegister);
      } else {
        DCHECK(source->IsFPRegister());
        XMMRegister src = g.ToDoubleRegister(source);
        XMMRegister dst = g.ToDoubleRegister(destination);
        __ Movapd(kScratchDoubleReg, src);
        __ Movapd(src, dst);
        __ Movapd(dst, kScratchDoubleReg);
      }
      return;
    }
    case MoveType::kRegisterToStack: {
      if (source->IsRegister()) {
        Register src = g.ToRegister(source);
        // Park the register value on the stack, load the slot into the
        // register, then pop the parked value into the slot.
        __ pushq(src);
        frame_access_state()->IncreaseSPDelta(1);
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         kSystemPointerSize);
        __ movq(src, g.ToOperand(destination));
        frame_access_state()->IncreaseSPDelta(-1);
        __ popq(g.ToOperand(destination));
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         -kSystemPointerSize);
      } else {
        DCHECK(source->IsFPRegister());
        XMMRegister src = g.ToDoubleRegister(source);
        Operand dst = g.ToOperand(destination);
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        if (rep != MachineRepresentation::kSimd128) {
          __ Movsd(kScratchDoubleReg, src);
          __ Movsd(src, dst);
          __ Movsd(dst, kScratchDoubleReg);
        } else {
          __ Movups(kScratchDoubleReg, src);
          __ Movups(src, dst);
          __ Movups(dst, kScratchDoubleReg);
        }
      }
      return;
    }
    case MoveType::kStackToStack: {
      Operand src = g.ToOperand(source);
      Operand dst = g.ToOperand(destination);
      MachineRepresentation rep =
          LocationOperand::cast(source)->representation();
      if (rep != MachineRepresentation::kSimd128) {
        Register tmp = kScratchRegister;
        __ movq(tmp, dst);
        __ pushq(src);  // Then use stack to copy src to destination.
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         kSystemPointerSize);
        __ popq(dst);
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         -kSystemPointerSize);
        __ movq(src, tmp);
      } else {
        // Without AVX, misaligned reads and writes will trap. Move using the
        // stack, in two parts.
        __ movups(kScratchDoubleReg, dst);  // Save dst in scratch register.
        __ pushq(src);  // Then use stack to copy src to destination.
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         kSystemPointerSize);
        __ popq(dst);
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         -kSystemPointerSize);
        // Second 8-byte half of the 128-bit value.
        __ pushq(g.ToOperand(source, kSystemPointerSize));
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         kSystemPointerSize);
        __ popq(g.ToOperand(destination, kSystemPointerSize));
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         -kSystemPointerSize);
        __ movups(src, kScratchDoubleReg);
      }
      return;
    }
    default:
      UNREACHABLE();
      break;
  }
}
4206 :
4207 315 : void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
4208 399611 : for (size_t index = 0; index < target_count; ++index) {
4209 199648 : __ dq(targets[index]);
4210 : }
4211 315 : }
4212 :
4213 : #undef __
4214 :
4215 : } // namespace compiler
4216 : } // namespace internal
4217 122036 : } // namespace v8
|