Line data Source code
1 : // Copyright 2013 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #include "src/compiler/backend/code-generator.h"
6 :
7 : #include <limits>
8 :
9 : #include "src/base/overflowing-math.h"
10 : #include "src/compiler/backend/code-generator-impl.h"
11 : #include "src/compiler/backend/gap-resolver.h"
12 : #include "src/compiler/node-matchers.h"
13 : #include "src/compiler/osr.h"
14 : #include "src/heap/heap-inl.h" // crbug.com/v8/8499
15 : #include "src/macro-assembler.h"
16 : #include "src/objects/smi.h"
17 : #include "src/optimized-compilation-info.h"
18 : #include "src/wasm/wasm-code-manager.h"
19 : #include "src/wasm/wasm-objects.h"
20 : #include "src/x64/assembler-x64.h"
21 :
22 : namespace v8 {
23 : namespace internal {
24 : namespace compiler {
25 :
26 : #define __ tasm()->
27 :
28 : // Adds X64-specific methods for decoding operands.
29 : class X64OperandConverter : public InstructionOperandConverter {
30 : public:
31 : X64OperandConverter(CodeGenerator* gen, Instruction* instr)
32 : : InstructionOperandConverter(gen, instr) {}
33 :
34 : Immediate InputImmediate(size_t index) {
35 5360832 : return ToImmediate(instr_->InputAt(index));
36 : }
37 :
38 1056765 : Operand InputOperand(size_t index, int extra = 0) {
39 2113534 : return ToOperand(instr_->InputAt(index), extra);
40 : }
41 :
42 0 : Operand OutputOperand() { return ToOperand(instr_->Output()); }
43 :
44 4405068 : Immediate ToImmediate(InstructionOperand* operand) {
45 4405068 : Constant constant = ToConstant(operand);
46 4405151 : if (constant.type() == Constant::kFloat64) {
47 : DCHECK_EQ(0, constant.ToFloat64().AsUint64());
48 365964 : return Immediate(0);
49 : }
50 4039187 : if (RelocInfo::IsWasmReference(constant.rmode())) {
51 0 : return Immediate(constant.ToInt32(), constant.rmode());
52 : }
53 4039187 : return Immediate(constant.ToInt32());
54 : }
55 :
56 : Operand ToOperand(InstructionOperand* op, int extra = 0) {
57 : DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
58 15800601 : return SlotToOperand(AllocatedOperand::cast(op)->index(), extra);
59 : }
60 :
61 15813533 : Operand SlotToOperand(int slot_index, int extra = 0) {
62 15813533 : FrameOffset offset = frame_access_state()->GetFrameOffset(slot_index);
63 : return Operand(offset.from_stack_pointer() ? rsp : rbp,
64 31626908 : offset.offset() + extra);
65 : }
66 :
67 : static size_t NextOffset(size_t* offset) {
68 16421465 : size_t i = *offset;
69 29397426 : (*offset)++;
70 : return i;
71 : }
72 :
73 : static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) {
74 : STATIC_ASSERT(0 == static_cast<int>(times_1));
75 : STATIC_ASSERT(1 == static_cast<int>(times_2));
76 : STATIC_ASSERT(2 == static_cast<int>(times_4));
77 : STATIC_ASSERT(3 == static_cast<int>(times_8));
78 1311255 : int scale = static_cast<int>(mode - one);
79 : DCHECK(scale >= 0 && scale < 4);
80 1311255 : return static_cast<ScaleFactor>(scale);
81 : }
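// A quick illustration of the layout exploited above (hypothetical calls,
// added for clarity): the *1/*2/*4/*8 modes are consecutive, so the scale
// is simply the distance from the *1 member of the family.
//   ScaleFor(kMode_MR1, kMode_MR4);  // == times_4 (scale 2)
//   ScaleFor(kMode_M1I, kMode_M8I);  // == times_8 (scale 3)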
82 :
83 16421465 : Operand MemoryOperand(size_t* offset) {
84 16421465 : AddressingMode mode = AddressingModeField::decode(instr_->opcode());
85 16421465 : switch (mode) {
86 : case kMode_MR: {
87 2381163 : Register base = InputRegister(NextOffset(offset));
88 : int32_t disp = 0;
89 2381163 : return Operand(base, disp);
90 : }
91 : case kMode_MRI: {
92 11229271 : Register base = InputRegister(NextOffset(offset));
93 : int32_t disp = InputInt32(NextOffset(offset));
94 11228954 : return Operand(base, disp);
95 : }
96 : case kMode_MR1:
97 : case kMode_MR2:
98 : case kMode_MR4:
99 : case kMode_MR8: {
100 703859 : Register base = InputRegister(NextOffset(offset));
101 703859 : Register index = InputRegister(NextOffset(offset));
102 : ScaleFactor scale = ScaleFor(kMode_MR1, mode);
103 : int32_t disp = 0;
104 703859 : return Operand(base, index, scale, disp);
105 : }
106 : case kMode_MR1I:
107 : case kMode_MR2I:
108 : case kMode_MR4I:
109 : case kMode_MR8I: {
110 455182 : Register base = InputRegister(NextOffset(offset));
111 455182 : Register index = InputRegister(NextOffset(offset));
112 : ScaleFactor scale = ScaleFor(kMode_MR1I, mode);
113 : int32_t disp = InputInt32(NextOffset(offset));
114 455180 : return Operand(base, index, scale, disp);
115 : }
116 : case kMode_M1: {
117 0 : Register base = InputRegister(NextOffset(offset));
118 : int32_t disp = 0;
119 0 : return Operand(base, disp);
120 : }
121 : case kMode_M2:
122 0 : UNREACHABLE(); // Should use kMode_MR with a more compact encoding instead.
123 : return Operand(no_reg, 0);
124 : case kMode_M4:
125 : case kMode_M8: {
126 19747 : Register index = InputRegister(NextOffset(offset));
127 : ScaleFactor scale = ScaleFor(kMode_M1, mode);
128 : int32_t disp = 0;
129 19747 : return Operand(index, scale, disp);
130 : }
131 : case kMode_M1I:
132 : case kMode_M2I:
133 : case kMode_M4I:
134 : case kMode_M8I: {
135 132467 : Register index = InputRegister(NextOffset(offset));
136 : ScaleFactor scale = ScaleFor(kMode_M1I, mode);
137 : int32_t disp = InputInt32(NextOffset(offset));
138 132467 : return Operand(index, scale, disp);
139 : }
140 : case kMode_Root: {
141 1499776 : Register base = kRootRegister;
142 : int32_t disp = InputInt32(NextOffset(offset));
143 1499776 : return Operand(base, disp);
144 : }
145 : case kMode_None:
146 0 : UNREACHABLE();
147 : }
148 0 : UNREACHABLE();
149 : }
150 :
151 : Operand MemoryOperand(size_t first_input = 0) {
152 9281912 : return MemoryOperand(&first_input);
153 : }
154 : };
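// A minimal sketch of how instruction cases typically drive the converter
// (mirroring kArchStoreWithWriteBarrier further below): MemoryOperand(&index)
// consumes the base/index/displacement inputs of the addressing mode and
// advances `index`, so trailing inputs can be read afterwards.
//   X64OperandConverter i(this, instr);
//   size_t index = 0;
//   Operand mem = i.MemoryOperand(&index);    // decodes the addressing mode
//   Register value = i.InputRegister(index);  // first input after the mode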
155 :
156 : namespace {
157 :
158 : bool HasImmediateInput(Instruction* instr, size_t index) {
159 : return instr->InputAt(index)->IsImmediate();
160 : }
161 :
162 0 : class OutOfLineLoadFloat32NaN final : public OutOfLineCode {
163 : public:
164 : OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result)
165 132 : : OutOfLineCode(gen), result_(result) {}
166 :
167 132 : void Generate() final {
168 : __ Xorps(result_, result_);
169 : __ Divss(result_, result_);
170 132 : }
171 :
172 : private:
173 : XMMRegister const result_;
174 : };
175 :
176 0 : class OutOfLineLoadFloat64NaN final : public OutOfLineCode {
177 : public:
178 : OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result)
179 591 : : OutOfLineCode(gen), result_(result) {}
180 :
181 590 : void Generate() final {
182 : __ Xorpd(result_, result_);
183 : __ Divsd(result_, result_);
184 591 : }
185 :
186 : private:
187 : XMMRegister const result_;
188 : };
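// Both NaN loaders above materialize a NaN without a constant-pool load:
// xor zeroes the register, and dividing zero by itself yields the default
// quiet NaN per IEEE 754.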
189 :
190 0 : class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
191 : public:
192 : OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
193 : XMMRegister input, StubCallMode stub_mode,
194 : UnwindingInfoWriter* unwinding_info_writer)
195 : : OutOfLineCode(gen),
196 : result_(result),
197 : input_(input),
198 : stub_mode_(stub_mode),
199 : unwinding_info_writer_(unwinding_info_writer),
200 : isolate_(gen->isolate()),
201 53479 : zone_(gen->zone()) {}
202 :
203 53473 : void Generate() final {
204 53473 : __ subq(rsp, Immediate(kDoubleSize));
205 53477 : unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
206 53477 : kDoubleSize);
207 106952 : __ Movsd(MemOperand(rsp, 0), input_);
208 53473 : if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
209 : // A direct call to a wasm runtime stub defined in this module.
210 : // Just encode the stub index. This will be patched when the code
211 : // is added to the native module and copied into wasm code space.
212 1626 : __ near_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
213 : } else {
214 103694 : __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET);
215 : }
216 106958 : __ movl(result_, MemOperand(rsp, 0));
217 53479 : __ addq(rsp, Immediate(kDoubleSize));
218 53478 : unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
219 53478 : -kDoubleSize);
220 53478 : }
221 :
222 : private:
223 : Register const result_;
224 : XMMRegister const input_;
225 : StubCallMode stub_mode_;
226 : UnwindingInfoWriter* const unwinding_info_writer_;
227 : Isolate* isolate_;
228 : Zone* zone_;
229 : };
230 :
231 0 : class OutOfLineRecordWrite final : public OutOfLineCode {
232 : public:
233 : OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand,
234 : Register value, Register scratch0, Register scratch1,
235 : RecordWriteMode mode, StubCallMode stub_mode)
236 : : OutOfLineCode(gen),
237 : object_(object),
238 : operand_(operand),
239 : value_(value),
240 : scratch0_(scratch0),
241 : scratch1_(scratch1),
242 : mode_(mode),
243 : stub_mode_(stub_mode),
244 320389 : zone_(gen->zone()) {}
245 :
246 320388 : void Generate() final {
247 320388 : if (mode_ > RecordWriteMode::kValueIsPointer) {
248 253115 : __ JumpIfSmi(value_, exit());
249 : }
250 : __ CheckPageFlag(value_, scratch0_,
251 : MemoryChunk::kPointersToHereAreInterestingMask, zero,
252 320388 : exit());
253 320389 : __ leaq(scratch1_, operand_);
254 :
255 : RememberedSetAction const remembered_set_action =
256 320389 : mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
257 320389 : : OMIT_REMEMBERED_SET;
258 : SaveFPRegsMode const save_fp_mode =
259 320389 : frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
260 :
261 320389 : if (mode_ == RecordWriteMode::kValueIsEphemeronKey) {
262 112 : __ CallEphemeronKeyBarrier(object_, scratch1_, save_fp_mode);
263 320277 : } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
264 : // A direct call to a wasm runtime stub defined in this module.
265 : // Just encode the stub index. This will be patched when the code
266 : // is added to the native module and copied into wasm code space.
267 : __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
268 286 : save_fp_mode, wasm::WasmCode::kWasmRecordWrite);
269 : } else {
270 : __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
271 319991 : save_fp_mode);
272 : }
273 320389 : }
274 :
275 : private:
276 : Register const object_;
277 : Operand const operand_;
278 : Register const value_;
279 : Register const scratch0_;
280 : Register const scratch1_;
281 : RecordWriteMode const mode_;
282 : StubCallMode const stub_mode_;
283 : Zone* zone_;
284 : };
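// Taken together with the inline kArchStoreWithWriteBarrier case further
// below, the record-write stub is only reached when the object's page has
// kPointersFromHereAreInterestingMask set, the stored value is not a Smi
// (for modes above kValueIsPointer), and the value's page has
// kPointersToHereAreInterestingMask set; every other store exits early.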
285 :
286 0 : class WasmOutOfLineTrap : public OutOfLineCode {
287 : public:
288 : WasmOutOfLineTrap(CodeGenerator* gen, Instruction* instr)
289 384553 : : OutOfLineCode(gen), gen_(gen), instr_(instr) {}
290 :
291 141996 : void Generate() override {
292 141996 : X64OperandConverter i(gen_, instr_);
293 : TrapId trap_id =
294 284064 : static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
295 : GenerateWithTrapId(trap_id);
296 142119 : }
297 :
298 : protected:
299 : CodeGenerator* gen_;
300 :
301 384091 : void GenerateWithTrapId(TrapId trap_id) { GenerateCallToTrap(trap_id); }
302 :
303 : private:
304 383923 : void GenerateCallToTrap(TrapId trap_id) {
305 383923 : if (!gen_->wasm_runtime_exception_support()) {
306 : // We cannot test calls to the runtime in cctest/test-run-wasm.
307 : // Therefore we emit a call to C here instead of a call to the runtime.
308 153836 : __ PrepareCallCFunction(0);
309 153836 : __ CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(),
310 153836 : 0);
311 153836 : __ LeaveFrame(StackFrame::WASM_COMPILED);
312 153836 : auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
313 : size_t pop_size =
314 153836 : call_descriptor->StackParameterCount() * kSystemPointerSize;
315 : // Use rcx as a scratch register; we return immediately anyway.
316 153836 : __ Ret(static_cast<int>(pop_size), rcx);
317 : } else {
318 230220 : gen_->AssembleSourcePosition(instr_);
319 : // A direct call to a wasm runtime stub defined in this module.
320 : // Just encode the stub index. This will be patched when the code
321 : // is added to the native module and copied into wasm code space.
322 230658 : __ near_call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
323 : ReferenceMap* reference_map =
324 230285 : new (gen_->zone()) ReferenceMap(gen_->zone());
325 230107 : gen_->RecordSafepoint(reference_map, Safepoint::kSimple,
326 230107 : Safepoint::kNoLazyDeopt);
327 230961 : __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
328 : }
329 384192 : }
330 :
331 : Instruction* instr_;
332 : };
333 :
334 0 : class WasmProtectedInstructionTrap final : public WasmOutOfLineTrap {
335 : public:
336 : WasmProtectedInstructionTrap(CodeGenerator* gen, int pc, Instruction* instr)
337 241659 : : WasmOutOfLineTrap(gen, instr), pc_(pc) {}
338 :
339 241548 : void Generate() final {
340 241548 : gen_->AddProtectedInstructionLanding(pc_, __ pc_offset());
341 242023 : GenerateWithTrapId(TrapId::kTrapMemOutOfBounds);
342 242090 : }
343 :
344 : private:
345 : int pc_;
346 : };
347 :
348 12572853 : void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen,
349 : InstructionCode opcode, Instruction* instr,
350 : X64OperandConverter& i, int pc) {
351 : const MemoryAccessMode access_mode =
352 12572853 : static_cast<MemoryAccessMode>(MiscField::decode(opcode));
353 12572853 : if (access_mode == kMemoryAccessProtected) {
354 : new (zone) WasmProtectedInstructionTrap(codegen, pc, instr);
355 : }
356 12572054 : }
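// Rough picture of the mechanism, as suggested by the code above: each
// protected memory access registers its pc via AddProtectedInstructionLanding,
// and if the access faults, the trap handler resumes at the out-of-line code,
// which raises kTrapMemOutOfBounds. This trades explicit bounds checks on
// every access for a signal-handler fixup on the rare failing one.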
357 :
358 11906460 : void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
359 : InstructionCode opcode, Instruction* instr,
360 : X64OperandConverter& i) {
361 : const MemoryAccessMode access_mode =
362 11906460 : static_cast<MemoryAccessMode>(MiscField::decode(opcode));
363 11906460 : if (access_mode == kMemoryAccessPoisoned) {
364 : Register value = i.OutputRegister();
365 0 : codegen->tasm()->andq(value, kSpeculationPoisonRegister);
366 : }
367 11906460 : }
368 :
369 : } // namespace
370 :
371 : #define ASSEMBLE_UNOP(asm_instr) \
372 : do { \
373 : if (instr->Output()->IsRegister()) { \
374 : __ asm_instr(i.OutputRegister()); \
375 : } else { \
376 : __ asm_instr(i.OutputOperand()); \
377 : } \
378 : } while (false)
379 :
380 : #define ASSEMBLE_BINOP(asm_instr) \
381 : do { \
382 : if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
383 : size_t index = 1; \
384 : Operand right = i.MemoryOperand(&index); \
385 : __ asm_instr(i.InputRegister(0), right); \
386 : } else { \
387 : if (HasImmediateInput(instr, 1)) { \
388 : if (instr->InputAt(0)->IsRegister()) { \
389 : __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
390 : } else { \
391 : __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \
392 : } \
393 : } else { \
394 : if (instr->InputAt(1)->IsRegister()) { \
395 : __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \
396 : } else { \
397 : __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \
398 : } \
399 : } \
400 : } \
401 : } while (false)
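// For a concrete picture, ASSEMBLE_BINOP(addl) emits one of three shapes,
// depending on how the instruction selector encoded the inputs:
//   addl reg, [mem]        // input 1 folded into an addressing mode
//   addl reg|[slot], imm   // input 1 is an immediate
//   addl reg, reg|[slot]   // input 1 is a register or a stack slot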
402 :
403 : #define ASSEMBLE_COMPARE(asm_instr) \
404 : do { \
405 : if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
406 : size_t index = 0; \
407 : Operand left = i.MemoryOperand(&index); \
408 : if (HasImmediateInput(instr, index)) { \
409 : __ asm_instr(left, i.InputImmediate(index)); \
410 : } else { \
411 : __ asm_instr(left, i.InputRegister(index)); \
412 : } \
413 : } else { \
414 : if (HasImmediateInput(instr, 1)) { \
415 : if (instr->InputAt(0)->IsRegister()) { \
416 : __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
417 : } else { \
418 : __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \
419 : } \
420 : } else { \
421 : if (instr->InputAt(1)->IsRegister()) { \
422 : __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \
423 : } else { \
424 : __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \
425 : } \
426 : } \
427 : } \
428 : } while (false)
429 :
430 : #define ASSEMBLE_MULT(asm_instr) \
431 : do { \
432 : if (HasImmediateInput(instr, 1)) { \
433 : if (instr->InputAt(0)->IsRegister()) { \
434 : __ asm_instr(i.OutputRegister(), i.InputRegister(0), \
435 : i.InputImmediate(1)); \
436 : } else { \
437 : __ asm_instr(i.OutputRegister(), i.InputOperand(0), \
438 : i.InputImmediate(1)); \
439 : } \
440 : } else { \
441 : if (instr->InputAt(1)->IsRegister()) { \
442 : __ asm_instr(i.OutputRegister(), i.InputRegister(1)); \
443 : } else { \
444 : __ asm_instr(i.OutputRegister(), i.InputOperand(1)); \
445 : } \
446 : } \
447 : } while (false)
448 :
449 : #define ASSEMBLE_SHIFT(asm_instr, width) \
450 : do { \
451 : if (HasImmediateInput(instr, 1)) { \
452 : if (instr->Output()->IsRegister()) { \
453 : __ asm_instr(i.OutputRegister(), Immediate(i.InputInt##width(1))); \
454 : } else { \
455 : __ asm_instr(i.OutputOperand(), Immediate(i.InputInt##width(1))); \
456 : } \
457 : } else { \
458 : if (instr->Output()->IsRegister()) { \
459 : __ asm_instr##_cl(i.OutputRegister()); \
460 : } else { \
461 : __ asm_instr##_cl(i.OutputOperand()); \
462 : } \
463 : } \
464 : } while (false)
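// Variable shift counts live in cl by x64 convention, hence the two arms:
// ASSEMBLE_SHIFT(shll, 5) emits either `shll dst, imm` (the immediate count
// read as 5 bits for 32-bit shifts, 6 bits for 64-bit ones) or the shll_cl
// form that shifts by cl.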
465 :
466 : #define ASSEMBLE_MOVX(asm_instr) \
467 : do { \
468 : if (instr->addressing_mode() != kMode_None) { \
469 : __ asm_instr(i.OutputRegister(), i.MemoryOperand()); \
470 : } else if (instr->InputAt(0)->IsRegister()) { \
471 : __ asm_instr(i.OutputRegister(), i.InputRegister(0)); \
472 : } else { \
473 : __ asm_instr(i.OutputRegister(), i.InputOperand(0)); \
474 : } \
475 : } while (false)
476 :
477 : #define ASSEMBLE_SSE_BINOP(asm_instr) \
478 : do { \
479 : if (instr->InputAt(1)->IsFPRegister()) { \
480 : __ asm_instr(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); \
481 : } else { \
482 : __ asm_instr(i.InputDoubleRegister(0), i.InputOperand(1)); \
483 : } \
484 : } while (false)
485 :
486 : #define ASSEMBLE_SSE_UNOP(asm_instr) \
487 : do { \
488 : if (instr->InputAt(0)->IsFPRegister()) { \
489 : __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); \
490 : } else { \
491 : __ asm_instr(i.OutputDoubleRegister(), i.InputOperand(0)); \
492 : } \
493 : } while (false)
494 :
495 : #define ASSEMBLE_AVX_BINOP(asm_instr) \
496 : do { \
497 : CpuFeatureScope avx_scope(tasm(), AVX); \
498 : if (instr->InputAt(1)->IsFPRegister()) { \
499 : __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
500 : i.InputDoubleRegister(1)); \
501 : } else { \
502 : __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
503 : i.InputOperand(1)); \
504 : } \
505 : } while (false)
506 :
507 : #define ASSEMBLE_IEEE754_BINOP(name) \
508 : do { \
509 : __ PrepareCallCFunction(2); \
510 : __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \
511 : } while (false)
512 :
513 : #define ASSEMBLE_IEEE754_UNOP(name) \
514 : do { \
515 : __ PrepareCallCFunction(1); \
516 : __ CallCFunction(ExternalReference::ieee754_##name##_function(), 1); \
517 : } while (false)
518 :
519 : #define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
520 : do { \
521 : Label binop; \
522 : __ bind(&binop); \
523 : __ mov_inst(rax, i.MemoryOperand(1)); \
524 : __ movl(i.TempRegister(0), rax); \
525 : __ bin_inst(i.TempRegister(0), i.InputRegister(0)); \
526 : __ lock(); \
527 : __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0)); \
528 : __ j(not_equal, &binop); \
529 : } while (false)
530 :
531 : #define ASSEMBLE_ATOMIC64_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
532 : do { \
533 : Label binop; \
534 : __ bind(&binop); \
535 : __ mov_inst(rax, i.MemoryOperand(1)); \
536 : __ movq(i.TempRegister(0), rax); \
537 : __ bin_inst(i.TempRegister(0), i.InputRegister(0)); \
538 : __ lock(); \
539 : __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0)); \
540 : __ j(not_equal, &binop); \
541 : } while (false)
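// Both atomic binop macros expand to the classic compare-and-swap retry
// loop; as a sketch, ASSEMBLE_ATOMIC_BINOP(addl, movl, cmpxchgl) behaves
// like:
//   binop: movl rax, [mem]          // load the current value
//          movl tmp, rax
//          addl tmp, src            // compute old OP src
//          lock cmpxchgl [mem], tmp // publish iff [mem] still equals rax
//          jne binop                // someone else won the race: retry
// cmpxchg conveniently reloads rax with the fresh value on failure.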
542 :
543 : #define ASSEMBLE_SIMD_INSTR(opcode, dst_operand, index) \
544 : do { \
545 : if (instr->InputAt(index)->IsSimd128Register()) { \
546 : __ opcode(dst_operand, i.InputSimd128Register(index)); \
547 : } else { \
548 : __ opcode(dst_operand, i.InputOperand(index)); \
549 : } \
550 : } while (false)
551 :
552 : #define ASSEMBLE_SIMD_IMM_INSTR(opcode, dst_operand, index, imm) \
553 : do { \
554 : if (instr->InputAt(index)->IsSimd128Register()) { \
555 : __ opcode(dst_operand, i.InputSimd128Register(index), imm); \
556 : } else { \
557 : __ opcode(dst_operand, i.InputOperand(index), imm); \
558 : } \
559 : } while (false)
560 :
561 : #define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode) \
562 : do { \
563 : XMMRegister dst = i.OutputSimd128Register(); \
564 : DCHECK_EQ(dst, i.InputSimd128Register(0)); \
565 : byte input_index = instr->InputCount() == 2 ? 1 : 0; \
566 : ASSEMBLE_SIMD_INSTR(opcode, dst, input_index); \
567 : } while (false)
568 :
569 : #define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, SSELevel, imm) \
570 : do { \
571 : CpuFeatureScope sse_scope(tasm(), SSELevel); \
572 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); \
573 : __ opcode(i.OutputSimd128Register(), i.InputSimd128Register(1), imm); \
574 : } while (false)
575 :
576 2647565 : void CodeGenerator::AssembleDeconstructFrame() {
577 2647565 : unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
578 2647755 : __ movq(rsp, rbp);
579 2648486 : __ popq(rbp);
580 2648506 : }
581 :
582 119851 : void CodeGenerator::AssemblePrepareTailCall() {
583 119851 : if (frame_access_state()->has_frame()) {
584 137126 : __ movq(rbp, MemOperand(rbp, 0));
585 : }
586 : frame_access_state()->SetFrameAccessToSP();
587 119851 : }
588 :
589 1288 : void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
590 : Register scratch1,
591 : Register scratch2,
592 : Register scratch3) {
593 : DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
594 1288 : Label done;
595 :
596 : // Check if current frame is an arguments adaptor frame.
597 2576 : __ cmpq(Operand(rbp, CommonFrameConstants::kContextOrFrameTypeOffset),
598 1288 : Immediate(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
599 1288 : __ j(not_equal, &done, Label::kNear);
600 :
601 : // Load the arguments count from the current arguments adaptor frame
602 : // (note that it does not include the receiver).
603 1288 : Register caller_args_count_reg = scratch1;
604 2576 : __ SmiUntag(caller_args_count_reg,
605 1288 : Operand(rbp, ArgumentsAdaptorFrameConstants::kLengthOffset));
606 :
607 : ParameterCount callee_args_count(args_reg);
608 : __ PrepareForTailCall(callee_args_count, caller_args_count_reg, scratch2,
609 1288 : scratch3);
610 1288 : __ bind(&done);
611 1288 : }
612 :
613 : namespace {
614 :
615 271410 : void AdjustStackPointerForTailCall(Assembler* assembler,
616 : FrameAccessState* state,
617 : int new_slot_above_sp,
618 : bool allow_shrinkage = true) {
619 : int current_sp_offset = state->GetSPToFPSlotCount() +
620 271410 : StandardFrameConstants::kFixedSlotCountAboveFp;
621 271410 : int stack_slot_delta = new_slot_above_sp - current_sp_offset;
622 271410 : if (stack_slot_delta > 0) {
623 792 : assembler->subq(rsp, Immediate(stack_slot_delta * kSystemPointerSize));
624 : state->IncreaseSPDelta(stack_slot_delta);
625 270618 : } else if (allow_shrinkage && stack_slot_delta < 0) {
626 68199 : assembler->addq(rsp, Immediate(-stack_slot_delta * kSystemPointerSize));
627 : state->IncreaseSPDelta(stack_slot_delta);
628 : }
629 271410 : }
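// Worked example: if the callee expects its first unused slot at index 6
// while the current SP corresponds to slot index 4, stack_slot_delta is +2
// and two slots (2 * kSystemPointerSize bytes) are reserved with subq; a
// negative delta instead releases slots with addq, unless allow_shrinkage
// is false (as in the final adjustment of AssembleTailCallBeforeGap below).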
630 :
631 2224 : void SetupShuffleMaskOnStack(TurboAssembler* assembler, uint32_t* mask) {
632 2224 : int64_t shuffle_mask = (mask[2]) | (static_cast<uint64_t>(mask[3]) << 32);
633 2224 : assembler->movq(kScratchRegister, shuffle_mask);
634 2224 : assembler->Push(kScratchRegister);
635 2224 : shuffle_mask = (mask[0]) | (static_cast<uint64_t>(mask[1]) << 32);
636 : assembler->movq(kScratchRegister, shuffle_mask);
637 2224 : assembler->Push(kScratchRegister);
638 2224 : }
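// The stack grows downwards, so pushing mask[3]:mask[2] first and
// mask[1]:mask[0] second leaves all 16 mask bytes at Operand(rsp, 0) in
// little-endian memory order, ready to serve as a 128-bit memory operand.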
639 :
640 : } // namespace
641 :
642 119863 : void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
643 : int first_unused_stack_slot) {
644 119863 : CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
645 : ZoneVector<MoveOperands*> pushes(zone());
646 119863 : GetPushCompatibleMoves(instr, flags, &pushes);
647 :
648 132919 : if (!pushes.empty() &&
649 26112 : (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
650 : first_unused_stack_slot)) {
651 : X64OperandConverter g(this, instr);
652 44740 : for (auto move : pushes) {
653 : LocationOperand destination_location(
654 : LocationOperand::cast(move->destination()));
655 31684 : InstructionOperand source(move->source());
656 : AdjustStackPointerForTailCall(tasm(), frame_access_state(),
657 31684 : destination_location.index());
658 31684 : if (source.IsStackSlot()) {
659 : LocationOperand source_location(LocationOperand::cast(source));
660 13068 : __ Push(g.SlotToOperand(source_location.index()));
661 18616 : } else if (source.IsRegister()) {
662 : LocationOperand source_location(LocationOperand::cast(source));
663 18616 : __ Push(source_location.GetRegister());
664 0 : } else if (source.IsImmediate()) {
665 0 : __ Push(Immediate(ImmediateOperand::cast(source).inline_value()));
666 : } else {
667 : // Pushing non-scalar data types is not supported.
668 0 : UNIMPLEMENTED();
669 : }
670 : frame_access_state()->IncreaseSPDelta(1);
671 : move->Eliminate();
672 : }
673 : }
674 : AdjustStackPointerForTailCall(tasm(), frame_access_state(),
675 119863 : first_unused_stack_slot, false);
676 119863 : }
677 :
678 119863 : void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
679 : int first_unused_stack_slot) {
680 : AdjustStackPointerForTailCall(tasm(), frame_access_state(),
681 119863 : first_unused_stack_slot);
682 119863 : }
683 :
684 : // Check that {kJavaScriptCallCodeStartRegister} is correct.
685 114 : void CodeGenerator::AssembleCodeStartRegisterCheck() {
686 114 : __ ComputeCodeStartAddress(rbx);
687 114 : __ cmpq(rbx, kJavaScriptCallCodeStartRegister);
688 114 : __ Assert(equal, AbortReason::kWrongFunctionCodeStart);
689 114 : }
690 :
691 : // Check if the code object is marked for deoptimization. If it is, then
692 : // jump to the CompileLazyDeoptimizedCode builtin. In order to do this we
693 : // need to:
694 : // 1. read from memory the word that contains that bit, which can be found
695 : // in the flags in the referenced {CodeDataContainer} object;
696 : // 2. test kMarkedForDeoptimizationBit in those flags; and
697 : // 3. jump to the builtin if the bit is not zero.
698 464073 : void CodeGenerator::BailoutIfDeoptimized() {
699 : int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
700 928144 : __ LoadTaggedPointerField(rbx,
701 464073 : Operand(kJavaScriptCallCodeStartRegister, offset));
702 464072 : __ testl(FieldOperand(rbx, CodeDataContainer::kKindSpecificFlagsOffset),
703 : Immediate(1 << Code::kMarkedForDeoptimizationBit));
704 464071 : __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
705 464073 : RelocInfo::CODE_TARGET, not_zero);
706 464072 : }
707 :
708 0 : void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
709 : // Set a mask which has all bits set in the normal case, but has all
710 : // bits cleared if we are speculatively executing the wrong PC.
711 0 : __ ComputeCodeStartAddress(rbx);
712 0 : __ xorq(kSpeculationPoisonRegister, kSpeculationPoisonRegister);
713 : __ cmpq(kJavaScriptCallCodeStartRegister, rbx);
714 : __ movq(rbx, Immediate(-1));
715 0 : __ cmovq(equal, kSpeculationPoisonRegister, rbx);
716 0 : }
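// In other words, kSpeculationPoisonRegister becomes ~0 when the actual and
// expected code start addresses match, and 0 otherwise; the andq emitted by
// kArchWordPoisonOnSpeculation and EmitWordLoadPoisoningIfNeeded then zeroes
// any value loaded along a mispredicted path instead of leaking it.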
717 :
718 0 : void CodeGenerator::AssembleRegisterArgumentPoisoning() {
719 0 : __ andq(kJSFunctionRegister, kSpeculationPoisonRegister);
720 : __ andq(kContextRegister, kSpeculationPoisonRegister);
721 : __ andq(rsp, kSpeculationPoisonRegister);
722 0 : }
723 :
724 : // Assembles an instruction after register allocation, producing machine code.
725 68878638 : CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
726 : Instruction* instr) {
727 : X64OperandConverter i(this, instr);
728 : InstructionCode opcode = instr->opcode();
729 68878638 : ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
730 68878638 : switch (arch_opcode) {
731 : case kArchCallCodeObject: {
732 4828062 : if (HasImmediateInput(instr, 0)) {
733 4451568 : Handle<Code> code = i.InputCode(0);
734 4451568 : __ Call(code, RelocInfo::CODE_TARGET);
735 : } else {
736 376499 : Register reg = i.InputRegister(0);
737 : DCHECK_IMPLIES(
738 : HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
739 : reg == kJavaScriptCallCodeStartRegister);
740 376499 : __ LoadCodeObjectEntry(reg, reg);
741 376503 : if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
742 0 : __ RetpolineCall(reg);
743 : } else {
744 376503 : __ call(reg);
745 : }
746 : }
747 4828084 : RecordCallPosition(instr);
748 : frame_access_state()->ClearSPDelta();
749 : break;
750 : }
751 : case kArchCallBuiltinPointer: {
752 : DCHECK(!HasImmediateInput(instr, 0));
753 3608 : Register builtin_pointer = i.InputRegister(0);
754 3608 : __ CallBuiltinPointer(builtin_pointer);
755 3608 : RecordCallPosition(instr);
756 : frame_access_state()->ClearSPDelta();
757 : break;
758 : }
759 : case kArchCallWasmFunction: {
760 1181972 : if (HasImmediateInput(instr, 0)) {
761 192502 : Constant constant = i.ToConstant(instr->InputAt(0));
762 192564 : Address wasm_code = static_cast<Address>(constant.ToInt64());
763 192564 : if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
764 192613 : __ near_call(wasm_code, constant.rmode());
765 : } else {
766 0 : if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
767 0 : __ RetpolineCall(wasm_code, constant.rmode());
768 : } else {
769 0 : __ Call(wasm_code, constant.rmode());
770 : }
771 : }
772 : } else {
773 989470 : Register reg = i.InputRegister(0);
774 989470 : if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
775 0 : __ RetpolineCall(reg);
776 : } else {
777 989470 : __ call(reg);
778 : }
779 : }
780 1182044 : RecordCallPosition(instr);
781 : frame_access_state()->ClearSPDelta();
782 : break;
783 : }
784 : case kArchTailCallCodeObjectFromJSFunction:
785 : case kArchTailCallCodeObject: {
786 36864 : if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) {
787 : AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
788 : i.TempRegister(0), i.TempRegister(1),
789 1288 : i.TempRegister(2));
790 : }
791 36864 : if (HasImmediateInput(instr, 0)) {
792 31240 : Handle<Code> code = i.InputCode(0);
793 31240 : __ Jump(code, RelocInfo::CODE_TARGET);
794 : } else {
795 5624 : Register reg = i.InputRegister(0);
796 : DCHECK_IMPLIES(
797 : HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
798 : reg == kJavaScriptCallCodeStartRegister);
799 5624 : __ LoadCodeObjectEntry(reg, reg);
800 5624 : if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
801 0 : __ RetpolineJump(reg);
802 : } else {
803 5624 : __ jmp(reg);
804 : }
805 : }
806 : unwinding_info_writer_.MarkBlockWillExit();
807 : frame_access_state()->ClearSPDelta();
808 36864 : frame_access_state()->SetFrameAccessToDefault();
809 36864 : break;
810 : }
811 : case kArchTailCallWasm: {
812 219 : if (HasImmediateInput(instr, 0)) {
813 129 : Constant constant = i.ToConstant(instr->InputAt(0));
814 : Address wasm_code = static_cast<Address>(constant.ToInt64());
815 129 : if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
816 129 : __ near_jmp(wasm_code, constant.rmode());
817 : } else {
818 : __ Move(kScratchRegister, wasm_code, constant.rmode());
819 0 : __ jmp(kScratchRegister);
820 : }
821 : } else {
822 90 : Register reg = i.InputRegister(0);
823 90 : if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
824 0 : __ RetpolineJump(reg);
825 : } else {
826 90 : __ jmp(reg);
827 : }
828 : }
829 : unwinding_info_writer_.MarkBlockWillExit();
830 : frame_access_state()->ClearSPDelta();
831 219 : frame_access_state()->SetFrameAccessToDefault();
832 219 : break;
833 : }
834 : case kArchTailCallAddress: {
835 82768 : CHECK(!HasImmediateInput(instr, 0));
836 82768 : Register reg = i.InputRegister(0);
837 : DCHECK_IMPLIES(
838 : HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
839 : reg == kJavaScriptCallCodeStartRegister);
840 82768 : if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
841 0 : __ RetpolineJump(reg);
842 : } else {
843 82768 : __ jmp(reg);
844 : }
845 : unwinding_info_writer_.MarkBlockWillExit();
846 : frame_access_state()->ClearSPDelta();
847 82768 : frame_access_state()->SetFrameAccessToDefault();
848 : break;
849 : }
850 : case kArchCallJSFunction: {
851 : Register func = i.InputRegister(0);
852 23882 : if (FLAG_debug_code) {
853 : // Check that the function's context matches the context argument.
854 8 : __ cmp_tagged(rsi, FieldOperand(func, JSFunction::kContextOffset));
855 8 : __ Assert(equal, AbortReason::kWrongFunctionContext);
856 : }
857 : static_assert(kJavaScriptCallCodeStartRegister == rcx, "ABI mismatch");
858 : __ LoadTaggedPointerField(rcx,
859 23883 : FieldOperand(func, JSFunction::kCodeOffset));
860 23882 : __ CallCodeObject(rcx);
861 : frame_access_state()->ClearSPDelta();
862 23882 : RecordCallPosition(instr);
863 : break;
864 : }
865 : case kArchPrepareCallCFunction: {
866 : // Frame alignment requires using FP-relative frame addressing.
867 : frame_access_state()->SetFrameAccessToFP();
868 25882 : int const num_parameters = MiscField::decode(instr->opcode());
869 25882 : __ PrepareCallCFunction(num_parameters);
870 25882 : break;
871 : }
872 : case kArchSaveCallerRegisters: {
873 : fp_mode_ =
874 900 : static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
875 : DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
876 : // kReturnRegister0 should have been saved before entering the stub.
877 900 : int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
878 : DCHECK(IsAligned(bytes, kSystemPointerSize));
879 : DCHECK_EQ(0, frame_access_state()->sp_delta());
880 900 : frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
881 : DCHECK(!caller_registers_saved_);
882 900 : caller_registers_saved_ = true;
883 900 : break;
884 : }
885 : case kArchRestoreCallerRegisters: {
886 : DCHECK(fp_mode_ ==
887 : static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
888 : DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
889 : // Don't overwrite the returned value.
890 1800 : int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
891 900 : frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
892 : DCHECK_EQ(0, frame_access_state()->sp_delta());
893 : DCHECK(caller_registers_saved_);
894 900 : caller_registers_saved_ = false;
895 900 : break;
896 : }
897 : case kArchPrepareTailCall:
898 119851 : AssemblePrepareTailCall();
899 119851 : break;
900 : case kArchCallCFunction: {
901 : int const num_parameters = MiscField::decode(instr->opcode());
902 25882 : if (HasImmediateInput(instr, 0)) {
903 24742 : ExternalReference ref = i.InputExternalReference(0);
904 24742 : __ CallCFunction(ref, num_parameters);
905 : } else {
906 1140 : Register func = i.InputRegister(0);
907 1140 : __ CallCFunction(func, num_parameters);
908 : }
909 25882 : frame_access_state()->SetFrameAccessToDefault();
910 : // Ideally, we should decrement the SP delta to match the change of the
911 : // stack pointer in CallCFunction. However, certain architectures (e.g.
912 : // ARM) may have stricter alignment requirements, causing the old SP to
913 : // be saved on the stack. In those cases, we cannot calculate the SP
914 : // delta statically.
915 : frame_access_state()->ClearSPDelta();
916 25882 : if (caller_registers_saved_) {
917 : // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
918 : // Here, we assume the sequence to be:
919 : // kArchSaveCallerRegisters;
920 : // kArchCallCFunction;
921 : // kArchRestoreCallerRegisters;
922 : int bytes =
923 1800 : __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
924 900 : frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
925 : }
926 : // TODO(tebbi): Do we need an lfence here?
927 : break;
928 : }
929 : case kArchJmp:
930 4978083 : AssembleArchJump(i.InputRpo(0));
931 4978141 : break;
932 : case kArchBinarySearchSwitch:
933 34384 : AssembleArchBinarySearchSwitch(instr);
934 34385 : break;
935 : case kArchLookupSwitch:
936 0 : AssembleArchLookupSwitch(instr);
937 0 : break;
938 : case kArchTableSwitch:
939 344 : AssembleArchTableSwitch(instr);
940 344 : break;
941 : case kArchComment:
942 4 : __ RecordComment(reinterpret_cast<const char*>(i.InputInt64(0)));
943 4 : break;
944 : case kArchDebugAbort:
945 : DCHECK(i.InputRegister(0) == rdx);
946 160 : if (!frame_access_state()->has_frame()) {
947 : // We don't actually want to generate a pile of code for this, so just
948 : // claim there is a stack frame without generating one.
949 16 : FrameScope scope(tasm(), StackFrame::NONE);
950 16 : __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
951 16 : RelocInfo::CODE_TARGET);
952 : } else {
953 144 : __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
954 144 : RelocInfo::CODE_TARGET);
955 : }
956 160 : __ int3();
957 : unwinding_info_writer_.MarkBlockWillExit();
958 : break;
959 : case kArchDebugBreak:
960 256122 : __ int3();
961 256122 : break;
962 : case kArchThrowTerminator:
963 : unwinding_info_writer_.MarkBlockWillExit();
964 : break;
965 : case kArchNop:
966 : // Don't emit code for nops.
967 : break;
968 : case kArchDeoptimize: {
969 : int deopt_state_id =
970 44616 : BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
971 : CodeGenResult result =
972 44616 : AssembleDeoptimizerCall(deopt_state_id, current_source_position_);
973 44616 : if (result != kSuccess) return result;
974 : unwinding_info_writer_.MarkBlockWillExit();
975 : break;
976 : }
977 : case kArchRet:
978 2999241 : AssembleReturn(instr->InputAt(0));
979 2999140 : break;
980 : case kArchStackPointer:
981 0 : __ movq(i.OutputRegister(), rsp);
982 : break;
983 : case kArchFramePointer:
984 32802 : __ movq(i.OutputRegister(), rbp);
985 : break;
986 : case kArchParentFramePointer:
987 58132 : if (frame_access_state()->has_frame()) {
988 80148 : __ movq(i.OutputRegister(), Operand(rbp, 0));
989 : } else {
990 31416 : __ movq(i.OutputRegister(), rbp);
991 : }
992 : break;
993 : case kArchTruncateDoubleToI: {
994 : auto result = i.OutputRegister();
995 : auto input = i.InputDoubleRegister(0);
996 : auto ool = new (zone()) OutOfLineTruncateDoubleToI(
997 : this, result, input, DetermineStubCallMode(),
998 106953 : &unwinding_info_writer_);
999 : // We use Cvttsd2siq instead of Cvttsd2si for performance reasons. The
1000 : // use of Cvttsd2siq requires the movl below to avoid sign extension.
1001 53475 : __ Cvttsd2siq(result, input);
1002 53478 : __ cmpq(result, Immediate(1));
1003 53478 : __ j(overflow, ool->entry());
1004 53478 : __ bind(ool->exit());
1005 : __ movl(result, result);
1006 : break;
1007 : }
1008 : case kArchStoreWithWriteBarrier: {
1009 : RecordWriteMode mode =
1010 : static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
1011 : Register object = i.InputRegister(0);
1012 320388 : size_t index = 0;
1013 320388 : Operand operand = i.MemoryOperand(&index);
1014 320388 : Register value = i.InputRegister(index);
1015 : Register scratch0 = i.TempRegister(0);
1016 : Register scratch1 = i.TempRegister(1);
1017 : auto ool = new (zone())
1018 : OutOfLineRecordWrite(this, object, operand, value, scratch0, scratch1,
1019 640776 : mode, DetermineStubCallMode());
1020 320388 : __ StoreTaggedField(operand, value);
1021 : __ CheckPageFlag(object, scratch0,
1022 : MemoryChunk::kPointersFromHereAreInterestingMask,
1023 320388 : not_zero, ool->entry());
1024 320389 : __ bind(ool->exit());
1025 : break;
1026 : }
1027 : case kArchWordPoisonOnSpeculation:
1028 : DCHECK_EQ(i.OutputRegister(), i.InputRegister(0));
1029 0 : __ andq(i.InputRegister(0), kSpeculationPoisonRegister);
1030 : break;
1031 : case kLFence:
1032 0 : __ lfence();
1033 0 : break;
1034 : case kArchStackSlot: {
1035 : FrameOffset offset =
1036 367595 : frame_access_state()->GetFrameOffset(i.InputInt32(0));
1037 367599 : Register base = offset.from_stack_pointer() ? rsp : rbp;
1038 1102820 : __ leaq(i.OutputRegister(), Operand(base, offset.offset()));
1039 : break;
1040 : }
1041 : case kIeee754Float64Acos:
1042 116 : ASSEMBLE_IEEE754_UNOP(acos);
1043 116 : break;
1044 : case kIeee754Float64Acosh:
1045 116 : ASSEMBLE_IEEE754_UNOP(acosh);
1046 116 : break;
1047 : case kIeee754Float64Asin:
1048 116 : ASSEMBLE_IEEE754_UNOP(asin);
1049 116 : break;
1050 : case kIeee754Float64Asinh:
1051 116 : ASSEMBLE_IEEE754_UNOP(asinh);
1052 116 : break;
1053 : case kIeee754Float64Atan:
1054 133 : ASSEMBLE_IEEE754_UNOP(atan);
1055 133 : break;
1056 : case kIeee754Float64Atanh:
1057 116 : ASSEMBLE_IEEE754_UNOP(atanh);
1058 116 : break;
1059 : case kIeee754Float64Atan2:
1060 129 : ASSEMBLE_IEEE754_BINOP(atan2);
1061 129 : break;
1062 : case kIeee754Float64Cbrt:
1063 116 : ASSEMBLE_IEEE754_UNOP(cbrt);
1064 116 : break;
1065 : case kIeee754Float64Cos:
1066 271 : ASSEMBLE_IEEE754_UNOP(cos);
1067 271 : break;
1068 : case kIeee754Float64Cosh:
1069 123 : ASSEMBLE_IEEE754_UNOP(cosh);
1070 123 : break;
1071 : case kIeee754Float64Exp:
1072 147 : ASSEMBLE_IEEE754_UNOP(exp);
1073 147 : break;
1074 : case kIeee754Float64Expm1:
1075 123 : ASSEMBLE_IEEE754_UNOP(expm1);
1076 123 : break;
1077 : case kIeee754Float64Log:
1078 252 : ASSEMBLE_IEEE754_UNOP(log);
1079 252 : break;
1080 : case kIeee754Float64Log1p:
1081 116 : ASSEMBLE_IEEE754_UNOP(log1p);
1082 116 : break;
1083 : case kIeee754Float64Log2:
1084 116 : ASSEMBLE_IEEE754_UNOP(log2);
1085 116 : break;
1086 : case kIeee754Float64Log10:
1087 116 : ASSEMBLE_IEEE754_UNOP(log10);
1088 116 : break;
1089 : case kIeee754Float64Pow:
1090 334 : ASSEMBLE_IEEE754_BINOP(pow);
1091 334 : break;
1092 : case kIeee754Float64Sin:
1093 268 : ASSEMBLE_IEEE754_UNOP(sin);
1094 268 : break;
1095 : case kIeee754Float64Sinh:
1096 123 : ASSEMBLE_IEEE754_UNOP(sinh);
1097 123 : break;
1098 : case kIeee754Float64Tan:
1099 168 : ASSEMBLE_IEEE754_UNOP(tan);
1100 168 : break;
1101 : case kIeee754Float64Tanh:
1102 123 : ASSEMBLE_IEEE754_UNOP(tanh);
1103 123 : break;
1104 : case kX64Add32:
1105 292303 : ASSEMBLE_BINOP(addl);
1106 : break;
1107 : case kX64Add:
1108 306896 : ASSEMBLE_BINOP(addq);
1109 : break;
1110 : case kX64Sub32:
1111 182980 : ASSEMBLE_BINOP(subl);
1112 : break;
1113 : case kX64Sub:
1114 221018 : ASSEMBLE_BINOP(subq);
1115 : break;
1116 : case kX64And32:
1117 720342 : ASSEMBLE_BINOP(andl);
1118 : break;
1119 : case kX64And:
1120 932921 : ASSEMBLE_BINOP(andq);
1121 : break;
1122 : case kX64Cmp8:
1123 36197 : ASSEMBLE_COMPARE(cmpb);
1124 : break;
1125 : case kX64Cmp16:
1126 1297156 : ASSEMBLE_COMPARE(cmpw);
1127 : break;
1128 : case kX64Cmp32:
1129 4747476 : ASSEMBLE_COMPARE(cmpl);
1130 : break;
1131 : case kX64Cmp:
1132 8594033 : ASSEMBLE_COMPARE(cmpq);
1133 : break;
1134 : case kX64Test8:
1135 320461 : ASSEMBLE_COMPARE(testb);
1136 : break;
1137 : case kX64Test16:
1138 91592 : ASSEMBLE_COMPARE(testw);
1139 : break;
1140 : case kX64Test32:
1141 476559 : ASSEMBLE_COMPARE(testl);
1142 : break;
1143 : case kX64Test:
1144 2674568 : ASSEMBLE_COMPARE(testq);
1145 : break;
1146 : case kX64Imul32:
1147 157444 : ASSEMBLE_MULT(imull);
1148 : break;
1149 : case kX64Imul:
1150 62116 : ASSEMBLE_MULT(imulq);
1151 : break;
1152 : case kX64ImulHigh32:
1153 4195 : if (instr->InputAt(1)->IsRegister()) {
1154 4195 : __ imull(i.InputRegister(1));
1155 : } else {
1156 0 : __ imull(i.InputOperand(1));
1157 : }
1158 : break;
1159 : case kX64UmulHigh32:
1160 1642 : if (instr->InputAt(1)->IsRegister()) {
1161 1642 : __ mull(i.InputRegister(1));
1162 : } else {
1163 0 : __ mull(i.InputOperand(1));
1164 : }
1165 : break;
1166 : case kX64Idiv32:
1167 31993 : __ cdq();
1168 : __ idivl(i.InputRegister(1));
1169 : break;
1170 : case kX64Idiv:
1171 2732 : __ cqo();
1172 : __ idivq(i.InputRegister(1));
1173 : break;
1174 : case kX64Udiv32:
1175 29117 : __ xorl(rdx, rdx);
1176 : __ divl(i.InputRegister(1));
1177 : break;
1178 : case kX64Udiv:
1179 1768 : __ xorq(rdx, rdx);
1180 : __ divq(i.InputRegister(1));
1181 : break;
1182 : case kX64Not:
1183 88 : ASSEMBLE_UNOP(notq);
1184 : break;
1185 : case kX64Not32:
1186 5556 : ASSEMBLE_UNOP(notl);
1187 : break;
1188 : case kX64Neg:
1189 17454 : ASSEMBLE_UNOP(negq);
1190 : break;
1191 : case kX64Neg32:
1192 12838 : ASSEMBLE_UNOP(negl);
1193 : break;
1194 : case kX64Or32:
1195 317969 : ASSEMBLE_BINOP(orl);
1196 : break;
1197 : case kX64Or:
1198 374900 : ASSEMBLE_BINOP(orq);
1199 : break;
1200 : case kX64Xor32:
1201 79853 : ASSEMBLE_BINOP(xorl);
1202 : break;
1203 : case kX64Xor:
1204 3272 : ASSEMBLE_BINOP(xorq);
1205 : break;
1206 : case kX64Shl32:
1207 118015 : ASSEMBLE_SHIFT(shll, 5);
1208 : break;
1209 : case kX64Shl:
1210 1430416 : ASSEMBLE_SHIFT(shlq, 6);
1211 : break;
1212 : case kX64Shr32:
1213 393990 : ASSEMBLE_SHIFT(shrl, 5);
1214 : break;
1215 : case kX64Shr:
1216 1435726 : ASSEMBLE_SHIFT(shrq, 6);
1217 : break;
1218 : case kX64Sar32:
1219 98792 : ASSEMBLE_SHIFT(sarl, 5);
1220 : break;
1221 : case kX64Sar:
1222 714344 : ASSEMBLE_SHIFT(sarq, 6);
1223 : break;
1224 : case kX64Ror32:
1225 110783 : ASSEMBLE_SHIFT(rorl, 5);
1226 : break;
1227 : case kX64Ror:
1228 338 : ASSEMBLE_SHIFT(rorq, 6);
1229 : break;
1230 : case kX64Lzcnt:
1231 34 : if (instr->InputAt(0)->IsRegister()) {
1232 34 : __ Lzcntq(i.OutputRegister(), i.InputRegister(0));
1233 : } else {
1234 0 : __ Lzcntq(i.OutputRegister(), i.InputOperand(0));
1235 : }
1236 : break;
1237 : case kX64Lzcnt32:
1238 664 : if (instr->InputAt(0)->IsRegister()) {
1239 624 : __ Lzcntl(i.OutputRegister(), i.InputRegister(0));
1240 : } else {
1241 80 : __ Lzcntl(i.OutputRegister(), i.InputOperand(0));
1242 : }
1243 : break;
1244 : case kX64Tzcnt:
1245 34 : if (instr->InputAt(0)->IsRegister()) {
1246 34 : __ Tzcntq(i.OutputRegister(), i.InputRegister(0));
1247 : } else {
1248 0 : __ Tzcntq(i.OutputRegister(), i.InputOperand(0));
1249 : }
1250 : break;
1251 : case kX64Tzcnt32:
1252 332 : if (instr->InputAt(0)->IsRegister()) {
1253 332 : __ Tzcntl(i.OutputRegister(), i.InputRegister(0));
1254 : } else {
1255 0 : __ Tzcntl(i.OutputRegister(), i.InputOperand(0));
1256 : }
1257 : break;
1258 : case kX64Popcnt:
1259 42 : if (instr->InputAt(0)->IsRegister()) {
1260 42 : __ Popcntq(i.OutputRegister(), i.InputRegister(0));
1261 : } else {
1262 0 : __ Popcntq(i.OutputRegister(), i.InputOperand(0));
1263 : }
1264 : break;
1265 : case kX64Popcnt32:
1266 80 : if (instr->InputAt(0)->IsRegister()) {
1267 80 : __ Popcntl(i.OutputRegister(), i.InputRegister(0));
1268 : } else {
1269 0 : __ Popcntl(i.OutputRegister(), i.InputOperand(0));
1270 : }
1271 : break;
1272 : case kX64Bswap:
1273 12 : __ bswapq(i.OutputRegister());
1274 12 : break;
1275 : case kX64Bswap32:
1276 44 : __ bswapl(i.OutputRegister());
1277 44 : break;
1278 : case kSSEFloat32Cmp:
1279 0 : ASSEMBLE_SSE_BINOP(Ucomiss);
1280 : break;
1281 : case kSSEFloat32Add:
1282 0 : ASSEMBLE_SSE_BINOP(addss);
1283 : break;
1284 : case kSSEFloat32Sub:
1285 0 : ASSEMBLE_SSE_BINOP(subss);
1286 : break;
1287 : case kSSEFloat32Mul:
1288 0 : ASSEMBLE_SSE_BINOP(mulss);
1289 : break;
1290 : case kSSEFloat32Div:
1291 0 : ASSEMBLE_SSE_BINOP(divss);
1292 : // Don't delete this mov. It may improve performance on some CPUs
1293 : // when there is a (v)mulss depending on the result.
1294 0 : __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1295 0 : break;
1296 : case kSSEFloat32Abs: {
1297 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1298 0 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1299 0 : __ psrlq(kScratchDoubleReg, 33);
1300 0 : __ andps(i.OutputDoubleRegister(), kScratchDoubleReg);
1301 0 : break;
1302 : }
1303 : case kSSEFloat32Neg: {
1304 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1305 0 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1306 0 : __ psllq(kScratchDoubleReg, 31);
1307 0 : __ xorps(i.OutputDoubleRegister(), kScratchDoubleReg);
1308 0 : break;
1309 : }
1310 : case kSSEFloat32Sqrt:
1311 364 : ASSEMBLE_SSE_UNOP(sqrtss);
1312 : break;
1313 : case kSSEFloat32ToFloat64:
1314 40904 : ASSEMBLE_SSE_UNOP(Cvtss2sd);
1315 : break;
1316 : case kSSEFloat32Round: {
1317 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
1318 : RoundingMode const mode =
1319 : static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1320 : __ Roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1321 : break;
1322 : }
1323 : case kSSEFloat32ToInt32:
1324 348 : if (instr->InputAt(0)->IsFPRegister()) {
1325 348 : __ Cvttss2si(i.OutputRegister(), i.InputDoubleRegister(0));
1326 : } else {
1327 0 : __ Cvttss2si(i.OutputRegister(), i.InputOperand(0));
1328 : }
1329 : break;
1330 : case kSSEFloat32ToUint32: {
1331 56 : if (instr->InputAt(0)->IsFPRegister()) {
1332 56 : __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1333 : } else {
1334 0 : __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
1335 : }
1336 : break;
1337 : }
1338 : case kSSEFloat64Cmp:
1339 1840 : ASSEMBLE_SSE_BINOP(Ucomisd);
1340 : break;
1341 : case kSSEFloat64Add:
1342 632 : ASSEMBLE_SSE_BINOP(addsd);
1343 : break;
1344 : case kSSEFloat64Sub:
1345 468 : ASSEMBLE_SSE_BINOP(subsd);
1346 : break;
1347 : case kSSEFloat64Mul:
1348 56 : ASSEMBLE_SSE_BINOP(mulsd);
1349 : break;
1350 : case kSSEFloat64Div:
1351 52 : ASSEMBLE_SSE_BINOP(divsd);
1352 : // Don't delete this mov. It may improve performance on some CPUs
1353 : // when there is a (v)mulsd depending on the result.
1354 : __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1355 : break;
1356 : case kSSEFloat64Mod: {
1357 1576 : __ subq(rsp, Immediate(kDoubleSize));
1358 1576 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1359 1576 : kDoubleSize);
1360 : // Move values to st(0) and st(1).
1361 3152 : __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(1));
1362 1576 : __ fld_d(Operand(rsp, 0));
1363 3152 : __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
1364 1576 : __ fld_d(Operand(rsp, 0));
1365 : // Loop while fprem isn't done.
1366 1576 : Label mod_loop;
1367 1576 : __ bind(&mod_loop);
1368 : // This instruction traps on all kinds of inputs, but we assume that the
1369 : // floating-point control word is set to ignore them all.
1370 1576 : __ fprem();
1371 : // The following 2 instructions implicitly use rax.
1372 1576 : __ fnstsw_ax();
1373 1576 : if (CpuFeatures::IsSupported(SAHF)) {
1374 : CpuFeatureScope sahf_scope(tasm(), SAHF);
1375 1544 : __ sahf();
1376 : } else {
1377 : __ shrl(rax, Immediate(8));
1378 : __ andl(rax, Immediate(0xFF));
1379 32 : __ pushq(rax);
1380 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1381 32 : kSystemPointerSize);
1382 32 : __ popfq();
1383 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1384 32 : -kSystemPointerSize);
1385 : }
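// fnstsw_ax/sahf (or the manual pushq/popfq path above) transfer the FPU
// status flags into EFLAGS (C0 -> CF, C2 -> PF, C3 -> ZF), so the
// parity_even branch below really tests C2, which fprem keeps set while
// the argument reduction is still incomplete.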
1386 1576 : __ j(parity_even, &mod_loop);
1387 : // Move output to stack and clean up.
1388 1576 : __ fstp(1);
1389 1576 : __ fstp_d(Operand(rsp, 0));
1390 3152 : __ Movsd(i.OutputDoubleRegister(), Operand(rsp, 0));
1391 : __ addq(rsp, Immediate(kDoubleSize));
1392 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1393 1576 : -kDoubleSize);
1394 : break;
1395 : }
1396 : case kSSEFloat32Max: {
1397 66 : Label compare_swap, done_compare;
1398 66 : if (instr->InputAt(1)->IsFPRegister()) {
1399 : __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1400 : } else {
1401 0 : __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1402 : }
1403 : auto ool =
1404 : new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
1405 66 : __ j(parity_even, ool->entry());
1406 66 : __ j(above, &done_compare, Label::kNear);
1407 66 : __ j(below, &compare_swap, Label::kNear);
1408 : __ Movmskps(kScratchRegister, i.InputDoubleRegister(0));
1409 : __ testl(kScratchRegister, Immediate(1));
1410 66 : __ j(zero, &done_compare, Label::kNear);
1411 66 : __ bind(&compare_swap);
1412 66 : if (instr->InputAt(1)->IsFPRegister()) {
1413 : __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1414 : } else {
1415 0 : __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
1416 : }
1417 66 : __ bind(&done_compare);
1418 66 : __ bind(ool->exit());
1419 : break;
1420 : }
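// The Float32/Float64 Max/Min cases here all follow one scheme: an
// unordered compare sends NaN inputs to the out-of-line NaN loader
// (parity_even), a strict ordering decides directly, and the equal case
// inspects the sign bit via Movmskps/Movmskpd so that signed zeros are
// ordered correctly (e.g. Math.max(-0, +0) must return +0).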
1421 : case kSSEFloat32Min: {
1422 66 : Label compare_swap, done_compare;
1423 66 : if (instr->InputAt(1)->IsFPRegister()) {
1424 : __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1425 : } else {
1426 0 : __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1427 : }
1428 : auto ool =
1429 : new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
1430 66 : __ j(parity_even, ool->entry());
1431 66 : __ j(below, &done_compare, Label::kNear);
1432 66 : __ j(above, &compare_swap, Label::kNear);
1433 66 : if (instr->InputAt(1)->IsFPRegister()) {
1434 : __ Movmskps(kScratchRegister, i.InputDoubleRegister(1));
1435 : } else {
1436 0 : __ Movss(kScratchDoubleReg, i.InputOperand(1));
1437 : __ Movmskps(kScratchRegister, kScratchDoubleReg);
1438 : }
1439 : __ testl(kScratchRegister, Immediate(1));
1440 66 : __ j(zero, &done_compare, Label::kNear);
1441 66 : __ bind(&compare_swap);
1442 66 : if (instr->InputAt(1)->IsFPRegister()) {
1443 : __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1444 : } else {
1445 0 : __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
1446 : }
1447 66 : __ bind(&done_compare);
1448 66 : __ bind(ool->exit());
1449 : break;
1450 : }
1451 : case kSSEFloat64Max: {
1452 252 : Label compare_swap, done_compare;
1453 252 : if (instr->InputAt(1)->IsFPRegister()) {
1454 : __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1455 : } else {
1456 0 : __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1457 : }
1458 : auto ool =
1459 : new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
1460 252 : __ j(parity_even, ool->entry());
1461 252 : __ j(above, &done_compare, Label::kNear);
1462 252 : __ j(below, &compare_swap, Label::kNear);
1463 : __ Movmskpd(kScratchRegister, i.InputDoubleRegister(0));
1464 : __ testl(kScratchRegister, Immediate(1));
1465 252 : __ j(zero, &done_compare, Label::kNear);
1466 252 : __ bind(&compare_swap);
1467 252 : if (instr->InputAt(1)->IsFPRegister()) {
1468 : __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1469 : } else {
1470 0 : __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1471 : }
1472 252 : __ bind(&done_compare);
1473 252 : __ bind(ool->exit());
1474 : break;
1475 : }
1476 : case kSSEFloat64Min: {
1477 339 : Label compare_swap, done_compare;
1478 339 : if (instr->InputAt(1)->IsFPRegister()) {
1479 : __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1480 : } else {
1481 0 : __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1482 : }
1483 : auto ool =
1484 : new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
1485 339 : __ j(parity_even, ool->entry());
1486 339 : __ j(below, &done_compare, Label::kNear);
1487 339 : __ j(above, &compare_swap, Label::kNear);
1488 339 : if (instr->InputAt(1)->IsFPRegister()) {
1489 : __ Movmskpd(kScratchRegister, i.InputDoubleRegister(1));
1490 : } else {
1491 0 : __ Movsd(kScratchDoubleReg, i.InputOperand(1));
1492 : __ Movmskpd(kScratchRegister, kScratchDoubleReg);
1493 : }
1494 : __ testl(kScratchRegister, Immediate(1));
1495 339 : __ j(zero, &done_compare, Label::kNear);
1496 339 : __ bind(&compare_swap);
1497 339 : if (instr->InputAt(1)->IsFPRegister()) {
1498 : __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1499 : } else {
1500 0 : __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1501 : }
1502 339 : __ bind(&done_compare);
1503 339 : __ bind(ool->exit());
1504 : break;
1505 : }
1506 : case kSSEFloat64Abs: {
1507 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1508 6 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1509 6 : __ psrlq(kScratchDoubleReg, 1);
1510 6 : __ andpd(i.OutputDoubleRegister(), kScratchDoubleReg);
1511 6 : break;
1512 : }
1513 : case kSSEFloat64Neg: {
1514 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1515 76 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1516 76 : __ psllq(kScratchDoubleReg, 63);
1517 76 : __ xorpd(i.OutputDoubleRegister(), kScratchDoubleReg);
1518 76 : break;
1519 : }
1520 : case kSSEFloat64Sqrt:
1521 415 : ASSEMBLE_SSE_UNOP(Sqrtsd);
1522 : break;
1523 : case kSSEFloat64Round: {
1524 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
1525 : RoundingMode const mode =
1526 : static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1527 : __ Roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1528 : break;
1529 : }
1530 : case kSSEFloat64ToFloat32:
1531 36016 : ASSEMBLE_SSE_UNOP(Cvtsd2ss);
1532 : break;
1533 : case kSSEFloat64ToInt32:
1534 128088 : if (instr->InputAt(0)->IsFPRegister()) {
1535 107823 : __ Cvttsd2si(i.OutputRegister(), i.InputDoubleRegister(0));
1536 : } else {
1537 40530 : __ Cvttsd2si(i.OutputRegister(), i.InputOperand(0));
1538 : }
1539 : break;
1540 : case kSSEFloat64ToUint32: {
1541 702 : if (instr->InputAt(0)->IsFPRegister()) {
1542 702 : __ Cvttsd2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1543 : } else {
1544 0 : __ Cvttsd2siq(i.OutputRegister(), i.InputOperand(0));
1545 : }
1546 1404 : if (MiscField::decode(instr->opcode())) {
1547 642 : __ AssertZeroExtended(i.OutputRegister());
1548 : }
1549 : break;
1550 : }
1551 : case kSSEFloat32ToInt64:
1552 52 : if (instr->InputAt(0)->IsFPRegister()) {
1553 52 : __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1554 : } else {
1555 0 : __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
1556 : }
1557 52 : if (instr->OutputCount() > 1) {
1558 48 : __ Set(i.OutputRegister(1), 1);
1559 48 : Label done;
1560 48 : Label fail;
1561 : __ Move(kScratchDoubleReg, static_cast<float>(INT64_MIN));
1562 48 : if (instr->InputAt(0)->IsFPRegister()) {
1563 : __ Ucomiss(kScratchDoubleReg, i.InputDoubleRegister(0));
1564 : } else {
1565 0 : __ Ucomiss(kScratchDoubleReg, i.InputOperand(0));
1566 : }
1567 : // If the input is NaN, then the conversion fails.
1568 48 : __ j(parity_even, &fail);
1569 : // If the input is INT64_MIN, then the conversion succeeds.
1570 48 : __ j(equal, &done);
1571 : __ cmpq(i.OutputRegister(0), Immediate(1));
1572 : // If the conversion results in INT64_MIN, but the input was not
1573 : // INT64_MIN, then the conversion fails.
1574 48 : __ j(no_overflow, &done);
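 : // Why cmpq(out, 1)/no_overflow works (illustrative): cvttss2siq returns
 : // the sentinel INT64_MIN for NaN and out-of-range inputs, and out - 1
 : // sets the overflow flag only when out == INT64_MIN, the one value whose
 : // decrement overflows. A commented scalar sketch (Succeeded is a
 : // hypothetical name, not part of this file):
 : //   bool Succeeded(float in, int64_t out) {
 : //     if (in != in) return false;                           // NaN
 : //     if (in == static_cast<float>(INT64_MIN)) return true; // exact hit
 : //     return out != INT64_MIN;                              // no_overflow
 : //   }
 : // kSSEFloat64ToInt64 below repeats the same pattern for doubles.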
1575 48 : __ bind(&fail);
1576 48 : __ Set(i.OutputRegister(1), 0);
1577 48 : __ bind(&done);
1578 : }
1579 : break;
1580 : case kSSEFloat64ToInt64:
1581 1142 : if (instr->InputAt(0)->IsFPRegister()) {
1582 1141 : __ Cvttsd2siq(i.OutputRegister(0), i.InputDoubleRegister(0));
1583 : } else {
1584 2 : __ Cvttsd2siq(i.OutputRegister(0), i.InputOperand(0));
1585 : }
1586 1149 : if (instr->OutputCount() > 1) {
1587 1018 : __ Set(i.OutputRegister(1), 1);
1588 1019 : Label done;
1589 1019 : Label fail;
1590 : __ Move(kScratchDoubleReg, static_cast<double>(INT64_MIN));
1591 1015 : if (instr->InputAt(0)->IsFPRegister()) {
1592 : __ Ucomisd(kScratchDoubleReg, i.InputDoubleRegister(0));
1593 : } else {
1594 0 : __ Ucomisd(kScratchDoubleReg, i.InputOperand(0));
1595 : }
1596 : // If the input is NaN, then the conversion fails.
1597 1017 : __ j(parity_even, &fail);
1598 : // If the input is INT64_MIN, then the conversion succeeds.
1599 1019 : __ j(equal, &done);
1600 : __ cmpq(i.OutputRegister(0), Immediate(1));
1601 : // If the conversion results in INT64_MIN, but the input was not
1602 : // INT64_MIN, then the conversion fails.
1603 1019 : __ j(no_overflow, &done);
1604 1016 : __ bind(&fail);
1605 1018 : __ Set(i.OutputRegister(1), 0);
1606 1019 : __ bind(&done);
1607 : }
1608 : break;
1609 : case kSSEFloat32ToUint64: {
1610 52 : Label fail;
1611 100 : if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
1612 52 : if (instr->InputAt(0)->IsFPRegister()) {
1613 52 : __ Cvttss2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
1614 : } else {
1615 0 : __ Cvttss2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
1616 : }
1617 100 : if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
1618 52 : __ bind(&fail);
1619 : break;
1620 : }
1621 : case kSSEFloat64ToUint64: {
1622 2972 : Label fail;
1623 3028 : if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
1624 2972 : if (instr->InputAt(0)->IsFPRegister()) {
1625 2972 : __ Cvttsd2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
1626 : } else {
1627 0 : __ Cvttsd2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
1628 : }
1629 3028 : if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
1630 2972 : __ bind(&fail);
1631 : break;
1632 : }
1633 : case kSSEInt32ToFloat64:
1634 378584 : if (instr->InputAt(0)->IsRegister()) {
1635 374742 : __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1636 : } else {
1637 7684 : __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1638 : }
1639 : break;
1640 : case kSSEInt32ToFloat32:
1641 984 : if (instr->InputAt(0)->IsRegister()) {
1642 976 : __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1643 : } else {
1644 16 : __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1645 : }
1646 : break;
1647 : case kSSEInt64ToFloat32:
1648 48 : if (instr->InputAt(0)->IsRegister()) {
1649 48 : __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1650 : } else {
1651 0 : __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1652 : }
1653 : break;
1654 : case kSSEInt64ToFloat64:
1655 3489 : if (instr->InputAt(0)->IsRegister()) {
1656 1489 : __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1657 : } else {
1658 4000 : __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1659 : }
1660 : break;
1661 : case kSSEUint64ToFloat32:
1662 31 : if (instr->InputAt(0)->IsRegister()) {
1663 31 : __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1664 : } else {
1665 0 : __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1666 : }
1667 : break;
1668 : case kSSEUint64ToFloat64:
1669 3608 : if (instr->InputAt(0)->IsRegister()) {
1670 2376 : __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1671 : } else {
1672 2464 : __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1673 : }
1674 : break;
1675 : case kSSEUint32ToFloat64:
1676 11332 : if (instr->InputAt(0)->IsRegister()) {
1677 391 : __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1678 : } else {
1679 21882 : __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1680 : }
1681 : break;
1682 : case kSSEUint32ToFloat32:
1683 88 : if (instr->InputAt(0)->IsRegister()) {
1684 88 : __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1685 : } else {
1686 0 : __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1687 : }
1688 : break;
1689 : case kSSEFloat64ExtractLowWord32:
1690 116 : if (instr->InputAt(0)->IsFPStackSlot()) {
1691 0 : __ movl(i.OutputRegister(), i.InputOperand(0));
1692 : } else {
1693 : __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
1694 : }
1695 : break;
1696 : case kSSEFloat64ExtractHighWord32:
1697 101385 : if (instr->InputAt(0)->IsFPStackSlot()) {
1698 120244 : __ movl(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2));
1699 : } else {
1700 41263 : __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1);
1701 : }
1702 : break;
1703 : case kSSEFloat64InsertLowWord32:
1704 4 : if (instr->InputAt(1)->IsRegister()) {
1705 4 : __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 0);
1706 : } else {
1707 0 : __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
1708 : }
1709 : break;
1710 : case kSSEFloat64InsertHighWord32:
1711 116 : if (instr->InputAt(1)->IsRegister()) {
1712 116 : __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 1);
1713 : } else {
1714 0 : __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
1715 : }
1716 : break;
1717 : case kSSEFloat64LoadLowWord32:
1718 112 : if (instr->InputAt(0)->IsRegister()) {
1719 : __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
1720 : } else {
1721 0 : __ Movd(i.OutputDoubleRegister(), i.InputOperand(0));
1722 : }
1723 : break;
1724 : case kAVXFloat32Cmp: {
1725 : CpuFeatureScope avx_scope(tasm(), AVX);
1726 1547 : if (instr->InputAt(1)->IsFPRegister()) {
1727 1523 : __ vucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1728 : } else {
1729 48 : __ vucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1730 : }
1731 : break;
1732 : }
1733 : case kAVXFloat32Add:
1734 3618 : ASSEMBLE_AVX_BINOP(vaddss);
1735 : break;
1736 : case kAVXFloat32Sub:
1737 5242 : ASSEMBLE_AVX_BINOP(vsubss);
1738 : break;
1739 : case kAVXFloat32Mul:
1740 1780 : ASSEMBLE_AVX_BINOP(vmulss);
1741 : break;
1742 : case kAVXFloat32Div:
1743 770 : ASSEMBLE_AVX_BINOP(vdivss);
1744 : // Don't delete this mov. It may improve performance on some CPUs
1745 : // when there is a (v)mulss depending on the result.
1746 : __ Movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1747 : break;
1748 : case kAVXFloat64Cmp: {
1749 : CpuFeatureScope avx_scope(tasm(), AVX);
1750 261574 : if (instr->InputAt(1)->IsFPRegister()) {
1751 240948 : __ vucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1752 : } else {
1753 41252 : __ vucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1754 : }
1755 : break;
1756 : }
1757 : case kAVXFloat64Add:
1758 158512 : ASSEMBLE_AVX_BINOP(vaddsd);
1759 : break;
1760 : case kAVXFloat64Sub:
1761 31168 : ASSEMBLE_AVX_BINOP(vsubsd);
1762 : break;
1763 : case kAVXFloat64Mul:
1764 23750 : ASSEMBLE_AVX_BINOP(vmulsd);
1765 : break;
1766 : case kAVXFloat64Div:
1767 23990 : ASSEMBLE_AVX_BINOP(vdivsd);
1768 : // Don't delete this mov. It may improve performance on some CPUs
1769 : // when there is a (v)mulsd depending on the result.
1770 : __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1771 : break;
1772 : case kAVXFloat32Abs: {
1773 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1774 : CpuFeatureScope avx_scope(tasm(), AVX);
1775 66 : __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
1776 : __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 33);
1777 66 : if (instr->InputAt(0)->IsFPRegister()) {
1778 : __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg,
1779 : i.InputDoubleRegister(0));
1780 : } else {
1781 0 : __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg,
1782 : i.InputOperand(0));
1783 : }
1784 : break;
1785 : }
1786 : case kAVXFloat32Neg: {
1787 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1788 : CpuFeatureScope avx_scope(tasm(), AVX);
1789 176 : __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
1790 : __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 31);
1791 176 : if (instr->InputAt(0)->IsFPRegister()) {
1792 : __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg,
1793 : i.InputDoubleRegister(0));
1794 : } else {
1795 0 : __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg,
1796 : i.InputOperand(0));
1797 : }
1798 : break;
1799 : }
1800 : case kAVXFloat64Abs: {
1801 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1802 : CpuFeatureScope avx_scope(tasm(), AVX);
1803 621 : __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
1804 : __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 1);
1805 621 : if (instr->InputAt(0)->IsFPRegister()) {
1806 : __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg,
1807 : i.InputDoubleRegister(0));
1808 : } else {
1809 0 : __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg,
1810 : i.InputOperand(0));
1811 : }
1812 : break;
1813 : }
1814 : case kAVXFloat64Neg: {
1815 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1816 : CpuFeatureScope avx_scope(tasm(), AVX);
1817 9650 : __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
1818 : __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 63);
1819 9649 : if (instr->InputAt(0)->IsFPRegister()) {
1820 : __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg,
1821 : i.InputDoubleRegister(0));
1822 : } else {
1823 83 : __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg,
1824 : i.InputOperand(0));
1825 : }
1826 : break;
1827 : }
1828 : case kSSEFloat64SilenceNaN:
1829 : __ Xorpd(kScratchDoubleReg, kScratchDoubleReg);
1830 : __ Subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
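 : // Works because subtracting +0.0 is an arithmetic identity for every
 : // non-NaN input (x - +0.0 == x, including x == -0.0), while a signaling
 : // NaN operand comes out of subsd quieted, which is exactly the
 : // silencing required.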
1831 : break;
1832 : case kX64Movsxbl:
1833 44834 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1834 90287 : ASSEMBLE_MOVX(movsxbl);
1835 44834 : __ AssertZeroExtended(i.OutputRegister());
1836 44834 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1837 44834 : break;
1838 : case kX64Movzxbl:
1839 176740 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1840 354629 : ASSEMBLE_MOVX(movzxbl);
1841 176741 : __ AssertZeroExtended(i.OutputRegister());
1842 176741 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1843 176741 : break;
1844 : case kX64Movsxbq:
1845 13515 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1846 27042 : ASSEMBLE_MOVX(movsxbq);
1847 13515 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1848 13515 : break;
1849 : case kX64Movzxbq:
1850 14020 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1851 28040 : ASSEMBLE_MOVX(movzxbq);
1852 14020 : __ AssertZeroExtended(i.OutputRegister());
1853 14020 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1854 14020 : break;
1855 : case kX64Movb: {
1856 79996 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1857 79996 : size_t index = 0;
1858 79996 : Operand operand = i.MemoryOperand(&index);
1859 159996 : if (HasImmediateInput(instr, index)) {
1860 13484 : __ movb(operand, Immediate(i.InputInt8(index)));
1861 : } else {
1862 73256 : __ movb(operand, i.InputRegister(index));
1863 : }
1864 79998 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1865 : break;
1866 : }
1867 : case kX64Movsxwl:
1868 10503 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1869 21486 : ASSEMBLE_MOVX(movsxwl);
1870 10503 : __ AssertZeroExtended(i.OutputRegister());
1871 10503 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1872 10503 : break;
1873 : case kX64Movzxwl:
1874 158231 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1875 322943 : ASSEMBLE_MOVX(movzxwl);
1876 158234 : __ AssertZeroExtended(i.OutputRegister());
1877 158234 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1878 158235 : break;
1879 : case kX64Movsxwq:
1880 9027 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1881 18058 : ASSEMBLE_MOVX(movsxwq);
1882 : break;
1883 : case kX64Movzxwq:
1884 672 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1885 1344 : ASSEMBLE_MOVX(movzxwq);
1886 672 : __ AssertZeroExtended(i.OutputRegister());
1887 672 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1888 672 : break;
1889 : case kX64Movw: {
1890 15370 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1891 15370 : size_t index = 0;
1892 15370 : Operand operand = i.MemoryOperand(&index);
1893 30740 : if (HasImmediateInput(instr, index)) {
1894 1830 : __ movw(operand, Immediate(i.InputInt16(index)));
1895 : } else {
1896 14455 : __ movw(operand, i.InputRegister(index));
1897 : }
1898 15370 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1899 : break;
1900 : }
1901 : case kX64Movl:
1902 2519239 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1903 2518456 : if (instr->HasOutput()) {
1904 969402 : if (instr->addressing_mode() == kMode_None) {
1905 60747 : if (instr->InputAt(0)->IsRegister()) {
1906 57179 : __ movl(i.OutputRegister(), i.InputRegister(0));
1907 : } else {
1908 7134 : __ movl(i.OutputRegister(), i.InputOperand(0));
1909 : }
1910 : } else {
1911 908666 : __ movl(i.OutputRegister(), i.MemoryOperand());
1912 : }
1913 969454 : __ AssertZeroExtended(i.OutputRegister());
1914 : } else {
1915 1549054 : size_t index = 0;
1916 1549054 : Operand operand = i.MemoryOperand(&index);
1917 3098732 : if (HasImmediateInput(instr, index)) {
1918 482672 : __ movl(operand, i.InputImmediate(index));
1919 : } else {
1920 1066695 : __ movl(operand, i.InputRegister(index));
1921 : }
1922 : }
1923 2518533 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1924 2518571 : break;
1925 : case kX64Movsxlq:
1926 230911 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1927 546998 : ASSEMBLE_MOVX(movsxlq);
1928 230910 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1929 230911 : break;
1930 : case kX64MovqDecompressTaggedSigned: {
1931 0 : CHECK(instr->HasOutput());
1932 0 : __ DecompressTaggedSigned(i.OutputRegister(), i.MemoryOperand());
1933 0 : break;
1934 : }
1935 : case kX64MovqDecompressTaggedPointer: {
1936 0 : CHECK(instr->HasOutput());
1937 0 : __ DecompressTaggedPointer(i.OutputRegister(), i.MemoryOperand());
1938 0 : break;
1939 : }
1940 : case kX64MovqDecompressAnyTagged: {
1941 0 : CHECK(instr->HasOutput());
1942 0 : __ DecompressAnyTagged(i.OutputRegister(), i.MemoryOperand());
1943 0 : break;
1944 : }
1945 : case kX64MovqCompressTagged: {
1946 0 : CHECK(!instr->HasOutput());
1947 0 : size_t index = 0;
1948 0 : Operand operand = i.MemoryOperand(&index);
1949 0 : if (HasImmediateInput(instr, index)) {
1950 0 : __ StoreTaggedField(operand, i.InputImmediate(index));
1951 : } else {
1952 0 : __ StoreTaggedField(operand, i.InputRegister(index));
1953 : }
1954 : break;
1955 : }
1956 : case kX64DecompressSigned: {
1957 0 : CHECK(instr->HasOutput());
1958 0 : ASSEMBLE_MOVX(movsxlq);
1959 : break;
1960 : }
1961 : case kX64DecompressPointer: {
1962 0 : CHECK(instr->HasOutput());
1963 0 : ASSEMBLE_MOVX(movsxlq);
1964 0 : __ addq(i.OutputRegister(), kRootRegister);
1965 : break;
1966 : }
1967 : case kX64DecompressAny: {
1968 0 : CHECK(instr->HasOutput());
1969 0 : ASSEMBLE_MOVX(movsxlq);
1970 : // TODO(solanes): Do branchful compute?
1971 : // Branchlessly compute |masked_root|:
1972 : STATIC_ASSERT((kSmiTagSize == 1) && (kSmiTag < 32));
1973 : Register masked_root = kScratchRegister;
1974 0 : __ movl(masked_root, i.OutputRegister());
1975 : __ andl(masked_root, Immediate(kSmiTagMask));
1976 : __ negq(masked_root);
1977 : __ andq(masked_root, kRootRegister);
1978 : // Now this add operation will either leave the value unchanged if it is a
1979 : // smi or add the isolate root if it is a heap object.
1980 : __ addq(i.OutputRegister(), masked_root);
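 : // Concretely (illustrative; kSmiTagSize == 1, so kSmiTagMask == 1): a
 : // Smi has tag bit 0, masked_root == -(0) & root == 0 and the addq is a
 : // no-op; a heap object has tag bit 1, negq turns 1 into all ones,
 : // masked_root == kRootRegister, and the isolate root is added exactly
 : // once.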
1981 : break;
1982 : }
1983 : // TODO(solanes): Combine into one Compress? They seem to be identical.
1984 : // TODO(solanes): We might get away with doing a no-op in these three cases.
1985 : // The movl instruction is the conservative way for the moment.
1986 : case kX64CompressSigned: {
1987 0 : ASSEMBLE_MOVX(movl);
1988 : break;
1989 : }
1990 : case kX64CompressPointer: {
1991 0 : ASSEMBLE_MOVX(movl);
1992 : break;
1993 : }
1994 : case kX64CompressAny: {
1995 0 : ASSEMBLE_MOVX(movl);
1996 : break;
1997 : }
1998 : case kX64Movq:
1999 8641816 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
2000 8641547 : if (instr->HasOutput()) {
2001 5795988 : __ movq(i.OutputRegister(), i.MemoryOperand());
2002 : } else {
2003 2846960 : size_t index = 0;
2004 2846960 : Operand operand = i.MemoryOperand(&index);
2005 5693920 : if (HasImmediateInput(instr, index)) {
2006 21878 : __ movq(operand, i.InputImmediate(index));
2007 : } else {
2008 2825082 : __ movq(operand, i.InputRegister(index));
2009 : }
2010 : }
2011 8643840 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2012 8643423 : break;
2013 : case kX64Movss:
2014 28867 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
2015 28869 : if (instr->HasOutput()) {
2016 15709 : __ movss(i.OutputDoubleRegister(), i.MemoryOperand());
2017 : } else {
2018 13158 : size_t index = 0;
2019 13158 : Operand operand = i.MemoryOperand(&index);
2020 26316 : __ movss(operand, i.InputDoubleRegister(index));
2021 : }
2022 : break;
2023 : case kX64Movsd: {
2024 620200 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
2025 620234 : if (instr->HasOutput()) {
2026 : const MemoryAccessMode access_mode =
2027 : static_cast<MemoryAccessMode>(MiscField::decode(opcode));
2028 426145 : if (access_mode == kMemoryAccessPoisoned) {
2029 : // If we have to poison the loaded value, we load into a general
2030 : // purpose register first, mask it with the poison, and move the
2031 : // value from the general purpose register into the double register.
2032 0 : __ movq(kScratchRegister, i.MemoryOperand());
2033 : __ andq(kScratchRegister, kSpeculationPoisonRegister);
2034 : __ Movq(i.OutputDoubleRegister(), kScratchRegister);
2035 : } else {
2036 : __ Movsd(i.OutputDoubleRegister(), i.MemoryOperand());
2037 : }
2038 : } else {
2039 194089 : size_t index = 0;
2040 194089 : Operand operand = i.MemoryOperand(&index);
2041 194090 : __ Movsd(operand, i.InputDoubleRegister(index));
2042 : }
2043 : break;
2044 : }
2045 : case kX64Movdqu: {
2046 : CpuFeatureScope sse_scope(tasm(), SSSE3);
2047 9860 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
2048 9860 : if (instr->HasOutput()) {
2049 5780 : __ movdqu(i.OutputSimd128Register(), i.MemoryOperand());
2050 : } else {
2051 4080 : size_t index = 0;
2052 4080 : Operand operand = i.MemoryOperand(&index);
2053 8160 : __ movdqu(operand, i.InputSimd128Register(index));
2054 : }
2055 : break;
2056 : }
2057 : case kX64BitcastFI:
2058 51102 : if (instr->InputAt(0)->IsFPStackSlot()) {
2059 0 : __ movl(i.OutputRegister(), i.InputOperand(0));
2060 : } else {
2061 : __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
2062 : }
2063 : break;
2064 : case kX64BitcastDL:
2065 50775 : if (instr->InputAt(0)->IsFPStackSlot()) {
2066 0 : __ movq(i.OutputRegister(), i.InputOperand(0));
2067 : } else {
2068 : __ Movq(i.OutputRegister(), i.InputDoubleRegister(0));
2069 : }
2070 : break;
2071 : case kX64BitcastIF:
2072 327 : if (instr->InputAt(0)->IsRegister()) {
2073 : __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
2074 : } else {
2075 0 : __ movss(i.OutputDoubleRegister(), i.InputOperand(0));
2076 : }
2077 : break;
2078 : case kX64BitcastLD:
2079 215 : if (instr->InputAt(0)->IsRegister()) {
2080 : __ Movq(i.OutputDoubleRegister(), i.InputRegister(0));
2081 : } else {
2082 0 : __ Movsd(i.OutputDoubleRegister(), i.InputOperand(0));
2083 : }
2084 : break;
2085 : case kX64Lea32: {
2086 : AddressingMode mode = AddressingModeField::decode(instr->opcode());
2087 : // Shorten "leal" to "addl", "subl" or "shll" if the register allocation
2088 : // and addressing mode just happen to work out. The "addl"/"subl" forms
2089 : // in these cases are faster based on measurements.
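 : // E.g. (illustrative): leal rax, [rax + 0x10] is emitted as
 : // addl rax, 0x10, and leal rax, [rax + rax] as shll rax, 1; the
 : // kX64Lea case below applies the same rewrites to the 64-bit forms.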
2090 329177 : if (i.InputRegister(0) == i.OutputRegister()) {
2091 151935 : if (mode == kMode_MRI) {
2092 : int32_t constant_summand = i.InputInt32(1);
2093 : DCHECK_NE(0, constant_summand);
2094 75452 : if (constant_summand > 0) {
2095 54240 : __ addl(i.OutputRegister(), Immediate(constant_summand));
2096 : } else {
2097 21212 : __ subl(i.OutputRegister(),
2098 : Immediate(base::NegateWithWraparound(constant_summand)));
2099 : }
2100 76464 : } else if (mode == kMode_MR1) {
2101 16459 : if (i.InputRegister(1) == i.OutputRegister()) {
2102 4288 : __ shll(i.OutputRegister(), Immediate(1));
2103 : } else {
2104 12171 : __ addl(i.OutputRegister(), i.InputRegister(1));
2105 : }
2106 60005 : } else if (mode == kMode_M2) {
2107 0 : __ shll(i.OutputRegister(), Immediate(1));
2108 60005 : } else if (mode == kMode_M4) {
2109 2942 : __ shll(i.OutputRegister(), Immediate(2));
2110 57063 : } else if (mode == kMode_M8) {
2111 120 : __ shll(i.OutputRegister(), Immediate(3));
2112 : } else {
2113 56943 : __ leal(i.OutputRegister(), i.MemoryOperand());
2114 : }
2115 203608 : } else if (mode == kMode_MR1 &&
2116 : i.InputRegister(1) == i.OutputRegister()) {
2117 17090 : __ addl(i.OutputRegister(), i.InputRegister(0));
2118 : } else {
2119 160150 : __ leal(i.OutputRegister(), i.MemoryOperand());
2120 : }
2121 329198 : __ AssertZeroExtended(i.OutputRegister());
2122 329170 : break;
2123 : }
2124 : case kX64Lea: {
2125 : AddressingMode mode = AddressingModeField::decode(instr->opcode());
2126 : // Shorten "leaq" to "addq", "subq" or "shlq" if the register allocation
2127 : // and addressing mode just happen to work out. The "addq"/"subq" forms
2128 : // in these cases are faster based on measurements.
2129 1958704 : if (i.InputRegister(0) == i.OutputRegister()) {
2130 540154 : if (mode == kMode_MRI) {
2131 : int32_t constant_summand = i.InputInt32(1);
2132 437427 : if (constant_summand > 0) {
2133 354263 : __ addq(i.OutputRegister(), Immediate(constant_summand));
2134 83164 : } else if (constant_summand < 0) {
2135 166270 : __ subq(i.OutputRegister(), Immediate(-constant_summand));
2136 : }
2137 102726 : } else if (mode == kMode_MR1) {
2138 50559 : if (i.InputRegister(1) == i.OutputRegister()) {
2139 2714 : __ shlq(i.OutputRegister(), Immediate(1));
2140 : } else {
2141 47845 : __ addq(i.OutputRegister(), i.InputRegister(1));
2142 : }
2143 52167 : } else if (mode == kMode_M2) {
2144 0 : __ shlq(i.OutputRegister(), Immediate(1));
2145 52167 : } else if (mode == kMode_M4) {
2146 336 : __ shlq(i.OutputRegister(), Immediate(2));
2147 51831 : } else if (mode == kMode_M8) {
2148 12792 : __ shlq(i.OutputRegister(), Immediate(3));
2149 : } else {
2150 39039 : __ leaq(i.OutputRegister(), i.MemoryOperand());
2151 : }
2152 1660047 : } else if (mode == kMode_MR1 &&
2153 : i.InputRegister(1) == i.OutputRegister()) {
2154 169038 : __ addq(i.OutputRegister(), i.InputRegister(0));
2155 : } else {
2156 1249549 : __ leaq(i.OutputRegister(), i.MemoryOperand());
2157 : }
2158 : break;
2159 : }
2160 : case kX64Dec32:
2161 0 : __ decl(i.OutputRegister());
2162 : break;
2163 : case kX64Inc32:
2164 0 : __ incl(i.OutputRegister());
2165 : break;
2166 : case kX64Push:
2167 3543484 : if (AddressingModeField::decode(instr->opcode()) != kMode_None) {
2168 18699 : size_t index = 0;
2169 18699 : Operand operand = i.MemoryOperand(&index);
2170 18699 : __ pushq(operand);
2171 : frame_access_state()->IncreaseSPDelta(1);
2172 18699 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2173 18699 : kSystemPointerSize);
2174 3524785 : } else if (HasImmediateInput(instr, 0)) {
2175 523831 : __ pushq(i.InputImmediate(0));
2176 : frame_access_state()->IncreaseSPDelta(1);
2177 523831 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2178 523831 : kSystemPointerSize);
2179 3000954 : } else if (instr->InputAt(0)->IsRegister()) {
2180 2250745 : __ pushq(i.InputRegister(0));
2181 : frame_access_state()->IncreaseSPDelta(1);
2182 2250751 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2183 2250751 : kSystemPointerSize);
2184 1488556 : } else if (instr->InputAt(0)->IsFloatRegister() ||
2185 : instr->InputAt(0)->IsDoubleRegister()) {
2186 : // TODO(titzer): use another machine instruction?
2187 22750 : __ subq(rsp, Immediate(kDoubleSize));
2188 : frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize);
2189 22750 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2190 22750 : kDoubleSize);
2191 45500 : __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
2192 727459 : } else if (instr->InputAt(0)->IsSimd128Register()) {
2193 : // TODO(titzer): use another machine instruction?
2194 168 : __ subq(rsp, Immediate(kSimd128Size));
2195 : frame_access_state()->IncreaseSPDelta(kSimd128Size /
2196 : kSystemPointerSize);
2197 168 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2198 168 : kSimd128Size);
2199 336 : __ Movups(Operand(rsp, 0), i.InputSimd128Register(0));
2200 747148 : } else if (instr->InputAt(0)->IsStackSlot() ||
2201 740644 : instr->InputAt(0)->IsFloatStackSlot() ||
2202 : instr->InputAt(0)->IsDoubleStackSlot()) {
2203 727075 : __ pushq(i.InputOperand(0));
2204 : frame_access_state()->IncreaseSPDelta(1);
2205 727075 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2206 727075 : kSystemPointerSize);
2207 : } else {
2208 : DCHECK(instr->InputAt(0)->IsSimd128StackSlot());
2209 216 : __ Movups(kScratchDoubleReg, i.InputOperand(0));
2210 : // TODO(titzer): use another machine instruction?
2211 : __ subq(rsp, Immediate(kSimd128Size));
2212 : frame_access_state()->IncreaseSPDelta(kSimd128Size /
2213 : kSystemPointerSize);
2214 216 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2215 216 : kSimd128Size);
2216 432 : __ Movups(Operand(rsp, 0), kScratchDoubleReg);
2217 : }
2218 : break;
2219 : case kX64Poke: {
2220 : int slot = MiscField::decode(instr->opcode());
2221 3392 : if (HasImmediateInput(instr, 0)) {
2222 2264 : __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputImmediate(0));
2223 : } else {
2224 4520 : __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputRegister(0));
2225 : }
2226 : break;
2227 : }
2228 : case kX64Peek: {
2229 : int reverse_slot = i.InputInt32(0);
2230 : int offset =
2231 5067 : FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
2232 5067 : if (instr->OutputAt(0)->IsFPRegister()) {
2233 : LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
2234 2528 : if (op->representation() == MachineRepresentation::kFloat64) {
2235 2528 : __ Movsd(i.OutputDoubleRegister(), Operand(rbp, offset));
2236 : } else {
2237 : DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
2238 2528 : __ Movss(i.OutputFloatRegister(), Operand(rbp, offset));
2239 : }
2240 : } else {
2241 7617 : __ movq(i.OutputRegister(), Operand(rbp, offset));
2242 : }
2243 : break;
2244 : }
2245 : // TODO(gdeepti): Get rid of redundant moves for F32x4Splat/Extract below
2246 : case kX64F32x4Splat: {
2247 140 : XMMRegister dst = i.OutputSimd128Register();
2248 140 : if (instr->InputAt(0)->IsFPRegister()) {
2249 140 : __ movss(dst, i.InputDoubleRegister(0));
2250 : } else {
2251 0 : __ movss(dst, i.InputOperand(0));
2252 : }
2253 140 : __ shufps(dst, dst, 0x0);
2254 : break;
2255 : }
2256 : case kX64F32x4ExtractLane: {
2257 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2258 64 : __ extractps(kScratchRegister, i.InputSimd128Register(0), i.InputInt8(1));
2259 32 : __ movd(i.OutputDoubleRegister(), kScratchRegister);
2260 : break;
2261 : }
2262 : case kX64F32x4ReplaceLane: {
2263 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2264 : // The insertps instruction uses imm8[5:4] to indicate the lane
2265 : // that needs to be replaced.
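 : // E.g. replacing lane 2 (illustrative): select == 2 << 4 == 0x20, so
 : // imm8[5:4] == 2 and insertps writes the replacement scalar into the
 : // third dword, while imm8[3:0] == 0 leaves every other lane intact.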
2266 32 : byte select = i.InputInt8(1) << 4 & 0x30;
2267 32 : if (instr->InputAt(2)->IsFPRegister()) {
2268 64 : __ insertps(i.OutputSimd128Register(), i.InputDoubleRegister(2),
2269 32 : select);
2270 : } else {
2271 0 : __ insertps(i.OutputSimd128Register(), i.InputOperand(2), select);
2272 : }
2273 : break;
2274 : }
2275 : case kX64F32x4SConvertI32x4: {
2276 4 : __ cvtdq2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2277 4 : break;
2278 : }
2279 : case kX64F32x4UConvertI32x4: {
2280 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2281 : DCHECK_NE(i.OutputSimd128Register(), kScratchDoubleReg);
2282 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2283 : XMMRegister dst = i.OutputSimd128Register();
2284 4 : __ pxor(kScratchDoubleReg, kScratchDoubleReg); // zeros
2285 4 : __ pblendw(kScratchDoubleReg, dst, 0x55); // get lo 16 bits
2286 : __ psubd(dst, kScratchDoubleReg); // get hi 16 bits
2287 4 : __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly
2288 4 : __ psrld(dst, 1); // divide by 2 to get in unsigned range
2289 4 : __ cvtdq2ps(dst, dst); // convert hi exactly
2290 4 : __ addps(dst, dst); // double hi, exactly
2291 4 : __ addps(dst, kScratchDoubleReg); // add hi and lo, may round.
2292 : break;
2293 : }
2294 : case kX64F32x4Abs: {
2295 : XMMRegister dst = i.OutputSimd128Register();
2296 : XMMRegister src = i.InputSimd128Register(0);
2297 4 : if (dst == src) {
2298 4 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2299 4 : __ psrld(kScratchDoubleReg, 1);
2300 4 : __ andps(i.OutputSimd128Register(), kScratchDoubleReg);
2301 : } else {
2302 0 : __ pcmpeqd(dst, dst);
2303 0 : __ psrld(dst, 1);
2304 0 : __ andps(dst, i.InputSimd128Register(0));
2305 : }
2306 : break;
2307 : }
2308 : case kX64F32x4Neg: {
2309 : XMMRegister dst = i.OutputSimd128Register();
2310 : XMMRegister src = i.InputSimd128Register(0);
2311 4 : if (dst == src) {
2312 4 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2313 4 : __ pslld(kScratchDoubleReg, 31);
2314 4 : __ xorps(i.OutputSimd128Register(), kScratchDoubleReg);
2315 : } else {
2316 0 : __ pcmpeqd(dst, dst);
2317 0 : __ pslld(dst, 31);
2318 0 : __ xorps(dst, i.InputSimd128Register(0));
2319 : }
2320 : break;
2321 : }
2322 : case kX64F32x4RecipApprox: {
2323 4 : __ rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2324 4 : break;
2325 : }
2326 : case kX64F32x4RecipSqrtApprox: {
2327 4 : __ rsqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2328 4 : break;
2329 : }
2330 : case kX64F32x4Add: {
2331 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2332 12 : __ addps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2333 12 : break;
2334 : }
2335 : case kX64F32x4AddHoriz: {
2336 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2337 : CpuFeatureScope sse_scope(tasm(), SSE3);
2338 4 : __ haddps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2339 : break;
2340 : }
2341 : case kX64F32x4Sub: {
2342 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2343 4 : __ subps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2344 4 : break;
2345 : }
2346 : case kX64F32x4Mul: {
2347 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2348 4 : __ mulps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2349 4 : break;
2350 : }
2351 : case kX64F32x4Min: {
2352 4 : XMMRegister src1 = i.InputSimd128Register(1),
2353 4 : dst = i.OutputSimd128Register();
2354 : DCHECK_EQ(dst, i.InputSimd128Register(0));
2355 : // The minps instruction doesn't propagate NaNs and +0's in its first
2356 : // operand. Perform minps in both orders, merge the results, and adjust.
2357 4 : __ movaps(kScratchDoubleReg, src1);
2358 4 : __ minps(kScratchDoubleReg, dst);
2359 4 : __ minps(dst, src1);
2360 : // Propagate -0's and NaNs, which may be non-canonical.
2361 4 : __ orps(kScratchDoubleReg, dst);
2362 : // Canonicalize NaNs by quieting and clearing the payload.
2363 4 : __ cmpps(dst, kScratchDoubleReg, 3);
2364 4 : __ orps(kScratchDoubleReg, dst);
2365 4 : __ psrld(dst, 10);
2366 4 : __ andnps(dst, kScratchDoubleReg);
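 : // Walkthrough (illustrative): for min(+0.0, -0.0) the two minps orders
 : // return opposite operands and orps merges their sign bits into -0.0,
 : // the required answer. For a NaN input, cmpps(..., 3) flags the lane as
 : // unordered (all ones), the second orps saturates the scratch lane to
 : // all ones, and psrld + andnps reduce it to ~0x003FFFFF & ~0 ==
 : // 0xFFC00000, a quiet NaN with a cleared payload. Ordered lanes get a
 : // zero mask and pass the merged minimum through untouched.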
2367 : break;
2368 : }
2369 : case kX64F32x4Max: {
2370 4 : XMMRegister src1 = i.InputSimd128Register(1),
2371 4 : dst = i.OutputSimd128Register();
2372 : DCHECK_EQ(dst, i.InputSimd128Register(0));
2373 : // The maxps instruction doesn't propagate NaNs and +0's in its first
2374 : // operand. Perform maxps in both orders, merge the results, and adjust.
2375 4 : __ movaps(kScratchDoubleReg, src1);
2376 4 : __ maxps(kScratchDoubleReg, dst);
2377 4 : __ maxps(dst, src1);
2378 : // Find discrepancies.
2379 4 : __ xorps(dst, kScratchDoubleReg);
2380 : // Propagate NaNs, which may be non-canonical.
2381 4 : __ orps(kScratchDoubleReg, dst);
2382 : // Propagate sign discrepancy and (subtle) quiet NaNs.
2383 4 : __ subps(kScratchDoubleReg, dst);
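 : // E.g. max(+0.0, -0.0) (illustrative): both maxps orders compare equal
 : // and return their second operand, so dst == -0.0 and scratch == +0.0.
 : // xorps leaves the sign-bit discrepancy 0x80000000 in dst, orps folds
 : // it into scratch (-0.0), and this subps computes -0.0 - (-0.0) ==
 : // +0.0, the correct answer, which the canonicalization tail below
 : // passes through.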
2384 : // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
2385 4 : __ cmpps(dst, kScratchDoubleReg, 3);
2386 4 : __ psrld(dst, 10);
2387 4 : __ andnps(dst, kScratchDoubleReg);
2388 : break;
2389 : }
2390 : case kX64F32x4Eq: {
2391 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2392 4 : __ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x0);
2393 4 : break;
2394 : }
2395 : case kX64F32x4Ne: {
2396 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2397 4 : __ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x4);
2398 4 : break;
2399 : }
2400 : case kX64F32x4Lt: {
2401 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2402 8 : __ cmpltps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2403 : break;
2404 : }
2405 : case kX64F32x4Le: {
2406 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2407 8 : __ cmpleps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2408 : break;
2409 : }
2410 : case kX64I32x4Splat: {
2411 1088 : XMMRegister dst = i.OutputSimd128Register();
2412 1088 : if (instr->InputAt(0)->IsRegister()) {
2413 1088 : __ movd(dst, i.InputRegister(0));
2414 : } else {
2415 0 : __ movd(dst, i.InputOperand(0));
2416 : }
2417 1088 : __ pshufd(dst, dst, 0x0);
2418 : break;
2419 : }
2420 : case kX64I32x4ExtractLane: {
2421 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2422 3816 : __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
2423 : break;
2424 : }
2425 : case kX64I32x4ReplaceLane: {
2426 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2427 1784 : if (instr->InputAt(2)->IsRegister()) {
2428 296 : __ Pinsrd(i.OutputSimd128Register(), i.InputRegister(2),
2429 296 : i.InputInt8(1));
2430 : } else {
2431 2976 : __ Pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
2432 : }
2433 : break;
2434 : }
2435 : case kX64I32x4SConvertF32x4: {
2436 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2437 : XMMRegister dst = i.OutputSimd128Register();
2438 : // NaN -> 0
2439 4 : __ movaps(kScratchDoubleReg, dst);
2440 : __ cmpeqps(kScratchDoubleReg, kScratchDoubleReg);
2441 : __ pand(dst, kScratchDoubleReg);
2442 : // Set top bit if >= 0 (but not -0.0!)
2443 : __ pxor(kScratchDoubleReg, dst);
2444 : // Convert
2445 4 : __ cvttps2dq(dst, dst);
2446 : // Set top bit if a lane that was >= 0 is now < 0 (positive overflow)
2447 : __ pand(kScratchDoubleReg, dst);
2448 4 : __ psrad(kScratchDoubleReg, 31);
2449 : // Set positive overflow lanes to 0x7FFFFFFF
2450 : __ pxor(dst, kScratchDoubleReg);
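 : // Worked lane (illustrative): input 3.0e9f (above INT32_MAX). The lane
 : // is ordered, so pand keeps it and pxor records its clear sign bit as a
 : // set top bit in scratch. cvttps2dq saturates to 0x80000000; pand/psrad
 : // then broadcast "non-negative input became negative", and this final
 : // pxor flips 0x80000000 into 0x7FFFFFFF. A NaN lane is zeroed by the
 : // first pand and simply converts to 0.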
2451 : break;
2452 : }
2453 : case kX64I32x4SConvertI16x8Low: {
2454 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2455 4 : __ pmovsxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2456 : break;
2457 : }
2458 : case kX64I32x4SConvertI16x8High: {
2459 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2460 : XMMRegister dst = i.OutputSimd128Register();
2461 4 : __ palignr(dst, i.InputSimd128Register(0), 8);
2462 : __ pmovsxwd(dst, dst);
2463 : break;
2464 : }
2465 : case kX64I32x4Neg: {
2466 : CpuFeatureScope sse_scope(tasm(), SSSE3);
2467 : XMMRegister dst = i.OutputSimd128Register();
2468 : XMMRegister src = i.InputSimd128Register(0);
2469 4 : if (dst == src) {
2470 4 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2471 : __ psignd(dst, kScratchDoubleReg);
2472 : } else {
2473 0 : __ pxor(dst, dst);
2474 : __ psubd(dst, src);
2475 : }
2476 : break;
2477 : }
2478 : case kX64I32x4Shl: {
2479 248 : __ pslld(i.OutputSimd128Register(), i.InputInt8(1));
2480 124 : break;
2481 : }
2482 : case kX64I32x4ShrS: {
2483 248 : __ psrad(i.OutputSimd128Register(), i.InputInt8(1));
2484 124 : break;
2485 : }
2486 : case kX64I32x4Add: {
2487 12 : __ paddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2488 : break;
2489 : }
2490 : case kX64I32x4AddHoriz: {
2491 : CpuFeatureScope sse_scope(tasm(), SSSE3);
2492 4 : __ phaddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2493 : break;
2494 : }
2495 : case kX64I32x4Sub: {
2496 4 : __ psubd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2497 : break;
2498 : }
2499 : case kX64I32x4Mul: {
2500 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2501 4 : __ pmulld(i.OutputSimd128Register(), i.InputSimd128Register(1));
2502 : break;
2503 : }
2504 : case kX64I32x4MinS: {
2505 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2506 4 : __ pminsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2507 : break;
2508 : }
2509 : case kX64I32x4MaxS: {
2510 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2511 4 : __ pmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2512 : break;
2513 : }
2514 : case kX64I32x4Eq: {
2515 12 : __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2516 : break;
2517 : }
2518 : case kX64I32x4Ne: {
2519 16 : __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2520 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2521 : __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
2522 : break;
2523 : }
2524 : case kX64I32x4GtS: {
2525 8 : __ pcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2526 : break;
2527 : }
2528 : case kX64I32x4GeS: {
2529 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2530 : XMMRegister dst = i.OutputSimd128Register();
2531 : XMMRegister src = i.InputSimd128Register(1);
2532 8 : __ pminsd(dst, src);
2533 : __ pcmpeqd(dst, src);
2534 : break;
2535 : }
2536 : case kX64I32x4UConvertF32x4: {
2537 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2538 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2539 : XMMRegister dst = i.OutputSimd128Register();
2540 : XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
2541 : // NaN -> 0, negative -> 0
2542 4 : __ pxor(kScratchDoubleReg, kScratchDoubleReg);
2543 4 : __ maxps(dst, kScratchDoubleReg);
2544 : // scratch: float representation of max_signed
2545 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2546 4 : __ psrld(kScratchDoubleReg, 1); // 0x7fffffff
2547 4 : __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // 0x4f000000
2548 : // tmp: convert (src-max_signed).
2549 : // Positive overflow lanes -> 0x7FFFFFFF
2550 : // Negative lanes -> 0
2551 4 : __ movaps(tmp, dst);
2552 4 : __ subps(tmp, kScratchDoubleReg);
2553 : __ cmpleps(kScratchDoubleReg, tmp);
2554 4 : __ cvttps2dq(tmp, tmp);
2555 : __ pxor(tmp, kScratchDoubleReg);
2556 : __ pxor(kScratchDoubleReg, kScratchDoubleReg);
2557 : __ pmaxsd(tmp, kScratchDoubleReg);
2558 : // Convert. Overflow lanes above max_signed will be 0x80000000.
2559 4 : __ cvttps2dq(dst, dst);
2560 : // Add (src-max_signed) for overflow lanes.
2561 : __ paddd(dst, tmp);
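 : // Worked lane (illustrative): input 3.0e9f. scratch becomes
 : // 2147483648.0f (0x4F000000); tmp == 3.0e9f - 2147483648.0f ==
 : // 852516352.0f converts exactly, while cvttps2dq(dst) saturates the
 : // above-max_signed lane to 0x80000000, so this paddd produces
 : // 0x80000000 + 852516352 == 3000000000, the correct uint32.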
2562 : break;
2563 : }
2564 : case kX64I32x4UConvertI16x8Low: {
2565 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2566 4 : __ pmovzxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2567 : break;
2568 : }
2569 : case kX64I32x4UConvertI16x8High: {
2570 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2571 : XMMRegister dst = i.OutputSimd128Register();
2572 4 : __ palignr(dst, i.InputSimd128Register(0), 8);
2573 : __ pmovzxwd(dst, dst);
2574 : break;
2575 : }
2576 : case kX64I32x4ShrU: {
2577 248 : __ psrld(i.OutputSimd128Register(), i.InputInt8(1));
2578 124 : break;
2579 : }
2580 : case kX64I32x4MinU: {
2581 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2582 4 : __ pminud(i.OutputSimd128Register(), i.InputSimd128Register(1));
2583 : break;
2584 : }
2585 : case kX64I32x4MaxU: {
2586 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2587 4 : __ pmaxud(i.OutputSimd128Register(), i.InputSimd128Register(1));
2588 : break;
2589 : }
2590 : case kX64I32x4GtU: {
2591 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2592 : XMMRegister dst = i.OutputSimd128Register();
2593 : XMMRegister src = i.InputSimd128Register(1);
2594 8 : __ pmaxud(dst, src);
2595 : __ pcmpeqd(dst, src);
2596 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2597 : __ pxor(dst, kScratchDoubleReg);
2598 : break;
2599 : }
2600 : case kX64I32x4GeU: {
2601 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2602 : XMMRegister dst = i.OutputSimd128Register();
2603 : XMMRegister src = i.InputSimd128Register(1);
2604 8 : __ pminud(dst, src);
2605 : __ pcmpeqd(dst, src);
2606 : break;
2607 : }
2608 : case kX64S128Zero: {
2609 22 : XMMRegister dst = i.OutputSimd128Register();
2610 22 : __ xorps(dst, dst);
2611 : break;
2612 : }
2613 : case kX64I16x8Splat: {
2614 436 : XMMRegister dst = i.OutputSimd128Register();
2615 436 : if (instr->InputAt(0)->IsRegister()) {
2616 436 : __ movd(dst, i.InputRegister(0));
2617 : } else {
2618 0 : __ movd(dst, i.InputOperand(0));
2619 : }
2620 436 : __ pshuflw(dst, dst, 0x0);
2621 436 : __ pshufd(dst, dst, 0x0);
2622 : break;
2623 : }
2624 : case kX64I16x8ExtractLane: {
2625 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2626 48 : Register dst = i.OutputRegister();
2627 96 : __ pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
2628 48 : __ movsxwl(dst, dst);
2629 : break;
2630 : }
2631 : case kX64I16x8ReplaceLane: {
2632 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2633 52 : if (instr->InputAt(2)->IsRegister()) {
2634 104 : __ pinsrw(i.OutputSimd128Register(), i.InputRegister(2),
2635 52 : i.InputInt8(1));
2636 : } else {
2637 0 : __ pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
2638 : }
2639 : break;
2640 : }
2641 : case kX64I16x8SConvertI8x16Low: {
2642 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2643 4 : __ pmovsxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
2644 : break;
2645 : }
2646 : case kX64I16x8SConvertI8x16High: {
2647 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2648 : XMMRegister dst = i.OutputSimd128Register();
2649 4 : __ palignr(dst, i.InputSimd128Register(0), 8);
2650 : __ pmovsxbw(dst, dst);
2651 : break;
2652 : }
2653 : case kX64I16x8Neg: {
2654 : CpuFeatureScope sse_scope(tasm(), SSSE3);
2655 : XMMRegister dst = i.OutputSimd128Register();
2656 : XMMRegister src = i.InputSimd128Register(0);
2657 4 : if (dst == src) {
2658 4 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2659 : __ psignw(dst, kScratchDoubleReg);
2660 : } else {
2661 0 : __ pxor(dst, dst);
2662 : __ psubw(dst, src);
2663 : }
2664 : break;
2665 : }
2666 : case kX64I16x8Shl: {
2667 120 : __ psllw(i.OutputSimd128Register(), i.InputInt8(1));
2668 60 : break;
2669 : }
2670 : case kX64I16x8ShrS: {
2671 120 : __ psraw(i.OutputSimd128Register(), i.InputInt8(1));
2672 60 : break;
2673 : }
2674 : case kX64I16x8SConvertI32x4: {
2675 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2676 4 : __ packssdw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2677 : break;
2678 : }
2679 : case kX64I16x8Add: {
2680 4 : __ paddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2681 : break;
2682 : }
2683 : case kX64I16x8AddSaturateS: {
2684 4 : __ paddsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2685 : break;
2686 : }
2687 : case kX64I16x8AddHoriz: {
2688 : CpuFeatureScope sse_scope(tasm(), SSSE3);
2689 4 : __ phaddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2690 : break;
2691 : }
2692 : case kX64I16x8Sub: {
2693 4 : __ psubw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2694 : break;
2695 : }
2696 : case kX64I16x8SubSaturateS: {
2697 4 : __ psubsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2698 : break;
2699 : }
2700 : case kX64I16x8Mul: {
2701 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2702 4 : __ pmullw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2703 : break;
2704 : }
2705 : case kX64I16x8MinS: {
2706 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2707 4 : __ pminsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2708 : break;
2709 : }
2710 : case kX64I16x8MaxS: {
2711 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2712 4 : __ pmaxsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2713 : break;
2714 : }
2715 : case kX64I16x8Eq: {
2716 12 : __ pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2717 : break;
2718 : }
2719 : case kX64I16x8Ne: {
2720 16 : __ pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2721 : __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2722 : __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
2723 : break;
2724 : }
2725 : case kX64I16x8GtS: {
2726 8 : __ pcmpgtw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2727 : break;
2728 : }
2729 : case kX64I16x8GeS: {
2730 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2731 : XMMRegister dst = i.OutputSimd128Register();
2732 : XMMRegister src = i.InputSimd128Register(1);
2733 8 : __ pminsw(dst, src);
2734 : __ pcmpeqw(dst, src);
2735 : break;
2736 : }
2737 : case kX64I16x8UConvertI8x16Low: {
2738 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2739 4 : __ pmovzxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
2740 : break;
2741 : }
2742 : case kX64I16x8UConvertI8x16High: {
2743 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2744 : XMMRegister dst = i.OutputSimd128Register();
2745 4 : __ palignr(dst, i.InputSimd128Register(0), 8);
2746 : __ pmovzxbw(dst, dst);
2747 : break;
2748 : }
2749 : case kX64I16x8ShrU: {
2750 120 : __ psrlw(i.OutputSimd128Register(), i.InputInt8(1));
2751 60 : break;
2752 : }
2753 : case kX64I16x8UConvertI32x4: {
2754 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2755 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2756 : XMMRegister dst = i.OutputSimd128Register();
2757 : // Change negative lanes to 0x7FFFFFFF
2758 4 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2759 4 : __ psrld(kScratchDoubleReg, 1);
2760 : __ pminud(dst, kScratchDoubleReg);
2761 : __ pminud(kScratchDoubleReg, i.InputSimd128Register(1));
2762 : __ packusdw(dst, kScratchDoubleReg);
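 : // packusdw saturates signed dwords to unsigned words, so a lane with
 : // its top bit set would clamp to 0. The unsigned min against the
 : // 0x7FFFFFFF built by the pcmpeqd/psrld pair first replaces such lanes
 : // with a positive value that clamps to 0xFFFF instead
 : // (kX64I8x16UConvertI16x8 below plays the same trick at word width).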
2763 : break;
2764 : }
2765 : case kX64I16x8AddSaturateU: {
2766 4 : __ paddusw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2767 : break;
2768 : }
2769 : case kX64I16x8SubSaturateU: {
2770 4 : __ psubusw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2771 : break;
2772 : }
2773 : case kX64I16x8MinU: {
2774 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2775 4 : __ pminuw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2776 : break;
2777 : }
2778 : case kX64I16x8MaxU: {
2779 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2780 4 : __ pmaxuw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2781 : break;
2782 : }
2783 : case kX64I16x8GtU: {
2784 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2785 : XMMRegister dst = i.OutputSimd128Register();
2786 : XMMRegister src = i.InputSimd128Register(1);
2787 16 : __ pmaxuw(dst, src);
2788 : __ pcmpeqw(dst, src);
2789 : __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2790 : __ pxor(dst, kScratchDoubleReg);
2791 : break;
2792 : }
2793 : case kX64I16x8GeU: {
2794 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2795 : XMMRegister dst = i.OutputSimd128Register();
2796 : XMMRegister src = i.InputSimd128Register(1);
2797 16 : __ pminuw(dst, src);
2798 : __ pcmpeqw(dst, src);
2799 : break;
2800 : }
2801 : case kX64I8x16Splat: {
2802 : CpuFeatureScope sse_scope(tasm(), SSSE3);
2803 : XMMRegister dst = i.OutputSimd128Register();
2804 320 : if (instr->InputAt(0)->IsRegister()) {
2805 320 : __ movd(dst, i.InputRegister(0));
2806 : } else {
2807 0 : __ movd(dst, i.InputOperand(0));
2808 : }
2809 320 : __ xorps(kScratchDoubleReg, kScratchDoubleReg);
2810 : __ pshufb(dst, kScratchDoubleReg);
2811 : break;
2812 : }
2813 : case kX64I8x16ExtractLane: {
2814 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2815 48 : Register dst = i.OutputRegister();
2816 96 : __ pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
2817 48 : __ movsxbl(dst, dst);
2818 : break;
2819 : }
2820 : case kX64I8x16ReplaceLane: {
2821 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2822 84 : if (instr->InputAt(2)->IsRegister()) {
2823 168 : __ pinsrb(i.OutputSimd128Register(), i.InputRegister(2),
2824 84 : i.InputInt8(1));
2825 : } else {
2826 0 : __ pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
2827 : }
2828 : break;
2829 : }
2830 : case kX64I8x16SConvertI16x8: {
2831 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2832 4 : __ packsswb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2833 : break;
2834 : }
2835 : case kX64I8x16Neg: {
2836 : CpuFeatureScope sse_scope(tasm(), SSSE3);
2837 : XMMRegister dst = i.OutputSimd128Register();
2838 : XMMRegister src = i.InputSimd128Register(0);
2839 4 : if (dst == src) {
2840 4 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2841 : __ psignb(dst, kScratchDoubleReg);
2842 : } else {
2843 0 : __ pxor(dst, dst);
2844 : __ psubb(dst, src);
2845 : }
2846 : break;
2847 : }
2848 : case kX64I8x16Shl: {
2849 : XMMRegister dst = i.OutputSimd128Register();
2850 : DCHECK_EQ(dst, i.InputSimd128Register(0));
2851 28 : int8_t shift = i.InputInt8(1) & 0x7;
2852 28 : if (shift < 4) {
2853 : // For small shifts, doubling is faster.
2854 60 : for (int i = 0; i < shift; ++i) {
2855 24 : __ paddb(dst, dst);
2856 : }
2857 : } else {
2858 : // Mask off the unwanted bits before word-shifting.
2859 16 : __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2860 16 : __ psrlw(kScratchDoubleReg, 8 + shift);
2861 : __ packuswb(kScratchDoubleReg, kScratchDoubleReg);
2862 : __ pand(dst, kScratchDoubleReg);
2863 16 : __ psllw(dst, shift);
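 : // E.g. shift == 5 (illustrative): 0xFFFF >> 13 packs into the byte mask
 : // 0x07, pand clears the top five bits of every byte, and psllw can then
 : // shift whole words by 5 without any bit crossing into its neighbor.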
2864 : }
2865 : break;
2866 : }
2867 : case kX64I8x16ShrS: {
2868 : XMMRegister dst = i.OutputSimd128Register();
2869 : XMMRegister src = i.InputSimd128Register(0);
2870 28 : int8_t shift = i.InputInt8(1) & 0x7;
2871 : // Unpack the bytes into words, do arithmetic shifts, and repack.
2872 28 : __ punpckhbw(kScratchDoubleReg, src);
2873 : __ punpcklbw(dst, src);
2874 28 : __ psraw(kScratchDoubleReg, 8 + shift);
2875 28 : __ psraw(dst, 8 + shift);
2876 : __ packsswb(dst, kScratchDoubleReg);
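 : // punpckhbw/punpcklbw put each source byte into the high half of a
 : // 16-bit word, so psraw by 8 + shift discards the low filler byte and
 : // leaves the sign-extended, shifted byte ready for packsswb (the ShrU
 : // case below mirrors this with psrlw and packuswb).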
2877 : break;
2878 : }
2879 : case kX64I8x16Add: {
2880 4 : __ paddb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2881 : break;
2882 : }
2883 : case kX64I8x16AddSaturateS: {
2884 4 : __ paddsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2885 : break;
2886 : }
2887 : case kX64I8x16Sub: {
2888 4 : __ psubb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2889 : break;
2890 : }
2891 : case kX64I8x16SubSaturateS: {
2892 4 : __ psubsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2893 : break;
2894 : }
2895 : case kX64I8x16Mul: {
2896 : XMMRegister dst = i.OutputSimd128Register();
2897 : DCHECK_EQ(dst, i.InputSimd128Register(0));
2898 : XMMRegister right = i.InputSimd128Register(1);
2899 : XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
2900 : // I16x8 view of I8x16
2901 : // left = AAaa AAaa ... AAaa AAaa
2902 : // right= BBbb BBbb ... BBbb BBbb
2903 : // t = 00AA 00AA ... 00AA 00AA
2904 : // s = 00BB 00BB ... 00BB 00BB
2905 4 : __ movaps(tmp, dst);
2906 4 : __ movaps(kScratchDoubleReg, right);
2907 4 : __ psrlw(tmp, 8);
2908 4 : __ psrlw(kScratchDoubleReg, 8);
2909 : // dst = left * 256
2910 4 : __ psllw(dst, 8);
2911 : // t = I16x8Mul(t, s)
2912 : // => __PP __PP ... __PP __PP
2913 : __ pmullw(tmp, kScratchDoubleReg);
2914 : // dst = I16x8Mul(left * 256, right)
2915 : // => pp__ pp__ ... pp__ pp__
2916 : __ pmullw(dst, right);
2917 : // t = I16x8Shl(t, 8)
2918 : // => PP00 PP00 ... PP00 PP00
2919 4 : __ psllw(tmp, 8);
2920 : // dst = I16x8Shr(dst, 8)
2921 : // => 00pp 00pp ... 00pp 00pp
2922 4 : __ psrlw(dst, 8);
2923 : // dst = I16x8Or(dst, t)
2924 : // => PPpp PPpp ... PPpp PPpp
2925 : __ por(dst, tmp);
2926 : break;
2927 : }
2928 : case kX64I8x16MinS: {
2929 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2930 4 : __ pminsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2931 : break;
2932 : }
2933 : case kX64I8x16MaxS: {
2934 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2935 4 : __ pmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2936 : break;
2937 : }
2938 : case kX64I8x16Eq: {
2939 12 : __ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2940 : break;
2941 : }
2942 : case kX64I8x16Ne: {
2943 16 : __ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2944 : __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
2945 : __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
2946 : break;
2947 : }
2948 : case kX64I8x16GtS: {
2949 8 : __ pcmpgtb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2950 : break;
2951 : }
2952 : case kX64I8x16GeS: {
2953 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2954 : XMMRegister dst = i.OutputSimd128Register();
2955 : XMMRegister src = i.InputSimd128Register(1);
2956 8 : __ pminsb(dst, src);
2957 : __ pcmpeqb(dst, src);
2958 : break;
2959 : }
2960 : case kX64I8x16UConvertI16x8: {
2961 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2962 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2963 : XMMRegister dst = i.OutputSimd128Register();
2964 : // Change negative lanes to 0x7FFF
2965 4 : __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2966 4 : __ psrlw(kScratchDoubleReg, 1);
2967 : __ pminuw(dst, kScratchDoubleReg);
2968 : __ pminuw(kScratchDoubleReg, i.InputSimd128Register(1));
2969 : __ packuswb(dst, kScratchDoubleReg);
2970 : break;
2971 : }
2972 : case kX64I8x16ShrU: {
2973 : XMMRegister dst = i.OutputSimd128Register();
2974 : XMMRegister src = i.InputSimd128Register(0);
2975 28 : int8_t shift = i.InputInt8(1) & 0x7;
2976 : // Unpack the bytes into words, do logical shifts, and repack.
2977 28 : __ punpckhbw(kScratchDoubleReg, src);
2978 : __ punpcklbw(dst, src);
2979 28 : __ psrlw(kScratchDoubleReg, 8 + shift);
2980 28 : __ psrlw(dst, 8 + shift);
2981 : __ packuswb(dst, kScratchDoubleReg);
2982 : break;
2983 : }
2984 : case kX64I8x16AddSaturateU: {
2985 4 : __ paddusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2986 : break;
2987 : }
2988 : case kX64I8x16SubSaturateU: {
2989 4 : __ psubusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2990 : break;
2991 : }
2992 : case kX64I8x16MinU: {
2993 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2994 4 : __ pminub(i.OutputSimd128Register(), i.InputSimd128Register(1));
2995 : break;
2996 : }
2997 : case kX64I8x16MaxU: {
2998 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2999 4 : __ pmaxub(i.OutputSimd128Register(), i.InputSimd128Register(1));
3000 : break;
3001 : }
3002 : case kX64I8x16GtU: {
3003 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
3004 : XMMRegister dst = i.OutputSimd128Register();
3005 : XMMRegister src = i.InputSimd128Register(1);
3006 16 : __ pmaxub(dst, src);
3007 : __ pcmpeqb(dst, src);
3008 : __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
3009 : __ pxor(dst, kScratchDoubleReg);
3010 : break;
3011 : }
3012 : case kX64I8x16GeU: {
3013 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
3014 : XMMRegister dst = i.OutputSimd128Register();
3015 : XMMRegister src = i.InputSimd128Register(1);
3016 16 : __ pminub(dst, src);
3017 : __ pcmpeqb(dst, src);
3018 : break;
3019 : }
3020 : case kX64S128And: {
3021 4 : __ pand(i.OutputSimd128Register(), i.InputSimd128Register(1));
3022 : break;
3023 : }
3024 : case kX64S128Or: {
3025 4 : __ por(i.OutputSimd128Register(), i.InputSimd128Register(1));
3026 : break;
3027 : }
3028 : case kX64S128Xor: {
3029 4 : __ pxor(i.OutputSimd128Register(), i.InputSimd128Register(1));
3030 : break;
3031 : }
3032 : case kX64S128Not: {
3033 : XMMRegister dst = i.OutputSimd128Register();
3034 : XMMRegister src = i.InputSimd128Register(0);
3035 4 : if (dst == src) {
3036 4 : __ movaps(kScratchDoubleReg, dst);
3037 : __ pcmpeqd(dst, dst);
3038 : __ pxor(dst, kScratchDoubleReg);
3039 : } else {
3040 0 : __ pcmpeqd(dst, dst);
3041 : __ pxor(dst, src);
3042 : }
3043 :
3044 : break;
3045 : }
3046 : case kX64S128Select: {
3047 : // Mask used here is stored in dst.
3048 28 : XMMRegister dst = i.OutputSimd128Register();
3049 28 : __ movaps(kScratchDoubleReg, i.InputSimd128Register(1));
3050 28 : __ xorps(kScratchDoubleReg, i.InputSimd128Register(2));
3051 28 : __ andps(dst, kScratchDoubleReg);
3052 28 : __ xorps(dst, i.InputSimd128Register(2));
3053 : break;
3054 : }
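// Note: the select uses the branchless bitwise identity
//   select(mask, a, b) = b ^ (mask & (a ^ b)),
// which takes bits of a where the mask is 1 and bits of b where it is 0,
// in three ops and one scratch register. Standalone sketch (illustrative,
// not part of V8):
#include <cstdint>
uint64_t S128SelectBits(uint64_t mask, uint64_t a, uint64_t b) {
  return b ^ (mask & (a ^ b));  // == (mask & a) | (~mask & b)
}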
3055 : case kX64S8x16Shuffle: {
3056 : XMMRegister dst = i.OutputSimd128Register();
3057 : Register tmp = i.TempRegister(0);
3058 : // Prepare 16 byte aligned buffer for shuffle control mask
3059 1460 : __ movq(tmp, rsp);
3060 : __ andq(rsp, Immediate(-16));
3061 1460 : if (instr->InputCount() == 5) { // only one input operand
3062 696 : uint32_t mask[4] = {};
3063 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3064 6264 : for (int j = 4; j > 0; j--) {
3065 5568 : mask[j - 1] = i.InputUint32(j);
3066 : }
3067 :
3068 696 : SetupShuffleMaskOnStack(tasm(), mask);
3069 1392 : __ pshufb(dst, Operand(rsp, 0));
3070 : } else { // two input operands
3071 : DCHECK_EQ(6, instr->InputCount());
3072 1528 : ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 0);
3073 764 : uint32_t mask[4] = {};
3074 6876 : for (int j = 5; j > 1; j--) {
3075 3056 : uint32_t lanes = i.InputUint32(j);
3076 27504 : for (int k = 0; k < 32; k += 8) {
3077 12224 : uint8_t lane = lanes >> k;
3078 12224 : mask[j - 2] |= (lane < kSimd128Size ? lane : 0x80) << k;
3079 : }
3080 : }
3081 764 : SetupShuffleMaskOnStack(tasm(), mask);
3082 1528 : __ pshufb(kScratchDoubleReg, Operand(rsp, 0));
3083 764 : uint32_t mask1[4] = {};
3084 764 : if (instr->InputAt(1)->IsSimd128Register()) {
3085 : XMMRegister src1 = i.InputSimd128Register(1);
3086 764 : if (src1 != dst) __ movups(dst, src1);
3087 : } else {
3088 0 : __ movups(dst, i.InputOperand(1));
3089 : }
3090 6876 : for (int j = 5; j > 1; j--) {
3091 3056 : uint32_t lanes = i.InputUint32(j);
3092 27504 : for (int k = 0; k < 32; k += 8) {
3093 12224 : uint8_t lane = lanes >> k;
3094 12224 : mask1[j - 2] |= (lane >= kSimd128Size ? (lane & 0x0F) : 0x80) << k;
3095 : }
3096 : }
3097 764 : SetupShuffleMaskOnStack(tasm(), mask1);
3098 1528 : __ pshufb(dst, Operand(rsp, 0));
3099 : __ por(dst, kScratchDoubleReg);
3100 : }
3101 : __ movq(rsp, tmp);
3102 : break;
3103 : }
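// Note: pshufb reads only one source register, but a control byte with the
// sign bit set (0x80) writes zero, so the 32-lane shuffle above is built
// from two pshufb passes whose masks zero out the other source's lanes,
// OR-ed together. Standalone scalar sketch (illustrative, not part of V8):
#include <cstddef>
#include <cstdint>
void S8x16Shuffle2(const uint8_t src0[16], const uint8_t src1[16],
                   const uint8_t lanes[16], uint8_t dst[16]) {
  for (size_t i = 0; i < 16; ++i) {
    uint8_t lo = lanes[i] < 16 ? src0[lanes[i]] : 0;          // pass 1: 0x80 zeroes
    uint8_t hi = lanes[i] >= 16 ? src1[lanes[i] & 0x0F] : 0;  // pass 2: lane & 0x0F
    dst[i] = lo | hi;                                         // por
  }
}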
3104 : case kX64S32x4Swizzle: {
3105 : DCHECK_EQ(2, instr->InputCount());
3106 840 : ASSEMBLE_SIMD_IMM_INSTR(pshufd, i.OutputSimd128Register(), 0,
3107 : i.InputInt8(1));
3108 : break;
3109 : }
3110 : case kX64S32x4Shuffle: {
3111 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
3112 : DCHECK_EQ(4, instr->InputCount()); // Swizzles should be handled above.
3113 : int8_t shuffle = i.InputInt8(2);
3114 : DCHECK_NE(0xe4, shuffle); // A simple blend should be handled below.
3115 928 : ASSEMBLE_SIMD_IMM_INSTR(pshufd, kScratchDoubleReg, 1, shuffle);
3116 928 : ASSEMBLE_SIMD_IMM_INSTR(pshufd, i.OutputSimd128Register(), 0, shuffle);
3117 928 : __ pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputInt8(3));
3118 : break;
3119 : }
3120 : case kX64S16x8Blend: {
3121 112 : ASSEMBLE_SIMD_IMM_SHUFFLE(pblendw, SSE4_1, i.InputInt8(2));
3122 56 : break;
3123 : }
3124 : case kX64S16x8HalfShuffle1: {
3125 212 : XMMRegister dst = i.OutputSimd128Register();
3126 636 : ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, i.InputInt8(1));
3127 212 : __ pshufhw(dst, dst, i.InputInt8(2));
3128 : break;
3129 : }
3130 : case kX64S16x8HalfShuffle2: {
3131 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
3132 192 : XMMRegister dst = i.OutputSimd128Register();
3133 576 : ASSEMBLE_SIMD_IMM_INSTR(pshuflw, kScratchDoubleReg, 1, i.InputInt8(2));
3134 192 : __ pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputInt8(3));
3135 576 : ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, i.InputInt8(2));
3136 192 : __ pshufhw(dst, dst, i.InputInt8(3));
3137 192 : __ pblendw(dst, kScratchDoubleReg, i.InputInt8(4));
3138 : break;
3139 : }
3140 : case kX64S8x16Alignr: {
3141 480 : ASSEMBLE_SIMD_IMM_SHUFFLE(palignr, SSSE3, i.InputInt8(2));
3142 240 : break;
3143 : }
3144 : case kX64S16x8Dup: {
3145 180 : XMMRegister dst = i.OutputSimd128Register();
3146 180 : int8_t lane = i.InputInt8(1) & 0x7;
3147 180 : int8_t lane4 = lane & 0x3;
3148 180 : int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
3149 180 : if (lane < 4) {
3150 296 : ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, half_dup);
3151 148 : __ pshufd(dst, dst, 0);
3152 : } else {
3153 64 : ASSEMBLE_SIMD_IMM_INSTR(pshufhw, dst, 0, half_dup);
3154 32 : __ pshufd(dst, dst, 0xaa);
3155 : }
3156 : break;
3157 : }
3158 : case kX64S8x16Dup: {
3159 : XMMRegister dst = i.OutputSimd128Register();
3160 164 : int8_t lane = i.InputInt8(1) & 0xf;
3161 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3162 164 : if (lane < 8) {
3163 156 : __ punpcklbw(dst, dst);
3164 : } else {
3165 8 : __ punpckhbw(dst, dst);
3166 : }
3167 164 : lane &= 0x7;
3168 164 : int8_t lane4 = lane & 0x3;
3169 164 : int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
3170 164 : if (lane < 4) {
3171 164 : __ pshuflw(dst, dst, half_dup);
3172 164 : __ pshufd(dst, dst, 0);
3173 : } else {
3174 0 : __ pshufhw(dst, dst, half_dup);
3175 0 : __ pshufd(dst, dst, 0xaa);
3176 : }
3177 : break;
3178 : }
3179 : case kX64S64x2UnpackHigh:
3180 0 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhqdq);
3181 : break;
3182 : case kX64S32x4UnpackHigh:
3183 360 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhdq);
3184 : break;
3185 : case kX64S16x8UnpackHigh:
3186 324 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhwd);
3187 : break;
3188 : case kX64S8x16UnpackHigh:
3189 252 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhbw);
3190 : break;
3191 : case kX64S64x2UnpackLow:
3192 24 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklqdq);
3193 : break;
3194 : case kX64S32x4UnpackLow:
3195 276 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckldq);
3196 : break;
3197 : case kX64S16x8UnpackLow:
3198 252 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklwd);
3199 : break;
3200 : case kX64S8x16UnpackLow:
3201 264 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklbw);
3202 : break;
3203 : case kX64S16x8UnzipHigh: {
3204 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
3205 : XMMRegister dst = i.OutputSimd128Register();
3206 : XMMRegister src2 = dst;
3207 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3208 116 : if (instr->InputCount() == 2) {
3209 216 : ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
3210 108 : __ psrld(kScratchDoubleReg, 16);
3211 : src2 = kScratchDoubleReg;
3212 : }
3213 116 : __ psrld(dst, 16);
3214 : __ packusdw(dst, src2);
3215 : break;
3216 : }
3217 : case kX64S16x8UnzipLow: {
3218 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
3219 : XMMRegister dst = i.OutputSimd128Register();
3220 : XMMRegister src2 = dst;
3221 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3222 76 : __ pxor(kScratchDoubleReg, kScratchDoubleReg);
3223 76 : if (instr->InputCount() == 2) {
3224 136 : ASSEMBLE_SIMD_IMM_INSTR(pblendw, kScratchDoubleReg, 1, 0x55);
3225 : src2 = kScratchDoubleReg;
3226 : }
3227 76 : __ pblendw(dst, kScratchDoubleReg, 0xaa);
3228 : __ packusdw(dst, src2);
3229 : break;
3230 : }
3231 : case kX64S8x16UnzipHigh: {
3232 : XMMRegister dst = i.OutputSimd128Register();
3233 : XMMRegister src2 = dst;
3234 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3235 80 : if (instr->InputCount() == 2) {
3236 144 : ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
3237 72 : __ psrlw(kScratchDoubleReg, 8);
3238 : src2 = kScratchDoubleReg;
3239 : }
3240 80 : __ psrlw(dst, 8);
3241 : __ packuswb(dst, src2);
3242 : break;
3243 : }
3244 : case kX64S8x16UnzipLow: {
3245 : XMMRegister dst = i.OutputSimd128Register();
3246 : XMMRegister src2 = dst;
3247 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3248 156 : if (instr->InputCount() == 2) {
3249 280 : ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
3250 140 : __ psllw(kScratchDoubleReg, 8);
3251 140 : __ psrlw(kScratchDoubleReg, 8);
3252 : src2 = kScratchDoubleReg;
3253 : }
3254 156 : __ psllw(dst, 8);
3255 156 : __ psrlw(dst, 8);
3256 : __ packuswb(dst, src2);
3257 : break;
3258 : }
3259 : case kX64S8x16TransposeLow: {
3260 : XMMRegister dst = i.OutputSimd128Register();
3261 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3262 96 : __ psllw(dst, 8);
3263 96 : if (instr->InputCount() == 1) {
3264 8 : __ movups(kScratchDoubleReg, dst);
3265 : } else {
3266 : DCHECK_EQ(2, instr->InputCount());
3267 176 : ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
3268 88 : __ psllw(kScratchDoubleReg, 8);
3269 : }
3270 96 : __ psrlw(dst, 8);
3271 : __ por(dst, kScratchDoubleReg);
3272 : break;
3273 : }
3274 : case kX64S8x16TransposeHigh: {
3275 : XMMRegister dst = i.OutputSimd128Register();
3276 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3277 132 : __ psrlw(dst, 8);
3278 132 : if (instr->InputCount() == 1) {
3279 8 : __ movups(kScratchDoubleReg, dst);
3280 : } else {
3281 : DCHECK_EQ(2, instr->InputCount());
3282 248 : ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
3283 124 : __ psrlw(kScratchDoubleReg, 8);
3284 : }
3285 132 : __ psllw(kScratchDoubleReg, 8);
3286 : __ por(dst, kScratchDoubleReg);
3287 : break;
3288 : }
3289 : case kX64S8x8Reverse:
3290 : case kX64S8x4Reverse:
3291 : case kX64S8x2Reverse: {
3292 : DCHECK_EQ(1, instr->InputCount());
3293 : XMMRegister dst = i.OutputSimd128Register();
3294 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3295 280 : if (arch_opcode != kX64S8x2Reverse) {
3296 : // First shuffle words into position.
3297 188 : int8_t shuffle_mask = arch_opcode == kX64S8x4Reverse ? 0xB1 : 0x1B;
3298 188 : __ pshuflw(dst, dst, shuffle_mask);
3299 188 : __ pshufhw(dst, dst, shuffle_mask);
3300 : }
3301 280 : __ movaps(kScratchDoubleReg, dst);
3302 280 : __ psrlw(kScratchDoubleReg, 8);
3303 280 : __ psllw(dst, 8);
3304 : __ por(dst, kScratchDoubleReg);
3305 : break;
3306 : }
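// Note: the byte reversals work in two steps: a word-level shuffle (0x1B
// reverses the four words of each half, 0xB1 swaps word pairs), then the
// two bytes inside every word are exchanged with a shift pair plus OR.
// Standalone sketch of that final byte swap (illustrative, not part of V8):
#include <cstdint>
uint16_t SwapBytesInWord(uint16_t w) {
  return static_cast<uint16_t>((w >> 8) | (w << 8));  // psrlw 8 / psllw 8 / por
}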
3307 : case kX64S1x4AnyTrue:
3308 : case kX64S1x8AnyTrue:
3309 : case kX64S1x16AnyTrue: {
3310 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
3311 : Register dst = i.OutputRegister();
3312 : XMMRegister src = i.InputSimd128Register(0);
3313 : Register tmp = i.TempRegister(0);
3314 60 : __ xorq(tmp, tmp);
3315 : __ movq(dst, Immediate(1));
3316 : __ ptest(src, src);
3317 60 : __ cmovq(zero, dst, tmp);
3318 : break;
3319 : }
3320 : case kX64S1x4AllTrue:
3321 : case kX64S1x8AllTrue:
3322 : case kX64S1x16AllTrue: {
3323 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
3324 : Register dst = i.OutputRegister();
3325 : XMMRegister src = i.InputSimd128Register(0);
3326 : Register tmp = i.TempRegister(0);
3327 60 : __ movq(tmp, Immediate(1));
3328 : __ xorq(dst, dst);
3329 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3330 : __ pxor(kScratchDoubleReg, src);
3331 : __ ptest(kScratchDoubleReg, kScratchDoubleReg);
3332 60 : __ cmovq(zero, dst, tmp);
3333 : break;
3334 : }
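// Note: for these boolean vectors every lane is all-ones or all-zeros, so
// AnyTrue reduces to "is the register nonzero" (ptest of src against
// itself) and AllTrue to "is its complement zero"; the resulting flag is
// turned into 0/1 with a cmov rather than a branch. Standalone sketch over
// the two 64-bit halves (illustrative, not part of V8):
#include <cstdint>
int AnyTrue128(uint64_t lo, uint64_t hi) { return (lo | hi) != 0; }
int AllTrue128(uint64_t lo, uint64_t hi) { return (~lo | ~hi) == 0; }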
3335 : case kX64StackCheck:
3336 562860 : __ CompareRoot(rsp, RootIndex::kStackLimit);
3337 562860 : break;
3338 : case kWord32AtomicExchangeInt8: {
3339 801 : __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
3340 801 : __ movsxbl(i.InputRegister(0), i.InputRegister(0));
3341 801 : break;
3342 : }
3343 : case kWord32AtomicExchangeUint8: {
3344 646 : __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
3345 : __ movzxbl(i.InputRegister(0), i.InputRegister(0));
3346 : break;
3347 : }
3348 : case kWord32AtomicExchangeInt16: {
3349 833 : __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
3350 835 : __ movsxwl(i.InputRegister(0), i.InputRegister(0));
3351 835 : break;
3352 : }
3353 : case kWord32AtomicExchangeUint16: {
3354 800 : __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
3355 : __ movzxwl(i.InputRegister(0), i.InputRegister(0));
3356 : break;
3357 : }
3358 : case kWord32AtomicExchangeWord32: {
3359 1276 : __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
3360 : break;
3361 : }
3362 : case kWord32AtomicCompareExchangeInt8: {
3363 112 : __ lock();
3364 112 : __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
3365 112 : __ movsxbl(rax, rax);
3366 112 : break;
3367 : }
3368 : case kWord32AtomicCompareExchangeUint8: {
3369 133 : __ lock();
3370 133 : __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
3371 : __ movzxbl(rax, rax);
3372 : break;
3373 : }
3374 : case kWord32AtomicCompareExchangeInt16: {
3375 112 : __ lock();
3376 112 : __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
3377 112 : __ movsxwl(rax, rax);
3378 112 : break;
3379 : }
3380 : case kWord32AtomicCompareExchangeUint16: {
3381 133 : __ lock();
3382 133 : __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
3383 : __ movzxwl(rax, rax);
3384 : break;
3385 : }
3386 : case kWord32AtomicCompareExchangeWord32: {
3387 264 : __ lock();
3388 : __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
3389 : break;
3390 : }
3391 : #define ATOMIC_BINOP_CASE(op, inst) \
3392 : case kWord32Atomic##op##Int8: \
3393 : ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
3394 : __ movsxbl(rax, rax); \
3395 : break; \
3396 : case kWord32Atomic##op##Uint8: \
3397 : ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
3398 : __ movzxbl(rax, rax); \
3399 : break; \
3400 : case kWord32Atomic##op##Int16: \
3401 : ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
3402 : __ movsxwl(rax, rax); \
3403 : break; \
3404 : case kWord32Atomic##op##Uint16: \
3405 : ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
3406 : __ movzxwl(rax, rax); \
3407 : break; \
3408 : case kWord32Atomic##op##Word32: \
3409 : ASSEMBLE_ATOMIC_BINOP(inst, movl, cmpxchgl); \
3410 : break;
3411 8752 : ATOMIC_BINOP_CASE(Add, addl)
3412 8267 : ATOMIC_BINOP_CASE(Sub, subl)
3413 8186 : ATOMIC_BINOP_CASE(And, andl)
3414 7557 : ATOMIC_BINOP_CASE(Or, orl)
3415 8369 : ATOMIC_BINOP_CASE(Xor, xorl)
3416 : #undef ATOMIC_BINOP_CASE
3417 : case kX64Word64AtomicExchangeUint8: {
3418 1454 : __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
3419 : __ movzxbq(i.InputRegister(0), i.InputRegister(0));
3420 : break;
3421 : }
3422 : case kX64Word64AtomicExchangeUint16: {
3423 1280 : __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
3424 : __ movzxwq(i.InputRegister(0), i.InputRegister(0));
3425 : break;
3426 : }
3427 : case kX64Word64AtomicExchangeUint32: {
3428 714 : __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
3429 : break;
3430 : }
3431 : case kX64Word64AtomicExchangeUint64: {
3432 963 : __ xchgq(i.InputRegister(0), i.MemoryOperand(1));
3433 : break;
3434 : }
3435 : case kX64Word64AtomicCompareExchangeUint8: {
3436 21 : __ lock();
3437 21 : __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
3438 : __ movzxbq(rax, rax);
3439 : break;
3440 : }
3441 : case kX64Word64AtomicCompareExchangeUint16: {
3442 28 : __ lock();
3443 28 : __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
3444 : __ movzxwq(rax, rax);
3445 : break;
3446 : }
3447 : case kX64Word64AtomicCompareExchangeUint32: {
3448 29 : __ lock();
3449 : __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
3450 : break;
3451 : }
3452 : case kX64Word64AtomicCompareExchangeUint64: {
3453 276 : __ lock();
3454 : __ cmpxchgq(i.MemoryOperand(2), i.InputRegister(1));
3455 : break;
3456 : }
3457 : #define ATOMIC64_BINOP_CASE(op, inst) \
3458 : case kX64Word64Atomic##op##Uint8: \
3459 : ASSEMBLE_ATOMIC64_BINOP(inst, movb, cmpxchgb); \
3460 : __ movzxbq(rax, rax); \
3461 : break; \
3462 : case kX64Word64Atomic##op##Uint16: \
3463 : ASSEMBLE_ATOMIC64_BINOP(inst, movw, cmpxchgw); \
3464 : __ movzxwq(rax, rax); \
3465 : break; \
3466 : case kX64Word64Atomic##op##Uint32: \
3467 : ASSEMBLE_ATOMIC64_BINOP(inst, movl, cmpxchgl); \
3468 : break; \
3469 : case kX64Word64Atomic##op##Uint64: \
3470 : ASSEMBLE_ATOMIC64_BINOP(inst, movq, cmpxchgq); \
3471 : break;
3472 8542 : ATOMIC64_BINOP_CASE(Add, addq)
3473 7063 : ATOMIC64_BINOP_CASE(Sub, subq)
3474 8118 : ATOMIC64_BINOP_CASE(And, andq)
3475 8870 : ATOMIC64_BINOP_CASE(Or, orq)
3476 7084 : ATOMIC64_BINOP_CASE(Xor, xorq)
3477 : #undef ATOMIC64_BINOP_CASE
3478 : case kWord32AtomicLoadInt8:
3479 : case kWord32AtomicLoadUint8:
3480 : case kWord32AtomicLoadInt16:
3481 : case kWord32AtomicLoadUint16:
3482 : case kWord32AtomicLoadWord32:
3483 : case kWord32AtomicStoreWord8:
3484 : case kWord32AtomicStoreWord16:
3485 : case kWord32AtomicStoreWord32:
3486 : case kX64Word64AtomicLoadUint8:
3487 : case kX64Word64AtomicLoadUint16:
3488 : case kX64Word64AtomicLoadUint32:
3489 : case kX64Word64AtomicLoadUint64:
3490 : case kX64Word64AtomicStoreWord8:
3491 : case kX64Word64AtomicStoreWord16:
3492 : case kX64Word64AtomicStoreWord32:
3493 : case kX64Word64AtomicStoreWord64:
3494 0 : UNREACHABLE(); // Won't be generated by instruction selector.
3495 : break;
3496 : }
3497 : return kSuccess;
 3498 : } // NOLINT(readability/fn_size)
3499 :
3500 : #undef ASSEMBLE_UNOP
3501 : #undef ASSEMBLE_BINOP
3502 : #undef ASSEMBLE_COMPARE
3503 : #undef ASSEMBLE_MULT
3504 : #undef ASSEMBLE_SHIFT
3505 : #undef ASSEMBLE_MOVX
3506 : #undef ASSEMBLE_SSE_BINOP
3507 : #undef ASSEMBLE_SSE_UNOP
3508 : #undef ASSEMBLE_AVX_BINOP
3509 : #undef ASSEMBLE_IEEE754_BINOP
3510 : #undef ASSEMBLE_IEEE754_UNOP
3511 : #undef ASSEMBLE_ATOMIC_BINOP
3512 : #undef ASSEMBLE_ATOMIC64_BINOP
3513 : #undef ASSEMBLE_SIMD_INSTR
3514 : #undef ASSEMBLE_SIMD_IMM_INSTR
3515 : #undef ASSEMBLE_SIMD_PUNPCK_SHUFFLE
3516 : #undef ASSEMBLE_SIMD_IMM_SHUFFLE
3517 :
3518 : namespace {
3519 :
3520 6215662 : Condition FlagsConditionToCondition(FlagsCondition condition) {
3521 6215662 : switch (condition) {
3522 : case kUnorderedEqual:
3523 : case kEqual:
3524 : return equal;
3525 : case kUnorderedNotEqual:
3526 : case kNotEqual:
3527 1455948 : return not_equal;
3528 : case kSignedLessThan:
3529 176737 : return less;
3530 : case kSignedGreaterThanOrEqual:
3531 59103 : return greater_equal;
3532 : case kSignedLessThanOrEqual:
3533 67408 : return less_equal;
3534 : case kSignedGreaterThan:
3535 67152 : return greater;
3536 : case kUnsignedLessThan:
3537 165286 : return below;
3538 : case kUnsignedGreaterThanOrEqual:
3539 377166 : return above_equal;
3540 : case kUnsignedLessThanOrEqual:
3541 945490 : return below_equal;
3542 : case kUnsignedGreaterThan:
3543 125711 : return above;
3544 : case kOverflow:
3545 179872 : return overflow;
3546 : case kNotOverflow:
3547 1008 : return no_overflow;
3548 : default:
3549 : break;
3550 : }
3551 0 : UNREACHABLE();
3552 : }
3553 :
3554 : } // namespace
3555 :
3556 : // Assembles branches after this instruction.
3557 5366374 : void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
3558 : Label::Distance flabel_distance =
3559 5366374 : branch->fallthru ? Label::kNear : Label::kFar;
3560 5366374 : Label* tlabel = branch->true_label;
3561 5366374 : Label* flabel = branch->false_label;
3562 5366374 : if (branch->condition == kUnorderedEqual) {
3563 57543 : __ j(parity_even, flabel, flabel_distance);
3564 5308831 : } else if (branch->condition == kUnorderedNotEqual) {
3565 104727 : __ j(parity_even, tlabel);
3566 : }
3567 5366372 : __ j(FlagsConditionToCondition(branch->condition), tlabel);
3568 :
3569 5366397 : if (!branch->fallthru) __ jmp(flabel, flabel_distance);
3570 5366397 : }
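// Note: ucomisd/ucomiss raise the parity flag on an unordered (NaN)
// comparison, which is why the two parity_even jumps above route NaN
// operands before the main condition is tested: NaN makes kUnorderedEqual
// false and kUnorderedNotEqual true. Standalone sketch of the semantics
// (illustrative, not part of V8):
#include <cmath>
bool UnorderedEqual(double a, double b) {
  if (std::isnan(a) || std::isnan(b)) return false;  // j(parity_even, flabel)
  return a == b;                                     // j(equal, tlabel)
}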
3571 :
3572 0 : void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
3573 : Instruction* instr) {
3574 : // TODO(jarin) Handle float comparisons (kUnordered[Not]Equal).
3575 0 : if (condition == kUnorderedEqual || condition == kUnorderedNotEqual) {
3576 : return;
3577 : }
3578 :
3579 : condition = NegateFlagsCondition(condition);
3580 0 : __ movl(kScratchRegister, Immediate(0));
3581 0 : __ cmovq(FlagsConditionToCondition(condition), kSpeculationPoisonRegister,
3582 0 : kScratchRegister);
3583 : }
3584 :
3585 330893 : void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
3586 : BranchInfo* branch) {
3587 : Label::Distance flabel_distance =
3588 330893 : branch->fallthru ? Label::kNear : Label::kFar;
3589 330893 : Label* tlabel = branch->true_label;
3590 330893 : Label* flabel = branch->false_label;
3591 330893 : Label nodeopt;
3592 330893 : if (branch->condition == kUnorderedEqual) {
3593 0 : __ j(parity_even, flabel, flabel_distance);
3594 330893 : } else if (branch->condition == kUnorderedNotEqual) {
3595 4138 : __ j(parity_even, tlabel);
3596 : }
3597 330893 : __ j(FlagsConditionToCondition(branch->condition), tlabel);
3598 :
3599 330893 : if (FLAG_deopt_every_n_times > 0) {
3600 : ExternalReference counter =
3601 288 : ExternalReference::stress_deopt_count(isolate());
3602 :
3603 288 : __ pushfq();
3604 288 : __ pushq(rax);
3605 288 : __ load_rax(counter);
3606 : __ decl(rax);
3607 288 : __ j(not_zero, &nodeopt);
3608 :
3609 576 : __ Set(rax, FLAG_deopt_every_n_times);
3610 288 : __ store_rax(counter);
3611 288 : __ popq(rax);
3612 288 : __ popfq();
3613 288 : __ jmp(tlabel);
3614 :
3615 288 : __ bind(&nodeopt);
3616 288 : __ store_rax(counter);
3617 288 : __ popq(rax);
3618 288 : __ popfq();
3619 : }
3620 :
3621 330893 : if (!branch->fallthru) {
3622 0 : __ jmp(flabel, flabel_distance);
3623 : }
3624 330893 : }
3625 :
3626 5054591 : void CodeGenerator::AssembleArchJump(RpoNumber target) {
3627 8283638 : if (!IsNextInAssemblyOrder(target)) __ jmp(GetLabel(target));
3628 5054623 : }
3629 :
3630 142017 : void CodeGenerator::AssembleArchTrap(Instruction* instr,
3631 : FlagsCondition condition) {
3632 : auto ool = new (zone()) WasmOutOfLineTrap(this, instr);
3633 : Label* tlabel = ool->entry();
3634 141969 : Label end;
3635 141969 : if (condition == kUnorderedEqual) {
3636 0 : __ j(parity_even, &end);
3637 141969 : } else if (condition == kUnorderedNotEqual) {
3638 307 : __ j(parity_even, tlabel);
3639 : }
3640 141969 : __ j(FlagsConditionToCondition(condition), tlabel);
3641 142085 : __ bind(&end);
3642 142148 : }
3643 :
3644 : // Assembles boolean materializations after this instruction.
3645 376447 : void CodeGenerator::AssembleArchBoolean(Instruction* instr,
3646 : FlagsCondition condition) {
3647 : X64OperandConverter i(this, instr);
3648 376447 : Label done;
3649 :
3650 : // Materialize a full 64-bit 1 or 0 value. The result register is always the
3651 : // last output of the instruction.
3652 376447 : Label check;
3653 : DCHECK_NE(0u, instr->OutputCount());
3654 376447 : Register reg = i.OutputRegister(instr->OutputCount() - 1);
3655 376447 : if (condition == kUnorderedEqual) {
3656 2892 : __ j(parity_odd, &check, Label::kNear);
3657 : __ movl(reg, Immediate(0));
3658 2892 : __ jmp(&done, Label::kNear);
3659 373555 : } else if (condition == kUnorderedNotEqual) {
3660 2498 : __ j(parity_odd, &check, Label::kNear);
3661 : __ movl(reg, Immediate(1));
3662 2499 : __ jmp(&done, Label::kNear);
3663 : }
3664 376447 : __ bind(&check);
3665 376445 : __ setcc(FlagsConditionToCondition(condition), reg);
3666 : __ movzxbl(reg, reg);
3667 376449 : __ bind(&done);
3668 376451 : }
3669 :
3670 34385 : void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
3671 : X64OperandConverter i(this, instr);
3672 34385 : Register input = i.InputRegister(0);
3673 : std::vector<std::pair<int32_t, Label*>> cases;
3674 420657 : for (size_t index = 2; index < instr->InputCount(); index += 2) {
3675 386272 : cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
3676 : }
3677 : AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
3678 34385 : cases.data() + cases.size());
3679 34385 : }
3680 :
3681 0 : void CodeGenerator::AssembleArchLookupSwitch(Instruction* instr) {
3682 : X64OperandConverter i(this, instr);
3683 : Register input = i.InputRegister(0);
3684 0 : for (size_t index = 2; index < instr->InputCount(); index += 2) {
3685 0 : __ cmpl(input, Immediate(i.InputInt32(index + 0)));
3686 0 : __ j(equal, GetLabel(i.InputRpo(index + 1)));
3687 : }
3688 0 : AssembleArchJump(i.InputRpo(1));
3689 0 : }
3690 :
3691 344 : void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
3692 : X64OperandConverter i(this, instr);
3693 : Register input = i.InputRegister(0);
3694 344 : int32_t const case_count = static_cast<int32_t>(instr->InputCount() - 2);
3695 344 : Label** cases = zone()->NewArray<Label*>(case_count);
3696 399988 : for (int32_t index = 0; index < case_count; ++index) {
3697 399644 : cases[index] = GetLabel(i.InputRpo(index + 2));
3698 : }
3699 344 : Label* const table = AddJumpTable(cases, case_count);
3700 344 : __ cmpl(input, Immediate(case_count));
3701 344 : __ j(above_equal, GetLabel(i.InputRpo(1)));
3702 688 : __ leaq(kScratchRegister, Operand(table));
3703 344 : __ jmp(Operand(kScratchRegister, input, times_8, 0));
3704 344 : }
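// Note: the table switch is a bounds check (anything at or above the case
// count falls through to the default target) followed by an indirect jump
// through an 8-byte-per-entry label table, indexed with scale times_8. A
// standalone sketch of the same dispatch shape using function pointers
// (illustrative, not part of V8):
#include <cstdint>
#include <cstdio>
void Case0() { std::puts("case 0"); }
void Case1() { std::puts("case 1"); }
void Default() { std::puts("default"); }
void TableSwitch(uint32_t input) {
  static void (*const table[])() = {Case0, Case1};  // AddJumpTable
  if (input >= 2) { Default(); return; }            // cmpl + j(above_equal)
  table[input]();                                   // jmp [table + input*8]
}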
3705 :
3706 : namespace {
3707 :
3708 : static const int kQuadWordSize = 16;
3709 :
3710 : } // namespace
3711 :
3712 2640179 : void CodeGenerator::FinishFrame(Frame* frame) {
3713 : auto call_descriptor = linkage()->GetIncomingDescriptor();
3714 :
3715 : const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
3716 2640179 : if (saves_fp != 0) {
3717 : frame->AlignSavedCalleeRegisterSlots();
3718 0 : if (saves_fp != 0) { // Save callee-saved XMM registers.
3719 : const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
3720 0 : frame->AllocateSavedCalleeRegisterSlots(
3721 0 : saves_fp_count * (kQuadWordSize / kSystemPointerSize));
3722 : }
3723 : }
3724 : const RegList saves = call_descriptor->CalleeSavedRegisters();
3725 2640179 : if (saves != 0) { // Save callee-saved registers.
3726 : int count = 0;
3727 29846157 : for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
3728 14470864 : if (((1 << i) & saves)) {
3729 4522145 : ++count;
3730 : }
3731 : }
3732 : frame->AllocateSavedCalleeRegisterSlots(count);
3733 : }
3734 2640179 : }
3735 :
3736 2662395 : void CodeGenerator::AssembleConstructFrame() {
3737 : auto call_descriptor = linkage()->GetIncomingDescriptor();
3738 2662395 : if (frame_access_state()->has_frame()) {
3739 : int pc_base = __ pc_offset();
3740 :
3741 2662870 : if (call_descriptor->IsCFunctionCall()) {
3742 904429 : __ pushq(rbp);
3743 : __ movq(rbp, rsp);
3744 1758441 : } else if (call_descriptor->IsJSFunctionCall()) {
3745 645321 : __ Prologue();
3746 645328 : if (call_descriptor->PushArgumentCount()) {
3747 39056 : __ pushq(kJavaScriptCallArgCountRegister);
3748 : }
3749 : } else {
3750 2226158 : __ StubPrologue(info()->GetOutputStackFrameType());
3751 1112894 : if (call_descriptor->IsWasmFunctionCall()) {
3752 987493 : __ pushq(kWasmInstanceRegister);
3753 125401 : } else if (call_descriptor->IsWasmImportWrapper()) {
3754 : // WASM import wrappers are passed a tuple in the place of the instance.
3755 : // Unpack the tuple into the instance and the target callable.
3756 : // This must be done here in the codegen because it cannot be expressed
3757 : // properly in the graph.
3758 : __ LoadTaggedPointerField(
3759 : kJSFunctionRegister,
3760 6859 : FieldOperand(kWasmInstanceRegister, Tuple2::kValue2Offset));
3761 : __ LoadTaggedPointerField(
3762 : kWasmInstanceRegister,
3763 6859 : FieldOperand(kWasmInstanceRegister, Tuple2::kValue1Offset));
3764 6859 : __ pushq(kWasmInstanceRegister);
3765 : }
3766 : }
3767 :
3768 2662980 : unwinding_info_writer_.MarkFrameConstructed(pc_base);
3769 : }
3770 : int required_slots = frame()->GetTotalFrameSlotCount() -
3771 2661666 : call_descriptor->CalculateFixedFrameSize();
3772 :
3773 2662265 : if (info()->is_osr()) {
3774 : // TurboFan OSR-compiled functions cannot be entered directly.
3775 4647 : __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
3776 :
3777 : // Unoptimized code jumps directly to this entrypoint while the unoptimized
3778 : // frame is still on the stack. Optimized code uses OSR values directly from
3779 : // the unoptimized frame. Thus, all that needs to be done is to allocate the
3780 : // remaining stack slots.
3781 4647 : if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
3782 4647 : osr_pc_offset_ = __ pc_offset();
3783 4647 : required_slots -= static_cast<int>(osr_helper()->UnoptimizedFrameSlots());
3784 4647 : ResetSpeculationPoison();
3785 : }
3786 :
3787 : const RegList saves = call_descriptor->CalleeSavedRegisters();
3788 : const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
3789 :
3790 2662118 : if (required_slots > 0) {
3791 : DCHECK(frame_access_state()->has_frame());
3792 2048890 : if (info()->IsWasm() && required_slots > 128) {
3793 : // For WebAssembly functions with big frames we have to do the stack
3794 : // overflow check before we construct the frame. Otherwise we may not
3795 : // have enough space on the stack to call the runtime for the stack
3796 : // overflow.
3797 8 : Label done;
3798 :
3799 : // If the frame is bigger than the stack, we throw the stack overflow
3800 : // exception unconditionally. Thereby we can avoid the integer overflow
3801 : // check in the condition code.
3802 8 : if (required_slots * kSystemPointerSize < FLAG_stack_size * 1024) {
3803 8 : __ movq(kScratchRegister,
3804 : FieldOperand(kWasmInstanceRegister,
3805 : WasmInstanceObject::kRealStackLimitAddressOffset));
3806 16 : __ movq(kScratchRegister, Operand(kScratchRegister, 0));
3807 : __ addq(kScratchRegister,
3808 : Immediate(required_slots * kSystemPointerSize));
3809 : __ cmpq(rsp, kScratchRegister);
3810 8 : __ j(above_equal, &done);
3811 : }
3812 :
3813 8 : __ near_call(wasm::WasmCode::kWasmStackOverflow,
3814 8 : RelocInfo::WASM_STUB_CALL);
3815 : ReferenceMap* reference_map = new (zone()) ReferenceMap(zone());
3816 : RecordSafepoint(reference_map, Safepoint::kSimple,
3817 8 : Safepoint::kNoLazyDeopt);
3818 8 : __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
3819 8 : __ bind(&done);
3820 : }
3821 :
3822 : // Skip callee-saved and return slots, which are created below.
3823 2048890 : required_slots -= base::bits::CountPopulation(saves);
3824 : required_slots -= base::bits::CountPopulation(saves_fp) *
3825 2048890 : (kQuadWordSize / kSystemPointerSize);
3826 2048890 : required_slots -= frame()->GetReturnSlotCount();
3827 2048890 : if (required_slots > 0) {
3828 1872400 : __ subq(rsp, Immediate(required_slots * kSystemPointerSize));
3829 : }
3830 : }
3831 :
3832 2662133 : if (saves_fp != 0) { // Save callee-saved XMM registers.
3833 : const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
3834 0 : const int stack_size = saves_fp_count * kQuadWordSize;
3835 : // Adjust the stack pointer.
3836 0 : __ subq(rsp, Immediate(stack_size));
3837 : // Store the registers on the stack.
3838 : int slot_idx = 0;
3839 0 : for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
3840 0 : if (!((1 << i) & saves_fp)) continue;
3841 0 : __ movdqu(Operand(rsp, kQuadWordSize * slot_idx),
3842 0 : XMMRegister::from_code(i));
3843 0 : slot_idx++;
3844 : }
3845 : }
3846 :
3847 2662133 : if (saves != 0) { // Save callee-saved registers.
3848 29846157 : for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
3849 14470864 : if (!((1 << i) & saves)) continue;
3850 4522145 : __ pushq(Register::from_code(i));
3851 : }
3852 : }
3853 :
3854 : // Allocate return slots (located after callee-saved).
3855 2662133 : if (frame()->GetReturnSlotCount() > 0) {
3856 667 : __ subq(rsp, Immediate(frame()->GetReturnSlotCount() * kSystemPointerSize));
3857 : }
3858 2662133 : }
3859 :
3860 2999050 : void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
3861 : auto call_descriptor = linkage()->GetIncomingDescriptor();
3862 :
3863 : // Restore registers.
3864 : const RegList saves = call_descriptor->CalleeSavedRegisters();
3865 2999050 : if (saves != 0) {
3866 : const int returns = frame()->GetReturnSlotCount();
3867 913797 : if (returns != 0) {
3868 656 : __ addq(rsp, Immediate(returns * kSystemPointerSize));
3869 : }
3870 30155301 : for (int i = 0; i < Register::kNumRegisters; i++) {
3871 14620752 : if (!((1 << i) & saves)) continue;
3872 4568985 : __ popq(Register::from_code(i));
3873 : }
3874 : }
3875 : const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
3876 2999050 : if (saves_fp != 0) {
3877 : const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
3878 0 : const int stack_size = saves_fp_count * kQuadWordSize;
3879 : // Load the registers from the stack.
3880 : int slot_idx = 0;
3881 0 : for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
3882 0 : if (!((1 << i) & saves_fp)) continue;
3883 0 : __ movdqu(XMMRegister::from_code(i),
3884 0 : Operand(rsp, kQuadWordSize * slot_idx));
3885 0 : slot_idx++;
3886 : }
3887 : // Adjust the stack pointer.
3888 0 : __ addq(rsp, Immediate(stack_size));
3889 : }
3890 :
3891 : unwinding_info_writer_.MarkBlockWillExit();
3892 :
3893 : // Might need rcx for scratch if pop_size is too big or if there is a variable
3894 : // pop count.
3895 : DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rcx.bit());
3896 : DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rdx.bit());
3897 2998956 : size_t pop_size = call_descriptor->StackParameterCount() * kSystemPointerSize;
3898 : X64OperandConverter g(this, nullptr);
3899 2998956 : if (call_descriptor->IsCFunctionCall()) {
3900 913797 : AssembleDeconstructFrame();
3901 2085159 : } else if (frame_access_state()->has_frame()) {
3902 4043031 : if (pop->IsImmediate() && g.ToConstant(pop).ToInt32() == 0) {
3903 : // Canonicalize JSFunction return sites for now.
3904 2001715 : if (return_label_.is_bound()) {
3905 369527 : __ jmp(&return_label_);
3906 : return;
3907 : } else {
3908 1632188 : __ bind(&return_label_);
3909 1632216 : AssembleDeconstructFrame();
3910 : }
3911 : } else {
3912 39900 : AssembleDeconstructFrame();
3913 : }
3914 : }
3915 :
3916 2630540 : if (pop->IsImmediate()) {
3917 5181504 : pop_size += g.ToConstant(pop).ToInt32() * kSystemPointerSize;
3918 2590840 : CHECK_LT(pop_size, static_cast<size_t>(std::numeric_limits<int>::max()));
3919 5181680 : __ Ret(static_cast<int>(pop_size), rcx);
3920 : } else {
3921 : Register pop_reg = g.ToRegister(pop);
3922 39876 : Register scratch_reg = pop_reg == rcx ? rdx : rcx;
3923 39876 : __ popq(scratch_reg);
3924 79752 : __ leaq(rsp, Operand(rsp, pop_reg, times_8, static_cast<int>(pop_size)));
3925 39876 : __ jmp(scratch_reg);
3926 : }
3927 : }
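// Note: in the variable-pop-count path above, the return address is popped
// into a scratch register, rsp is advanced past both the dynamic count
// (pop_reg * 8) and the static parameter area in a single leaq, and control
// returns via an indirect jump. Standalone sketch of the stack-pointer
// arithmetic (illustrative, not part of V8):
#include <cstdint>
uint64_t RspAfterReturn(uint64_t rsp, uint64_t pop_reg, uint64_t pop_size) {
  rsp += 8;                             // popq scratch_reg (return address)
  return rsp + pop_reg * 8 + pop_size;  // leaq rsp, [rsp+pop_reg*8+pop_size]
}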
3928 :
3929 2639698 : void CodeGenerator::FinishCode() { tasm()->PatchConstPool(); }
3930 :
3931 37845491 : void CodeGenerator::AssembleMove(InstructionOperand* source,
3932 : InstructionOperand* destination) {
3933 : X64OperandConverter g(this, nullptr);
3934 : // Helper function to write the given constant to the dst register.
3935 18417160 : auto MoveConstantToRegister = [&](Register dst, Constant src) {
3936 18417160 : switch (src.type()) {
3937 : case Constant::kInt32: {
3938 3995559 : if (RelocInfo::IsWasmReference(src.rmode())) {
3939 26273858 : __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));
3940 : } else {
3941 : int32_t value = src.ToInt32();
3942 3995559 : if (value == 0) {
3943 1108520 : __ xorl(dst, dst);
3944 : } else {
3945 2887039 : __ movl(dst, Immediate(value));
3946 : }
3947 : }
3948 : break;
3949 : }
3950 : case Constant::kInt64:
3951 2325758 : if (RelocInfo::IsWasmReference(src.rmode())) {
3952 0 : __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));
3953 : } else {
3954 2325758 : __ Set(dst, src.ToInt64());
3955 : }
3956 : break;
3957 : case Constant::kFloat32:
3958 736 : __ MoveNumber(dst, src.ToFloat32());
3959 368 : break;
3960 : case Constant::kFloat64:
3961 1521229 : __ MoveNumber(dst, src.ToFloat64().value());
3962 1521233 : break;
3963 : case Constant::kExternalReference:
3964 2715123 : __ Move(dst, src.ToExternalReference());
3965 2715137 : break;
3966 : case Constant::kHeapObject: {
3967 7856838 : Handle<HeapObject> src_object = src.ToHeapObject();
3968 : RootIndex index;
3969 7856837 : if (IsMaterializableFromRoot(src_object, &index)) {
3970 1901376 : __ LoadRoot(dst, index);
3971 : } else {
3972 5955464 : __ Move(dst, src_object);
3973 : }
3974 : break;
3975 : }
3976 : case Constant::kDelayedStringConstant: {
3977 2144 : const StringConstantBase* src_constant = src.ToDelayedStringConstant();
3978 2144 : __ MoveStringConstant(dst, src_constant);
3979 2144 : break;
3980 : }
3981 : case Constant::kRpoNumber:
3982 0 : UNREACHABLE(); // TODO(dcarney): load of labels on x64.
3983 : break;
3984 : }
3985 56262757 : };
3986 : // Helper function to write the given constant to the stack.
3987 38610 : auto MoveConstantToSlot = [&](Operand dst, Constant src) {
3988 38610 : if (!RelocInfo::IsWasmReference(src.rmode())) {
3989 38610 : switch (src.type()) {
3990 : case Constant::kInt32:
3991 38610 : __ movq(dst, Immediate(src.ToInt32()));
3992 19510 : return;
3993 : case Constant::kInt64:
3994 13261 : __ Set(dst, src.ToInt64());
3995 13261 : return;
3996 : default:
3997 : break;
3998 : }
3999 : }
4000 5839 : MoveConstantToRegister(kScratchRegister, src);
4001 5839 : __ movq(dst, kScratchRegister);
4002 37845491 : };
4003 : // Dispatch on the source and destination operand kinds.
4004 37845491 : switch (MoveType::InferMove(source, destination)) {
4005 : case MoveType::kRegisterToRegister:
4006 4218879 : if (source->IsRegister()) {
4007 4076762 : __ movq(g.ToRegister(destination), g.ToRegister(source));
4008 : } else {
4009 : DCHECK(source->IsFPRegister());
4010 : __ Movapd(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
4011 : }
4012 : return;
4013 : case MoveType::kRegisterToStack: {
4014 5689569 : Operand dst = g.ToOperand(destination);
4015 5689569 : if (source->IsRegister()) {
4016 5376083 : __ movq(dst, g.ToRegister(source));
4017 : } else {
4018 : DCHECK(source->IsFPRegister());
4019 : XMMRegister src = g.ToDoubleRegister(source);
4020 : MachineRepresentation rep =
4021 : LocationOperand::cast(source)->representation();
4022 313486 : if (rep != MachineRepresentation::kSimd128) {
4023 : __ Movsd(dst, src);
4024 : } else {
4025 : __ Movups(dst, src);
4026 : }
4027 : }
4028 : return;
4029 : }
4030 : case MoveType::kStackToRegister: {
4031 8891193 : Operand src = g.ToOperand(source);
4032 8891193 : if (source->IsStackSlot()) {
4033 8405090 : __ movq(g.ToRegister(destination), src);
4034 : } else {
4035 : DCHECK(source->IsFPStackSlot());
4036 : XMMRegister dst = g.ToDoubleRegister(destination);
4037 : MachineRepresentation rep =
4038 : LocationOperand::cast(source)->representation();
4039 486103 : if (rep != MachineRepresentation::kSimd128) {
4040 : __ Movsd(dst, src);
4041 : } else {
4042 : __ Movups(dst, src);
4043 : }
4044 : }
4045 : return;
4046 : }
4047 : case MoveType::kStackToStack: {
4048 50185 : Operand src = g.ToOperand(source);
4049 50185 : Operand dst = g.ToOperand(destination);
4050 50185 : if (source->IsStackSlot()) {
4051 : // Spill on demand to use a temporary register for memory-to-memory
4052 : // moves.
4053 25139 : __ movq(kScratchRegister, src);
4054 : __ movq(dst, kScratchRegister);
4055 : } else {
4056 : MachineRepresentation rep =
4057 : LocationOperand::cast(source)->representation();
4058 25046 : if (rep != MachineRepresentation::kSimd128) {
4059 : __ Movsd(kScratchDoubleReg, src);
4060 : __ Movsd(dst, kScratchDoubleReg);
4061 : } else {
4062 : DCHECK(source->IsSimd128StackSlot());
4063 : __ Movups(kScratchDoubleReg, src);
4064 : __ Movups(dst, kScratchDoubleReg);
4065 : }
4066 : }
4067 : return;
4068 : }
4069 : case MoveType::kConstantToRegister: {
4070 18951215 : Constant src = g.ToConstant(source);
4071 18951439 : if (destination->IsRegister()) {
4072 18411400 : MoveConstantToRegister(g.ToRegister(destination), src);
4073 : } else {
4074 : DCHECK(destination->IsFPRegister());
4075 540039 : XMMRegister dst = g.ToDoubleRegister(destination);
4076 540039 : if (src.type() == Constant::kFloat32) {
4077 : // TODO(turbofan): Can we do better here?
4078 125126 : __ Move(dst, bit_cast<uint32_t>(src.ToFloat32()));
4079 : } else {
4080 : DCHECK_EQ(src.type(), Constant::kFloat64);
4081 414913 : __ Move(dst, src.ToFloat64().AsUint64());
4082 : }
4083 : }
4084 : return;
4085 : }
4086 : case MoveType::kConstantToStack: {
4087 45337 : Constant src = g.ToConstant(source);
4088 45337 : Operand dst = g.ToOperand(destination);
4089 45337 : if (destination->IsStackSlot()) {
4090 38610 : MoveConstantToSlot(dst, src);
4091 : } else {
4092 : DCHECK(destination->IsFPStackSlot());
4093 6727 : if (src.type() == Constant::kFloat32) {
4094 3060 : __ movl(dst, Immediate(bit_cast<uint32_t>(src.ToFloat32())));
4095 : } else {
4096 : DCHECK_EQ(src.type(), Constant::kFloat64);
4097 3667 : __ movq(kScratchRegister, src.ToFloat64().AsUint64());
4098 : __ movq(dst, kScratchRegister);
4099 : }
4100 : }
4101 : return;
4102 : }
4103 : }
4104 0 : UNREACHABLE();
4105 : }
4106 :
4107 78622 : void CodeGenerator::AssembleSwap(InstructionOperand* source,
4108 : InstructionOperand* destination) {
4109 : X64OperandConverter g(this, nullptr);
4110 : // Dispatch on the source and destination operand kinds. Not all
4111 : // combinations are possible.
4112 78622 : switch (MoveType::InferSwap(source, destination)) {
4113 : case MoveType::kRegisterToRegister: {
4114 68315 : if (source->IsRegister()) {
4115 : Register src = g.ToRegister(source);
4116 : Register dst = g.ToRegister(destination);
4117 65729 : __ movq(kScratchRegister, src);
4118 : __ movq(src, dst);
4119 : __ movq(dst, kScratchRegister);
4120 : } else {
4121 : DCHECK(source->IsFPRegister());
4122 : XMMRegister src = g.ToDoubleRegister(source);
4123 : XMMRegister dst = g.ToDoubleRegister(destination);
4124 : __ Movapd(kScratchDoubleReg, src);
4125 : __ Movapd(src, dst);
4126 : __ Movapd(dst, kScratchDoubleReg);
4127 : }
4128 : return;
4129 : }
4130 : case MoveType::kRegisterToStack: {
4131 6543 : if (source->IsRegister()) {
4132 : Register src = g.ToRegister(source);
4133 1747 : __ pushq(src);
4134 : frame_access_state()->IncreaseSPDelta(1);
4135 1747 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4136 1747 : kSystemPointerSize);
4137 : __ movq(src, g.ToOperand(destination));
4138 : frame_access_state()->IncreaseSPDelta(-1);
4139 1747 : __ popq(g.ToOperand(destination));
4140 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4141 1747 : -kSystemPointerSize);
4142 : } else {
4143 : DCHECK(source->IsFPRegister());
4144 : XMMRegister src = g.ToDoubleRegister(source);
4145 4796 : Operand dst = g.ToOperand(destination);
4146 : MachineRepresentation rep =
4147 : LocationOperand::cast(source)->representation();
4148 4796 : if (rep != MachineRepresentation::kSimd128) {
4149 : __ Movsd(kScratchDoubleReg, src);
4150 : __ Movsd(src, dst);
4151 : __ Movsd(dst, kScratchDoubleReg);
4152 : } else {
4153 : __ Movups(kScratchDoubleReg, src);
4154 : __ Movups(src, dst);
4155 : __ Movups(dst, kScratchDoubleReg);
4156 : }
4157 : }
4158 : return;
4159 : }
4160 : case MoveType::kStackToStack: {
4161 3765 : Operand src = g.ToOperand(source);
4162 3765 : Operand dst = g.ToOperand(destination);
4163 : MachineRepresentation rep =
4164 : LocationOperand::cast(source)->representation();
4165 3765 : if (rep != MachineRepresentation::kSimd128) {
4166 : Register tmp = kScratchRegister;
4167 2929 : __ movq(tmp, dst);
4168 2929 : __ pushq(src); // Then use stack to copy src to destination.
4169 2929 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4170 2929 : kSystemPointerSize);
4171 2929 : __ popq(dst);
4172 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4173 2929 : -kSystemPointerSize);
4174 : __ movq(src, tmp);
4175 : } else {
4176 : // Without AVX, misaligned reads and writes will trap. Move using the
4177 : // stack, in two parts.
4178 836 : __ movups(kScratchDoubleReg, dst); // Save dst in scratch register.
4179 836 : __ pushq(src); // Then use stack to copy src to destination.
4180 836 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4181 836 : kSystemPointerSize);
4182 836 : __ popq(dst);
4183 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4184 836 : -kSystemPointerSize);
4185 836 : __ pushq(g.ToOperand(source, kSystemPointerSize));
4186 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4187 836 : kSystemPointerSize);
4188 836 : __ popq(g.ToOperand(destination, kSystemPointerSize));
4189 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4190 836 : -kSystemPointerSize);
4191 836 : __ movups(src, kScratchDoubleReg);
4192 : }
4193 : return;
4194 : }
4195 : default:
4196 0 : UNREACHABLE();
4197 : break;
4198 : }
4199 : }
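// Note: the stack-to-stack swap keeps one operand in kScratchRegister and
// bounces the other through the machine stack with push/pop, so no second
// scratch register is needed; the 16-byte case repeats the push/pop for
// each 8-byte half. Standalone scalar sketch of the 8-byte path
// (illustrative, not part of V8):
#include <cstdint>
void SwapSlots(uint64_t* src, uint64_t* dst) {
  uint64_t tmp = *dst;  // movq kScratchRegister, dst
  *dst = *src;          // pushq src / popq dst
  *src = tmp;           // movq src, kScratchRegister
}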
4200 :
4201 344 : void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
4202 399988 : for (size_t index = 0; index < target_count; ++index) {
4203 199822 : __ dq(targets[index]);
4204 : }
4205 344 : }
4206 :
4207 : #undef __
4208 :
4209 : } // namespace compiler
4210 : } // namespace internal
4211 121996 : } // namespace v8
|