Line data Source code
1 : // Copyright 2013 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #include "src/compiler/backend/code-generator.h"
6 :
7 : #include <limits>
8 :
9 : #include "src/base/overflowing-math.h"
10 : #include "src/compiler/backend/code-generator-impl.h"
11 : #include "src/compiler/backend/gap-resolver.h"
12 : #include "src/compiler/node-matchers.h"
13 : #include "src/compiler/osr.h"
14 : #include "src/heap/heap-inl.h" // crbug.com/v8/8499
15 : #include "src/macro-assembler.h"
16 : #include "src/objects/smi.h"
17 : #include "src/optimized-compilation-info.h"
18 : #include "src/wasm/wasm-code-manager.h"
19 : #include "src/wasm/wasm-objects.h"
20 : #include "src/x64/assembler-x64.h"
21 :
22 : namespace v8 {
23 : namespace internal {
24 : namespace compiler {
25 :
26 : #define __ tasm()->
27 :
28 : // Adds X64 specific methods for decoding operands.
29 : class X64OperandConverter : public InstructionOperandConverter {
30 : public:
31 : X64OperandConverter(CodeGenerator* gen, Instruction* instr)
32 : : InstructionOperandConverter(gen, instr) {}
33 :
34 : Immediate InputImmediate(size_t index) {
35 5344908 : return ToImmediate(instr_->InputAt(index));
36 : }
37 :
38 1044868 : Operand InputOperand(size_t index, int extra = 0) {
39 3134600 : return ToOperand(instr_->InputAt(index), extra);
40 : }
41 :
42 0 : Operand OutputOperand() { return ToOperand(instr_->Output()); }
43 :
44 4289866 : Immediate ToImmediate(InstructionOperand* operand) {
45 4289866 : Constant constant = ToConstant(operand);
46 4289899 : if (constant.type() == Constant::kFloat64) {
47 : DCHECK_EQ(0, constant.ToFloat64().AsUint64());
48 20796 : return Immediate(0);
49 : }
50 4269103 : if (RelocInfo::IsWasmReference(constant.rmode())) {
51 0 : return Immediate(constant.ToInt32(), constant.rmode());
52 : }
53 4269103 : return Immediate(constant.ToInt32());
54 : }
55 :
56 : Operand ToOperand(InstructionOperand* op, int extra = 0) {
57 : DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
58 13160463 : return SlotToOperand(AllocatedOperand::cast(op)->index(), extra);
59 : }
60 :
61 13171980 : Operand SlotToOperand(int slot_index, int extra = 0) {
62 26343960 : FrameOffset offset = frame_access_state()->GetFrameOffset(slot_index);
63 : return Operand(offset.from_stack_pointer() ? rsp : rbp,
64 26344022 : offset.offset() + extra);
65 : }
66 :
67 : static size_t NextOffset(size_t* offset) {
68 14598246 : size_t i = *offset;
69 26142584 : (*offset)++;
70 : return i;
71 : }
72 :
73 : static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) {
74 : STATIC_ASSERT(0 == static_cast<int>(times_1));
75 : STATIC_ASSERT(1 == static_cast<int>(times_2));
76 : STATIC_ASSERT(2 == static_cast<int>(times_4));
77 : STATIC_ASSERT(3 == static_cast<int>(times_8));
78 1459591 : int scale = static_cast<int>(mode - one);
79 : DCHECK(scale >= 0 && scale < 4);
80 1459591 : return static_cast<ScaleFactor>(scale);
81 : }
82 :
83 14598246 : Operand MemoryOperand(size_t* offset) {
84 14598246 : AddressingMode mode = AddressingModeField::decode(instr_->opcode());
85 14598246 : switch (mode) {
86 : case kMode_MR: {
87 16555990 : Register base = InputRegister(NextOffset(offset));
88 : int32_t disp = 0;
89 2117090 : return Operand(base, disp);
90 : }
91 : case kMode_MRI: {
92 9572478 : Register base = InputRegister(NextOffset(offset));
93 9572478 : int32_t disp = InputInt32(NextOffset(offset));
94 9572508 : return Operand(base, disp);
95 : }
96 : case kMode_MR1:
97 : case kMode_MR2:
98 : case kMode_MR4:
99 : case kMode_MR8: {
100 753301 : Register base = InputRegister(NextOffset(offset));
101 753301 : Register index = InputRegister(NextOffset(offset));
102 : ScaleFactor scale = ScaleFor(kMode_MR1, mode);
103 : int32_t disp = 0;
104 753301 : return Operand(base, index, scale, disp);
105 : }
106 : case kMode_MR1I:
107 : case kMode_MR2I:
108 : case kMode_MR4I:
109 : case kMode_MR8I: {
110 536440 : Register base = InputRegister(NextOffset(offset));
111 536440 : Register index = InputRegister(NextOffset(offset));
112 : ScaleFactor scale = ScaleFor(kMode_MR1I, mode);
113 536440 : int32_t disp = InputInt32(NextOffset(offset));
114 536441 : return Operand(base, index, scale, disp);
115 : }
116 : case kMode_M1: {
117 0 : Register base = InputRegister(NextOffset(offset));
118 : int32_t disp = 0;
119 0 : return Operand(base, disp);
120 : }
121 : case kMode_M2:
122 0 : UNREACHABLE(); // Should use kModeMR with more compact encoding instead
123 : return Operand(no_reg, 0);
124 : case kMode_M4:
125 : case kMode_M8: {
126 24171 : Register index = InputRegister(NextOffset(offset));
127 : ScaleFactor scale = ScaleFor(kMode_M1, mode);
128 : int32_t disp = 0;
129 24171 : return Operand(index, scale, disp);
130 : }
131 : case kMode_M1I:
132 : case kMode_M2I:
133 : case kMode_M4I:
134 : case kMode_M8I: {
135 145679 : Register index = InputRegister(NextOffset(offset));
136 : ScaleFactor scale = ScaleFor(kMode_M1I, mode);
137 145679 : int32_t disp = InputInt32(NextOffset(offset));
138 145679 : return Operand(index, scale, disp);
139 : }
140 : case kMode_Root: {
141 1449087 : Register base = kRootRegister;
142 1449087 : int32_t disp = InputInt32(NextOffset(offset));
143 1449090 : return Operand(base, disp);
144 : }
145 : case kMode_None:
146 0 : UNREACHABLE();
147 : }
148 0 : UNREACHABLE();
149 : }
150 :
151 : Operand MemoryOperand(size_t first_input = 0) {
152 7878294 : return MemoryOperand(&first_input);
153 : }
154 : };
155 :
156 : namespace {
157 :
158 : bool HasImmediateInput(Instruction* instr, size_t index) {
159 19853568 : return instr->InputAt(index)->IsImmediate();
160 : }
161 :
162 0 : class OutOfLineLoadFloat32NaN final : public OutOfLineCode {
163 : public:
164 : OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result)
165 183 : : OutOfLineCode(gen), result_(result) {}
166 :
167 183 : void Generate() final {
168 366 : __ Xorps(result_, result_);
169 : __ Divss(result_, result_);
170 183 : }
171 :
172 : private:
173 : XMMRegister const result_;
174 : };
175 :
176 0 : class OutOfLineLoadFloat64NaN final : public OutOfLineCode {
177 : public:
178 : OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result)
179 647 : : OutOfLineCode(gen), result_(result) {}
180 :
181 647 : void Generate() final {
182 1294 : __ Xorpd(result_, result_);
183 : __ Divsd(result_, result_);
184 647 : }
185 :
186 : private:
187 : XMMRegister const result_;
188 : };
189 :
190 0 : class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
191 : public:
192 111762 : OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
193 : XMMRegister input, StubCallMode stub_mode,
194 : UnwindingInfoWriter* unwinding_info_writer)
195 : : OutOfLineCode(gen),
196 : result_(result),
197 : input_(input),
198 : stub_mode_(stub_mode),
199 : unwinding_info_writer_(unwinding_info_writer),
200 : isolate_(gen->isolate()),
201 167658 : zone_(gen->zone()) {}
202 :
203 55882 : void Generate() final {
204 335356 : __ subp(rsp, Immediate(kDoubleSize));
205 : unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
206 111780 : kDoubleSize);
207 111791 : __ Movsd(MemOperand(rsp, 0), input_);
208 55897 : if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
209 : // A direct call to a wasm runtime stub defined in this module.
210 : // Just encode the stub index. This will be patched when the code
211 : // is added to the native module and copied into wasm code space.
212 1651 : __ near_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
213 : } else {
214 108485 : __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET);
215 : }
216 167683 : __ movl(result_, MemOperand(rsp, 0));
217 55896 : __ addp(rsp, Immediate(kDoubleSize));
218 : unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
219 111794 : -kDoubleSize);
220 55897 : }
221 :
222 : private:
223 : Register const result_;
224 : XMMRegister const input_;
225 : StubCallMode stub_mode_;
226 : UnwindingInfoWriter* const unwinding_info_writer_;
227 : Isolate* isolate_;
228 : Zone* zone_;
229 : };
230 :
231 0 : class OutOfLineRecordWrite final : public OutOfLineCode {
232 : public:
233 313550 : OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand,
234 : Register value, Register scratch0, Register scratch1,
235 : RecordWriteMode mode, StubCallMode stub_mode)
236 : : OutOfLineCode(gen),
237 : object_(object),
238 : operand_(operand),
239 : value_(value),
240 : scratch0_(scratch0),
241 : scratch1_(scratch1),
242 : mode_(mode),
243 : stub_mode_(stub_mode),
244 627100 : zone_(gen->zone()) {}
245 :
246 313550 : void Generate() final {
247 313550 : if (mode_ > RecordWriteMode::kValueIsPointer) {
248 1502470 : __ JumpIfSmi(value_, exit());
249 : }
250 : __ CheckPageFlag(value_, scratch0_,
251 : MemoryChunk::kPointersToHereAreInterestingMask, zero,
252 627100 : exit());
253 313550 : __ leap(scratch1_, operand_);
254 :
255 : RememberedSetAction const remembered_set_action =
256 : mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
257 313550 : : OMIT_REMEMBERED_SET;
258 : SaveFPRegsMode const save_fp_mode =
259 627100 : frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
260 :
261 313550 : if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
262 : // A direct call to a wasm runtime stub defined in this module.
263 : // Just encode the stub index. This will be patched when the code
264 : // is added to the native module and copied into wasm code space.
265 : __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
266 171 : save_fp_mode, wasm::WasmCode::kWasmRecordWrite);
267 : } else {
268 : __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
269 313379 : save_fp_mode);
270 : }
271 313550 : }
272 :
273 : private:
274 : Register const object_;
275 : Operand const operand_;
276 : Register const value_;
277 : Register const scratch0_;
278 : Register const scratch1_;
279 : RecordWriteMode const mode_;
280 : StubCallMode const stub_mode_;
281 : Zone* zone_;
282 : };
283 :
// Out-of-line code that raises a wasm trap. The trap id is read from the
// instruction's last input operand.
class WasmOutOfLineTrap : public OutOfLineCode {
 public:
  WasmOutOfLineTrap(CodeGenerator* gen, Instruction* instr)
      : OutOfLineCode(gen), gen_(gen), instr_(instr) {}

  void Generate() override {
    X64OperandConverter i(gen_, instr_);
    // By convention the trap id is encoded as the last input of {instr_}.
    TrapId trap_id =
        static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
    GenerateWithTrapId(trap_id);
  }

 protected:
  CodeGenerator* gen_;

  void GenerateWithTrapId(TrapId trap_id) { GenerateCallToTrap(trap_id); }

 private:
  void GenerateCallToTrap(TrapId trap_id) {
    if (!gen_->wasm_runtime_exception_support()) {
      // We cannot test calls to the runtime in cctest/test-run-wasm.
      // Therefore we emit a call to C here instead of a call to the runtime.
      __ PrepareCallCFunction(0);
      __ CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(),
                       0);
      __ LeaveFrame(StackFrame::WASM_COMPILED);
      // Pop the incoming stack parameters and return to the caller.
      auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
      size_t pop_size =
          call_descriptor->StackParameterCount() * kSystemPointerSize;
      // Use rcx as a scratch register, we return anyways immediately.
      __ Ret(static_cast<int>(pop_size), rcx);
    } else {
      gen_->AssembleSourcePosition(instr_);
      // A direct call to a wasm runtime stub defined in this module.
      // Just encode the stub index. This will be patched when the code
      // is added to the native module and copied into wasm code space.
      __ near_call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
      // Record an (empty) safepoint for the stub call; the trap never
      // returns, which the following assertion documents in generated code.
      ReferenceMap* reference_map =
          new (gen_->zone()) ReferenceMap(gen_->zone());
      gen_->RecordSafepoint(reference_map, Safepoint::kSimple, 0,
                            Safepoint::kNoLazyDeopt);
      __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
    }
  }

  Instruction* instr_;
};
331 :
// Out-of-line out-of-bounds trap for memory accesses guarded by the trap
// handler: registers a landing pad for the (possibly faulting) instruction
// at {pc_} and then raises kTrapMemOutOfBounds.
class WasmProtectedInstructionTrap final : public WasmOutOfLineTrap {
 public:
  WasmProtectedInstructionTrap(CodeGenerator* gen, int pc, Instruction* instr)
      : WasmOutOfLineTrap(gen, instr), pc_(pc) {}

  void Generate() final {
    // Associate the protected instruction's pc with this trap code's offset.
    gen_->AddProtectedInstructionLanding(pc_, __ pc_offset());
    GenerateWithTrapId(TrapId::kTrapMemOutOfBounds);
  }

 private:
  int pc_;  // Code offset of the protected (potentially faulting) instruction.
};
345 :
346 10711446 : void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen,
347 : InstructionCode opcode, Instruction* instr,
348 : X64OperandConverter& i, int pc) {
349 : const MemoryAccessMode access_mode =
350 10711446 : static_cast<MemoryAccessMode>(MiscField::decode(opcode));
351 10711446 : if (access_mode == kMemoryAccessProtected) {
352 : new (zone) WasmProtectedInstructionTrap(codegen, pc, instr);
353 : }
354 10711406 : }
355 :
356 10018014 : void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
357 : InstructionCode opcode, Instruction* instr,
358 : X64OperandConverter& i) {
359 : const MemoryAccessMode access_mode =
360 10018014 : static_cast<MemoryAccessMode>(MiscField::decode(opcode));
361 10018014 : if (access_mode == kMemoryAccessPoisoned) {
362 0 : Register value = i.OutputRegister();
363 0 : codegen->tasm()->andq(value, kSpeculationPoisonRegister);
364 : }
365 10018014 : }
366 :
367 : } // namespace
368 :
// Emits a one-operand instruction whose operand is also the destination,
// choosing the register or stack-slot form based on the output location.
#define ASSEMBLE_UNOP(asm_instr)         \
  do {                                   \
    if (instr->Output()->IsRegister()) { \
      __ asm_instr(i.OutputRegister());  \
    } else {                             \
      __ asm_instr(i.OutputOperand());   \
    }                                    \
  } while (false)

// Emits a two-operand ALU instruction. Uses a memory right-hand side when an
// addressing mode is encoded; otherwise dispatches on whether the inputs are
// registers, immediates, or stack slots.
#define ASSEMBLE_BINOP(asm_instr)                                     \
  do {                                                                \
    if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
      size_t index = 1;                                               \
      Operand right = i.MemoryOperand(&index);                        \
      __ asm_instr(i.InputRegister(0), right);                        \
    } else {                                                          \
      if (HasImmediateInput(instr, 1)) {                              \
        if (instr->InputAt(0)->IsRegister()) {                        \
          __ asm_instr(i.InputRegister(0), i.InputImmediate(1));      \
        } else {                                                      \
          __ asm_instr(i.InputOperand(0), i.InputImmediate(1));       \
        }                                                             \
      } else {                                                        \
        if (instr->InputAt(1)->IsRegister()) {                        \
          __ asm_instr(i.InputRegister(0), i.InputRegister(1));       \
        } else {                                                      \
          __ asm_instr(i.InputRegister(0), i.InputOperand(1));        \
        }                                                             \
      }                                                               \
    }                                                                 \
  } while (false)

// Emits a compare. Unlike ASSEMBLE_BINOP, a decoded addressing mode puts the
// memory operand on the LEFT, with the remaining input (immediate or
// register) on the right.
#define ASSEMBLE_COMPARE(asm_instr)                                   \
  do {                                                                \
    if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
      size_t index = 0;                                               \
      Operand left = i.MemoryOperand(&index);                         \
      if (HasImmediateInput(instr, index)) {                          \
        __ asm_instr(left, i.InputImmediate(index));                  \
      } else {                                                        \
        __ asm_instr(left, i.InputRegister(index));                   \
      }                                                               \
    } else {                                                          \
      if (HasImmediateInput(instr, 1)) {                              \
        if (instr->InputAt(0)->IsRegister()) {                        \
          __ asm_instr(i.InputRegister(0), i.InputImmediate(1));      \
        } else {                                                      \
          __ asm_instr(i.InputOperand(0), i.InputImmediate(1));       \
        }                                                             \
      } else {                                                        \
        if (instr->InputAt(1)->IsRegister()) {                        \
          __ asm_instr(i.InputRegister(0), i.InputRegister(1));       \
        } else {                                                      \
          __ asm_instr(i.InputRegister(0), i.InputOperand(1));        \
        }                                                             \
      }                                                               \
    }                                                                 \
  } while (false)

// Emits a multiply: three-operand form when the multiplier is an immediate,
// two-operand (destructive) form otherwise.
#define ASSEMBLE_MULT(asm_instr)                              \
  do {                                                        \
    if (HasImmediateInput(instr, 1)) {                        \
      if (instr->InputAt(0)->IsRegister()) {                  \
        __ asm_instr(i.OutputRegister(), i.InputRegister(0),  \
                     i.InputImmediate(1));                    \
      } else {                                                \
        __ asm_instr(i.OutputRegister(), i.InputOperand(0),   \
                     i.InputImmediate(1));                    \
      }                                                       \
    } else {                                                  \
      if (instr->InputAt(1)->IsRegister()) {                  \
        __ asm_instr(i.OutputRegister(), i.InputRegister(1)); \
      } else {                                                \
        __ asm_instr(i.OutputRegister(), i.InputOperand(1));  \
      }                                                       \
    }                                                         \
  } while (false)

// Emits a shift by an immediate count of the given bit {width}, or by cl
// (the variable-count form, "asm_instr_cl") when the count is not immediate.
#define ASSEMBLE_SHIFT(asm_instr, width)                                   \
  do {                                                                     \
    if (HasImmediateInput(instr, 1)) {                                     \
      if (instr->Output()->IsRegister()) {                                 \
        __ asm_instr(i.OutputRegister(), Immediate(i.InputInt##width(1))); \
      } else {                                                             \
        __ asm_instr(i.OutputOperand(), Immediate(i.InputInt##width(1)));  \
      }                                                                    \
    } else {                                                               \
      if (instr->Output()->IsRegister()) {                                 \
        __ asm_instr##_cl(i.OutputRegister());                             \
      } else {                                                             \
        __ asm_instr##_cl(i.OutputOperand());                              \
      }                                                                    \
    }                                                                      \
  } while (false)

// Emits an extending move (movzx/movsx family) from memory, register, or
// stack slot into the output register.
#define ASSEMBLE_MOVX(asm_instr)                            \
  do {                                                      \
    if (instr->addressing_mode() != kMode_None) {           \
      __ asm_instr(i.OutputRegister(), i.MemoryOperand());  \
    } else if (instr->InputAt(0)->IsRegister()) {           \
      __ asm_instr(i.OutputRegister(), i.InputRegister(0)); \
    } else {                                                \
      __ asm_instr(i.OutputRegister(), i.InputOperand(0));  \
    }                                                       \
  } while (false)

// Emits a destructive SSE binary op: input 0 is both source and destination.
#define ASSEMBLE_SSE_BINOP(asm_instr)                                   \
  do {                                                                  \
    if (instr->InputAt(1)->IsFPRegister()) {                            \
      __ asm_instr(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); \
    } else {                                                            \
      __ asm_instr(i.InputDoubleRegister(0), i.InputOperand(1));        \
    }                                                                   \
  } while (false)

// Emits an SSE unary op from input 0 into the output double register.
#define ASSEMBLE_SSE_UNOP(asm_instr)                                    \
  do {                                                                  \
    if (instr->InputAt(0)->IsFPRegister()) {                            \
      __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); \
    } else {                                                            \
      __ asm_instr(i.OutputDoubleRegister(), i.InputOperand(0));        \
    }                                                                   \
  } while (false)

// Emits a non-destructive three-operand AVX binary op, guarded by a scope
// that asserts AVX availability.
#define ASSEMBLE_AVX_BINOP(asm_instr)                                  \
  do {                                                                 \
    CpuFeatureScope avx_scope(tasm(), AVX);                            \
    if (instr->InputAt(1)->IsFPRegister()) {                           \
      __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
                   i.InputDoubleRegister(1));                          \
    } else {                                                           \
      __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
                   i.InputOperand(1));                                 \
    }                                                                  \
  } while (false)

// Calls the two-argument ieee754 C helper for {name} (e.g. pow, atan2).
#define ASSEMBLE_IEEE754_BINOP(name)                                     \
  do {                                                                   \
    __ PrepareCallCFunction(2);                                          \
    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \
  } while (false)

// Calls the one-argument ieee754 C helper for {name} (e.g. sin, log).
#define ASSEMBLE_IEEE754_UNOP(name)                                      \
  do {                                                                   \
    __ PrepareCallCFunction(1);                                          \
    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 1); \
  } while (false)

// Emits a 32-bit atomic read-modify-write loop: load into rax, apply
// {bin_inst} on a temp, then lock-cmpxchg and retry until no other writer
// intervened.
#define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
  do {                                                          \
    Label binop;                                                \
    __ bind(&binop);                                            \
    __ mov_inst(rax, i.MemoryOperand(1));                       \
    __ movl(i.TempRegister(0), rax);                            \
    __ bin_inst(i.TempRegister(0), i.InputRegister(0));         \
    __ lock();                                                  \
    __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));     \
    __ j(not_equal, &binop);                                    \
  } while (false)

// 64-bit variant of ASSEMBLE_ATOMIC_BINOP (movq for the temp copy).
#define ASSEMBLE_ATOMIC64_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
  do {                                                            \
    Label binop;                                                  \
    __ bind(&binop);                                              \
    __ mov_inst(rax, i.MemoryOperand(1));                         \
    __ movq(i.TempRegister(0), rax);                              \
    __ bin_inst(i.TempRegister(0), i.InputRegister(0));           \
    __ lock();                                                    \
    __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));       \
    __ j(not_equal, &binop);                                      \
  } while (false)

// Emits a SIMD op whose source at {index} may be a simd128 register or a
// memory/stack operand.
#define ASSEMBLE_SIMD_INSTR(opcode, dst_operand, index)       \
  do {                                                        \
    if (instr->InputAt(index)->IsSimd128Register()) {         \
      __ opcode(dst_operand, i.InputSimd128Register(index));  \
    } else {                                                  \
      __ opcode(dst_operand, i.InputOperand(index));          \
    }                                                         \
  } while (false)

// Like ASSEMBLE_SIMD_INSTR, but for ops taking an extra immediate {imm}.
#define ASSEMBLE_SIMD_IMM_INSTR(opcode, dst_operand, index, imm)  \
  do {                                                            \
    if (instr->InputAt(index)->IsSimd128Register()) {             \
      __ opcode(dst_operand, i.InputSimd128Register(index), imm); \
    } else {                                                      \
      __ opcode(dst_operand, i.InputOperand(index), imm);         \
    }                                                             \
  } while (false)

// Emits a punpck-style shuffle whose destination must equal input 0; the
// second source is input 1 when present, otherwise input 0 itself.
#define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode)             \
  do {                                                   \
    XMMRegister dst = i.OutputSimd128Register();         \
    DCHECK_EQ(dst, i.InputSimd128Register(0));           \
    byte input_index = instr->InputCount() == 2 ? 1 : 0; \
    ASSEMBLE_SIMD_INSTR(opcode, dst, input_index);       \
  } while (false)

// Emits an immediate-controlled shuffle requiring the given {SSELevel};
// destination must equal input 0.
#define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, SSELevel, imm)                  \
  do {                                                                    \
    CpuFeatureScope sse_scope(tasm(), SSELevel);                          \
    DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));      \
    __ opcode(i.OutputSimd128Register(), i.InputSimd128Register(1), imm); \
  } while (false)
573 :
// Tears down the current stack frame: informs the unwinding-info writer,
// resets rsp to the frame pointer, and pops the caller's rbp, leaving the
// return address on top of the stack.
void CodeGenerator::AssembleDeconstructFrame() {
  unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
  __ movq(rsp, rbp);
  __ popq(rbp);
}
579 :
// Prepares frame state for a tail call: if a frame exists, reload rbp with
// the value saved at [rbp] (the caller's frame pointer, per the standard
// x64 frame layout), and switch frame accesses to be rsp-relative.
void CodeGenerator::AssemblePrepareTailCall() {
  if (frame_access_state()->has_frame()) {
    __ movq(rbp, MemOperand(rbp, 0));
  }
  frame_access_state()->SetFrameAccessToSP();
}
586 :
// If the current frame is an arguments adaptor frame, drops it: reads the
// caller's (actual) argument count from the adaptor frame and lets
// PrepareForTailCall unwind the adaptor before the tail call proceeds.
// {args_reg} holds the callee's formal argument count; the scratch registers
// must all be distinct from it and from each other.
void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
                                                     Register scratch1,
                                                     Register scratch2,
                                                     Register scratch3) {
  DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
  Label done;

  // Check if current frame is an arguments adaptor frame.
  __ cmpp(Operand(rbp, CommonFrameConstants::kContextOrFrameTypeOffset),
          Immediate(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
  __ j(not_equal, &done, Label::kNear);

  // Load arguments count from current arguments adaptor frame (note, it
  // does not include receiver).
  Register caller_args_count_reg = scratch1;
  __ SmiUntag(caller_args_count_reg,
              Operand(rbp, ArgumentsAdaptorFrameConstants::kLengthOffset));

  ParameterCount callee_args_count(args_reg);
  __ PrepareForTailCall(callee_args_count, caller_args_count_reg, scratch2,
                        scratch3);
  __ bind(&done);
}
610 :
611 : namespace {
612 :
613 264733 : void AdjustStackPointerForTailCall(Assembler* assembler,
614 : FrameAccessState* state,
615 : int new_slot_above_sp,
616 : bool allow_shrinkage = true) {
617 : int current_sp_offset = state->GetSPToFPSlotCount() +
618 264733 : StandardFrameConstants::kFixedSlotCountAboveFp;
619 264733 : int stack_slot_delta = new_slot_above_sp - current_sp_offset;
620 264733 : if (stack_slot_delta > 0) {
621 1820 : assembler->subq(rsp, Immediate(stack_slot_delta * kSystemPointerSize));
622 : state->IncreaseSPDelta(stack_slot_delta);
623 263823 : } else if (allow_shrinkage && stack_slot_delta < 0) {
624 142448 : assembler->addq(rsp, Immediate(-stack_slot_delta * kSystemPointerSize));
625 : state->IncreaseSPDelta(stack_slot_delta);
626 : }
627 264733 : }
628 :
629 5960 : void SetupShuffleMaskOnStack(TurboAssembler* assembler, uint32_t* mask) {
630 5960 : int64_t shuffle_mask = (mask[2]) | (static_cast<uint64_t>(mask[3]) << 32);
631 5960 : assembler->movq(kScratchRegister, shuffle_mask);
632 5960 : assembler->Push(kScratchRegister);
633 5960 : shuffle_mask = (mask[0]) | (static_cast<uint64_t>(mask[1]) << 32);
634 5960 : assembler->movq(kScratchRegister, shuffle_mask);
635 5960 : assembler->Push(kScratchRegister);
636 5960 : }
637 :
638 : } // namespace
639 :
// First half of tail-call stack setup, run before the gap moves: gap moves
// that write scalars/immediates to the stack slots ending exactly at
// {first_unused_stack_slot} are converted into direct pushes, then rsp is
// adjusted (growth only; shrinking is deferred to AssembleTailCallAfterGap).
void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
                                              int first_unused_stack_slot) {
  CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
  ZoneVector<MoveOperands*> pushes(zone());
  GetPushCompatibleMoves(instr, flags, &pushes);

  // Only use the push strategy if the run of push-compatible moves ends
  // exactly at the first unused slot.
  if (!pushes.empty() &&
      (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
       first_unused_stack_slot)) {
    X64OperandConverter g(this, instr);
    for (auto move : pushes) {
      LocationOperand destination_location(
          LocationOperand::cast(move->destination()));
      InstructionOperand source(move->source());
      // Position rsp just above the destination slot before pushing.
      AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                    destination_location.index());
      if (source.IsStackSlot()) {
        LocationOperand source_location(LocationOperand::cast(source));
        __ Push(g.SlotToOperand(source_location.index()));
      } else if (source.IsRegister()) {
        LocationOperand source_location(LocationOperand::cast(source));
        __ Push(source_location.GetRegister());
      } else if (source.IsImmediate()) {
        __ Push(Immediate(ImmediateOperand::cast(source).inline_value()));
      } else {
        // Pushes of non-scalar data types is not supported.
        UNIMPLEMENTED();
      }
      // Each push grows the stack by one slot; the gap move is now done.
      frame_access_state()->IncreaseSPDelta(1);
      move->Eliminate();
    }
  }
  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                first_unused_stack_slot, false);
}
675 :
// Second half of tail-call stack setup, run after the gap moves: brings rsp
// to the callee's expected slot layout, allowing both growth and shrinkage.
void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
                                             int first_unused_stack_slot) {
  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                first_unused_stack_slot);
}
681 :
// Check that {kJavaScriptCallCodeStartRegister} is correct.
void CodeGenerator::AssembleCodeStartRegisterCheck() {
  // Recompute the code start address into rbx and compare it against the
  // value the caller placed in the dedicated register; abort on mismatch.
  __ ComputeCodeStartAddress(rbx);
  __ cmpq(rbx, kJavaScriptCallCodeStartRegister);
  __ Assert(equal, AbortReason::kWrongFunctionCodeStart);
}
688 :
// Check if the code object is marked for deoptimization. If it is, then it
// jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need
// to:
//    1. read from memory the word that contains that bit, which can be found in
//       the flags in the referenced {CodeDataContainer} object;
//    2. test kMarkedForDeoptimizationBit in those flags; and
//    3. if it is not zero then it jumps to the builtin.
void CodeGenerator::BailoutIfDeoptimized() {
  int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
  // {kJavaScriptCallCodeStartRegister} points at the current Code object's
  // start; load its CodeDataContainer into rbx.
  __ LoadTaggedPointerField(rbx,
                            Operand(kJavaScriptCallCodeStartRegister, offset));
  __ testl(FieldOperand(rbx, CodeDataContainer::kKindSpecificFlagsOffset),
           Immediate(1 << Code::kMarkedForDeoptimizationBit));
  // Conditional tail-jump: only taken when the deoptimization bit is set.
  __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
          RelocInfo::CODE_TARGET, not_zero);
}
705 :
void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
  // Set a mask which has all bits set in the normal case, but has all
  // bits cleared if we are speculatively executing the wrong PC.
  __ ComputeCodeStartAddress(rbx);
  __ xorq(kSpeculationPoisonRegister, kSpeculationPoisonRegister);
  __ cmpp(kJavaScriptCallCodeStartRegister, rbx);
  // mov does not modify the flags, so the cmpp result survives into the
  // cmovq below; rbx holds the all-ones mask.
  __ movp(rbx, Immediate(-1));
  __ cmovq(equal, kSpeculationPoisonRegister, rbx);
}
715 :
// Masks the incoming JS function, context, and stack pointer with the
// speculation poison register, so that on a mispredicted path (poison == 0,
// see GenerateSpeculationPoisonFromCodeStartRegister) they are cleared.
void CodeGenerator::AssembleRegisterArgumentPoisoning() {
  __ andq(kJSFunctionRegister, kSpeculationPoisonRegister);
  __ andq(kContextRegister, kSpeculationPoisonRegister);
  __ andq(rsp, kSpeculationPoisonRegister);
}
721 :
722 : // Assembles an instruction after register allocation, producing machine code.
723 64857217 : CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
724 84746701 : Instruction* instr) {
725 : X64OperandConverter i(this, instr);
726 : InstructionCode opcode = instr->opcode();
727 64857217 : ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
728 64857217 : switch (arch_opcode) {
729 : case kArchCallCodeObject: {
730 4040731 : if (HasImmediateInput(instr, 0)) {
731 4032826 : Handle<Code> code = i.InputCode(0);
732 4032826 : __ Call(code, RelocInfo::CODE_TARGET);
733 : } else {
734 7935 : Register reg = i.InputRegister(0);
735 : DCHECK_IMPLIES(
736 : HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
737 : reg == kJavaScriptCallCodeStartRegister);
738 7935 : __ LoadCodeObjectEntry(reg, reg);
739 7933 : if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
740 0 : __ RetpolineCall(reg);
741 : } else {
742 7933 : __ call(reg);
743 : }
744 : }
745 4040766 : RecordCallPosition(instr);
746 : frame_access_state()->ClearSPDelta();
747 : break;
748 : }
749 : case kArchCallBuiltinPointer: {
750 : DCHECK(!HasImmediateInput(instr, 0));
751 11790 : Register builtin_pointer = i.InputRegister(0);
752 11790 : __ CallBuiltinPointer(builtin_pointer);
753 11790 : RecordCallPosition(instr);
754 : frame_access_state()->ClearSPDelta();
755 : break;
756 : }
757 : case kArchCallWasmFunction: {
758 1171159 : if (HasImmediateInput(instr, 0)) {
759 94900 : Constant constant = i.ToConstant(instr->InputAt(0));
760 94923 : Address wasm_code = static_cast<Address>(constant.ToInt64());
761 94923 : if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
762 94902 : __ near_call(wasm_code, constant.rmode());
763 : } else {
764 0 : if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
765 0 : __ RetpolineCall(wasm_code, constant.rmode());
766 : } else {
767 0 : __ Call(wasm_code, constant.rmode());
768 : }
769 : }
770 : } else {
771 1076259 : Register reg = i.InputRegister(0);
772 1076259 : if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
773 0 : __ RetpolineCall(reg);
774 : } else {
775 1076259 : __ call(reg);
776 : }
777 : }
778 1171174 : RecordCallPosition(instr);
779 : frame_access_state()->ClearSPDelta();
780 : break;
781 : }
782 : case kArchTailCallCodeObjectFromJSFunction:
783 : case kArchTailCallCodeObject: {
784 34516 : if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) {
785 : AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
786 : i.TempRegister(0), i.TempRegister(1),
787 1344 : i.TempRegister(2));
788 : }
789 34516 : if (HasImmediateInput(instr, 0)) {
790 29670 : Handle<Code> code = i.InputCode(0);
791 29670 : __ Jump(code, RelocInfo::CODE_TARGET);
792 : } else {
793 9692 : Register reg = i.InputRegister(0);
794 : DCHECK_IMPLIES(
795 : HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
796 : reg == kJavaScriptCallCodeStartRegister);
797 4846 : __ LoadCodeObjectEntry(reg, reg);
798 4846 : if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
799 0 : __ RetpolineJump(reg);
800 : } else {
801 4846 : __ jmp(reg);
802 : }
803 : }
804 : unwinding_info_writer_.MarkBlockWillExit();
805 : frame_access_state()->ClearSPDelta();
806 34516 : frame_access_state()->SetFrameAccessToDefault();
807 34516 : break;
808 : }
809 : case kArchTailCallWasm: {
810 0 : if (HasImmediateInput(instr, 0)) {
811 0 : Constant constant = i.ToConstant(instr->InputAt(0));
812 0 : Address wasm_code = static_cast<Address>(constant.ToInt64());
813 0 : if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
814 0 : __ near_jmp(wasm_code, constant.rmode());
815 : } else {
816 : __ Move(kScratchRegister, wasm_code, constant.rmode());
817 0 : __ jmp(kScratchRegister);
818 : }
819 : } else {
820 0 : Register reg = i.InputRegister(0);
821 0 : if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
822 0 : __ RetpolineJump(reg);
823 : } else {
824 0 : __ jmp(reg);
825 : }
826 : }
827 : unwinding_info_writer_.MarkBlockWillExit();
828 : frame_access_state()->ClearSPDelta();
829 0 : frame_access_state()->SetFrameAccessToDefault();
830 0 : break;
831 : }
832 : case kArchTailCallAddress: {
833 83776 : CHECK(!HasImmediateInput(instr, 0));
834 83776 : Register reg = i.InputRegister(0);
835 : DCHECK_IMPLIES(
836 : HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
837 : reg == kJavaScriptCallCodeStartRegister);
838 83776 : if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
839 0 : __ RetpolineJump(reg);
840 : } else {
841 83776 : __ jmp(reg);
842 : }
843 : unwinding_info_writer_.MarkBlockWillExit();
844 : frame_access_state()->ClearSPDelta();
845 83776 : frame_access_state()->SetFrameAccessToDefault();
846 : break;
847 : }
848 : case kArchCallJSFunction: {
849 : Register func = i.InputRegister(0);
850 105787 : if (FLAG_debug_code) {
851 : // Check the function's context matches the context argument.
852 19 : __ cmp_tagged(rsi, FieldOperand(func, JSFunction::kContextOffset));
853 19 : __ Assert(equal, AbortReason::kWrongFunctionContext);
854 : }
855 : static_assert(kJavaScriptCallCodeStartRegister == rcx, "ABI mismatch");
856 : __ LoadTaggedPointerField(rcx,
857 105787 : FieldOperand(func, JSFunction::kCodeOffset));
858 105787 : __ CallCodeObject(rcx);
859 : frame_access_state()->ClearSPDelta();
860 105788 : RecordCallPosition(instr);
861 : break;
862 : }
863 : case kArchPrepareCallCFunction: {
864 : // Frame alignment requires using FP-relative frame addressing.
865 : frame_access_state()->SetFrameAccessToFP();
866 26287 : int const num_parameters = MiscField::decode(instr->opcode());
867 26287 : __ PrepareCallCFunction(num_parameters);
868 26287 : break;
869 : }
870 : case kArchSaveCallerRegisters: {
871 : fp_mode_ =
872 677 : static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
873 : DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
874 : // kReturnRegister0 should have been saved before entering the stub.
875 677 : int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
876 : DCHECK(IsAligned(bytes, kSystemPointerSize));
877 : DCHECK_EQ(0, frame_access_state()->sp_delta());
878 677 : frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
879 : DCHECK(!caller_registers_saved_);
880 677 : caller_registers_saved_ = true;
881 677 : break;
882 : }
883 : case kArchRestoreCallerRegisters: {
884 : DCHECK(fp_mode_ ==
885 : static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
886 : DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
887 : // Don't overwrite the returned value.
888 677 : int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
889 677 : frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
890 : DCHECK_EQ(0, frame_access_state()->sp_delta());
891 : DCHECK(caller_registers_saved_);
892 677 : caller_registers_saved_ = false;
893 677 : break;
894 : }
895 : case kArchPrepareTailCall:
896 118292 : AssemblePrepareTailCall();
897 118292 : break;
898 : case kArchCallCFunction: {
899 : int const num_parameters = MiscField::decode(instr->opcode());
900 26287 : if (HasImmediateInput(instr, 0)) {
901 25142 : ExternalReference ref = i.InputExternalReference(0);
902 25142 : __ CallCFunction(ref, num_parameters);
903 : } else {
904 1145 : Register func = i.InputRegister(0);
905 1145 : __ CallCFunction(func, num_parameters);
906 : }
907 26287 : frame_access_state()->SetFrameAccessToDefault();
908 : // Ideally, we should decrement SP delta to match the change of stack
909 : // pointer in CallCFunction. However, for certain architectures (e.g.
910 : // ARM), there may be more strict alignment requirement, causing old SP
911 : // to be saved on the stack. In those cases, we can not calculate the SP
912 : // delta statically.
913 : frame_access_state()->ClearSPDelta();
914 26287 : if (caller_registers_saved_) {
915 : // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
916 : // Here, we assume the sequence to be:
917 : // kArchSaveCallerRegisters;
918 : // kArchCallCFunction;
919 : // kArchRestoreCallerRegisters;
920 : int bytes =
921 677 : __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
922 677 : frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
923 : }
924 : // TODO(tebbi): Do we need an lfence here?
925 : break;
926 : }
927 : case kArchJmp:
928 4863679 : AssembleArchJump(i.InputRpo(0));
929 4863712 : break;
930 : case kArchBinarySearchSwitch:
931 39506 : AssembleArchBinarySearchSwitch(instr);
932 39506 : break;
933 : case kArchLookupSwitch:
934 0 : AssembleArchLookupSwitch(instr);
935 0 : break;
936 : case kArchTableSwitch:
937 304 : AssembleArchTableSwitch(instr);
938 304 : break;
939 : case kArchComment:
940 5 : __ RecordComment(reinterpret_cast<const char*>(i.InputInt64(0)));
941 5 : break;
942 : case kArchDebugAbort:
943 : DCHECK(i.InputRegister(0) == rdx);
944 170 : if (!frame_access_state()->has_frame()) {
945 : // We don't actually want to generate a pile of code for this, so just
946 : // claim there is a stack frame, without generating one.
947 0 : FrameScope scope(tasm(), StackFrame::NONE);
948 : __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
949 0 : RelocInfo::CODE_TARGET);
950 : } else {
951 : __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
952 170 : RelocInfo::CODE_TARGET);
953 : }
954 170 : __ int3();
955 : unwinding_info_writer_.MarkBlockWillExit();
956 : break;
957 : case kArchDebugBreak:
958 134153 : __ int3();
959 134153 : break;
960 : case kArchThrowTerminator:
961 : unwinding_info_writer_.MarkBlockWillExit();
962 : break;
963 : case kArchNop:
964 : // don't emit code for nops.
965 : break;
966 : case kArchDeoptimize: {
967 : int deopt_state_id =
968 47591 : BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
969 : CodeGenResult result =
970 47591 : AssembleDeoptimizerCall(deopt_state_id, current_source_position_);
971 47591 : if (result != kSuccess) return result;
972 : unwinding_info_writer_.MarkBlockWillExit();
973 : break;
974 : }
975 : case kArchRet:
976 3264254 : AssembleReturn(instr->InputAt(0));
977 3264217 : break;
978 : case kArchStackPointer:
979 0 : __ movq(i.OutputRegister(), rsp);
980 : break;
981 : case kArchFramePointer:
982 30547 : __ movq(i.OutputRegister(), rbp);
983 : break;
984 : case kArchParentFramePointer:
985 49229 : if (frame_access_state()->has_frame()) {
986 77127 : __ movq(i.OutputRegister(), Operand(rbp, 0));
987 : } else {
988 23520 : __ movq(i.OutputRegister(), rbp);
989 : }
990 : break;
991 : case kArchTruncateDoubleToI: {
992 : auto result = i.OutputRegister();
993 : auto input = i.InputDoubleRegister(0);
994 : auto ool = new (zone()) OutOfLineTruncateDoubleToI(
995 : this, result, input, DetermineStubCallMode(),
996 111786 : &unwinding_info_writer_);
997 : // We use Cvttsd2siq instead of Cvttsd2si due to performance reasons. The
998 : // use of Cvttsd2siq requires the movl below to avoid sign extension.
999 55881 : __ Cvttsd2siq(result, input);
1000 55903 : __ cmpq(result, Immediate(1));
1001 55902 : __ j(overflow, ool->entry());
1002 55902 : __ bind(ool->exit());
1003 : __ movl(result, result);
1004 : break;
1005 : }
1006 : case kArchStoreWithWriteBarrier: {
1007 : RecordWriteMode mode =
1008 : static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
1009 : Register object = i.InputRegister(0);
1010 313550 : size_t index = 0;
1011 313550 : Operand operand = i.MemoryOperand(&index);
1012 313550 : Register value = i.InputRegister(index);
1013 : Register scratch0 = i.TempRegister(0);
1014 : Register scratch1 = i.TempRegister(1);
1015 : auto ool = new (zone())
1016 : OutOfLineRecordWrite(this, object, operand, value, scratch0, scratch1,
1017 627100 : mode, DetermineStubCallMode());
1018 313550 : __ movp(operand, value);
1019 : __ CheckPageFlag(object, scratch0,
1020 : MemoryChunk::kPointersFromHereAreInterestingMask,
1021 313550 : not_zero, ool->entry());
1022 313550 : __ bind(ool->exit());
1023 : break;
1024 : }
1025 : case kArchWordPoisonOnSpeculation:
1026 : DCHECK_EQ(i.OutputRegister(), i.InputRegister(0));
1027 0 : __ andq(i.InputRegister(0), kSpeculationPoisonRegister);
1028 0 : break;
1029 : case kLFence:
1030 0 : __ lfence();
1031 0 : break;
1032 : case kArchStackSlot: {
1033 : FrameOffset offset =
1034 2512 : frame_access_state()->GetFrameOffset(i.InputInt32(0));
1035 1256 : Register base = offset.from_stack_pointer() ? rsp : rbp;
1036 3768 : __ leaq(i.OutputRegister(), Operand(base, offset.offset()));
1037 : break;
1038 : }
1039 : case kIeee754Float64Acos:
1040 117 : ASSEMBLE_IEEE754_UNOP(acos);
1041 117 : break;
1042 : case kIeee754Float64Acosh:
1043 117 : ASSEMBLE_IEEE754_UNOP(acosh);
1044 117 : break;
1045 : case kIeee754Float64Asin:
1046 117 : ASSEMBLE_IEEE754_UNOP(asin);
1047 117 : break;
1048 : case kIeee754Float64Asinh:
1049 117 : ASSEMBLE_IEEE754_UNOP(asinh);
1050 117 : break;
1051 : case kIeee754Float64Atan:
1052 137 : ASSEMBLE_IEEE754_UNOP(atan);
1053 137 : break;
1054 : case kIeee754Float64Atanh:
1055 117 : ASSEMBLE_IEEE754_UNOP(atanh);
1056 117 : break;
1057 : case kIeee754Float64Atan2:
1058 134 : ASSEMBLE_IEEE754_BINOP(atan2);
1059 134 : break;
1060 : case kIeee754Float64Cbrt:
1061 117 : ASSEMBLE_IEEE754_UNOP(cbrt);
1062 117 : break;
1063 : case kIeee754Float64Cos:
1064 295 : ASSEMBLE_IEEE754_UNOP(cos);
1065 295 : break;
1066 : case kIeee754Float64Cosh:
1067 124 : ASSEMBLE_IEEE754_UNOP(cosh);
1068 124 : break;
1069 : case kIeee754Float64Exp:
1070 152 : ASSEMBLE_IEEE754_UNOP(exp);
1071 152 : break;
1072 : case kIeee754Float64Expm1:
1073 124 : ASSEMBLE_IEEE754_UNOP(expm1);
1074 124 : break;
1075 : case kIeee754Float64Log:
1076 288 : ASSEMBLE_IEEE754_UNOP(log);
1077 288 : break;
1078 : case kIeee754Float64Log1p:
1079 117 : ASSEMBLE_IEEE754_UNOP(log1p);
1080 117 : break;
1081 : case kIeee754Float64Log2:
1082 117 : ASSEMBLE_IEEE754_UNOP(log2);
1083 117 : break;
1084 : case kIeee754Float64Log10:
1085 117 : ASSEMBLE_IEEE754_UNOP(log10);
1086 117 : break;
1087 : case kIeee754Float64Pow: {
1088 : // TODO(bmeurer): Improve integration of the stub.
1089 : __ Movsd(xmm2, xmm0);
1090 342 : __ Call(BUILTIN_CODE(isolate(), MathPowInternal), RelocInfo::CODE_TARGET);
1091 : __ Movsd(xmm0, xmm3);
1092 : break;
1093 : }
1094 : case kIeee754Float64Sin:
1095 298 : ASSEMBLE_IEEE754_UNOP(sin);
1096 298 : break;
1097 : case kIeee754Float64Sinh:
1098 124 : ASSEMBLE_IEEE754_UNOP(sinh);
1099 124 : break;
1100 : case kIeee754Float64Tan:
1101 173 : ASSEMBLE_IEEE754_UNOP(tan);
1102 173 : break;
1103 : case kIeee754Float64Tanh:
1104 124 : ASSEMBLE_IEEE754_UNOP(tanh);
1105 124 : break;
1106 : case kX64Add32:
1107 377628 : ASSEMBLE_BINOP(addl);
1108 : break;
1109 : case kX64Add:
1110 370993 : ASSEMBLE_BINOP(addq);
1111 : break;
1112 : case kX64Sub32:
1113 239071 : ASSEMBLE_BINOP(subl);
1114 : break;
1115 : case kX64Sub:
1116 261409 : ASSEMBLE_BINOP(subq);
1117 : break;
1118 : case kX64And32:
1119 956474 : ASSEMBLE_BINOP(andl);
1120 : break;
1121 : case kX64And:
1122 1313217 : ASSEMBLE_BINOP(andq);
1123 : break;
1124 : case kX64Cmp8:
1125 39927 : ASSEMBLE_COMPARE(cmpb);
1126 : break;
1127 : case kX64Cmp16:
1128 1595502 : ASSEMBLE_COMPARE(cmpw);
1129 : break;
1130 : case kX64Cmp32:
1131 6188112 : ASSEMBLE_COMPARE(cmpl);
1132 : break;
1133 : case kX64Cmp:
1134 8344623 : ASSEMBLE_COMPARE(cmpq);
1135 : break;
1136 : case kX64Test8:
1137 309193 : ASSEMBLE_COMPARE(testb);
1138 : break;
1139 : case kX64Test16:
1140 119287 : ASSEMBLE_COMPARE(testw);
1141 : break;
1142 : case kX64Test32:
1143 517169 : ASSEMBLE_COMPARE(testl);
1144 : break;
1145 : case kX64Test:
1146 3131043 : ASSEMBLE_COMPARE(testq);
1147 : break;
1148 : case kX64Imul32:
1149 315568 : ASSEMBLE_MULT(imull);
1150 : break;
1151 : case kX64Imul:
1152 85328 : ASSEMBLE_MULT(imulq);
1153 : break;
1154 : case kX64ImulHigh32:
1155 15456 : if (instr->InputAt(1)->IsRegister()) {
1156 7728 : __ imull(i.InputRegister(1));
1157 : } else {
1158 0 : __ imull(i.InputOperand(1));
1159 : }
1160 : break;
1161 : case kX64UmulHigh32:
1162 3106 : if (instr->InputAt(1)->IsRegister()) {
1163 1553 : __ mull(i.InputRegister(1));
1164 : } else {
1165 0 : __ mull(i.InputOperand(1));
1166 : }
1167 : break;
1168 : case kX64Idiv32:
1169 74564 : __ cdq();
1170 74564 : __ idivl(i.InputRegister(1));
1171 : break;
1172 : case kX64Idiv:
1173 4670 : __ cqo();
1174 4670 : __ idivq(i.InputRegister(1));
1175 : break;
1176 : case kX64Udiv32:
1177 71331 : __ xorl(rdx, rdx);
1178 71331 : __ divl(i.InputRegister(1));
1179 : break;
1180 : case kX64Udiv:
1181 3637 : __ xorq(rdx, rdx);
1182 3637 : __ divq(i.InputRegister(1));
1183 : break;
1184 : case kX64Not:
1185 110 : ASSEMBLE_UNOP(notq);
1186 : break;
1187 : case kX64Not32:
1188 6846 : ASSEMBLE_UNOP(notl);
1189 : break;
1190 : case kX64Neg:
1191 21304 : ASSEMBLE_UNOP(negq);
1192 : break;
1193 : case kX64Neg32:
1194 14090 : ASSEMBLE_UNOP(negl);
1195 : break;
1196 : case kX64Or32:
1197 443541 : ASSEMBLE_BINOP(orl);
1198 : break;
1199 : case kX64Or:
1200 362694 : ASSEMBLE_BINOP(orq);
1201 : break;
1202 : case kX64Xor32:
1203 192976 : ASSEMBLE_BINOP(xorl);
1204 : break;
1205 : case kX64Xor:
1206 1571 : ASSEMBLE_BINOP(xorq);
1207 : break;
1208 : case kX64Shl32:
1209 199011 : ASSEMBLE_SHIFT(shll, 5);
1210 : break;
1211 : case kX64Shl:
1212 1500647 : ASSEMBLE_SHIFT(shlq, 6);
1213 : break;
1214 : case kX64Shr32:
1215 530612 : ASSEMBLE_SHIFT(shrl, 5);
1216 : break;
1217 : case kX64Shr:
1218 1502533 : ASSEMBLE_SHIFT(shrq, 6);
1219 : break;
1220 : case kX64Sar32:
1221 181693 : ASSEMBLE_SHIFT(sarl, 5);
1222 : break;
1223 : case kX64Sar:
1224 721497 : ASSEMBLE_SHIFT(sarq, 6);
1225 : break;
1226 : case kX64Ror32:
1227 272648 : ASSEMBLE_SHIFT(rorl, 5);
1228 : break;
1229 : case kX64Ror:
1230 618 : ASSEMBLE_SHIFT(rorq, 6);
1231 : break;
1232 : case kX64Lzcnt:
1233 94 : if (instr->InputAt(0)->IsRegister()) {
1234 47 : __ Lzcntq(i.OutputRegister(), i.InputRegister(0));
1235 : } else {
1236 0 : __ Lzcntq(i.OutputRegister(), i.InputOperand(0));
1237 : }
1238 : break;
1239 : case kX64Lzcnt32:
1240 1694 : if (instr->InputAt(0)->IsRegister()) {
1241 847 : __ Lzcntl(i.OutputRegister(), i.InputRegister(0));
1242 : } else {
1243 0 : __ Lzcntl(i.OutputRegister(), i.InputOperand(0));
1244 : }
1245 : break;
1246 : case kX64Tzcnt:
1247 112 : if (instr->InputAt(0)->IsRegister()) {
1248 56 : __ Tzcntq(i.OutputRegister(), i.InputRegister(0));
1249 : } else {
1250 0 : __ Tzcntq(i.OutputRegister(), i.InputOperand(0));
1251 : }
1252 : break;
1253 : case kX64Tzcnt32:
1254 1474 : if (instr->InputAt(0)->IsRegister()) {
1255 737 : __ Tzcntl(i.OutputRegister(), i.InputRegister(0));
1256 : } else {
1257 0 : __ Tzcntl(i.OutputRegister(), i.InputOperand(0));
1258 : }
1259 : break;
1260 : case kX64Popcnt:
1261 124 : if (instr->InputAt(0)->IsRegister()) {
1262 62 : __ Popcntq(i.OutputRegister(), i.InputRegister(0));
1263 : } else {
1264 0 : __ Popcntq(i.OutputRegister(), i.InputOperand(0));
1265 : }
1266 : break;
1267 : case kX64Popcnt32:
1268 266 : if (instr->InputAt(0)->IsRegister()) {
1269 133 : __ Popcntl(i.OutputRegister(), i.InputRegister(0));
1270 : } else {
1271 0 : __ Popcntl(i.OutputRegister(), i.InputOperand(0));
1272 : }
1273 : break;
1274 : case kX64Bswap:
1275 15 : __ bswapq(i.OutputRegister());
1276 15 : break;
1277 : case kX64Bswap32:
1278 55 : __ bswapl(i.OutputRegister());
1279 55 : break;
1280 : case kSSEFloat32Cmp:
1281 0 : ASSEMBLE_SSE_BINOP(Ucomiss);
1282 : break;
1283 : case kSSEFloat32Add:
1284 0 : ASSEMBLE_SSE_BINOP(addss);
1285 : break;
1286 : case kSSEFloat32Sub:
1287 0 : ASSEMBLE_SSE_BINOP(subss);
1288 : break;
1289 : case kSSEFloat32Mul:
1290 0 : ASSEMBLE_SSE_BINOP(mulss);
1291 : break;
1292 : case kSSEFloat32Div:
1293 0 : ASSEMBLE_SSE_BINOP(divss);
1294 : // Don't delete this mov. It may improve performance on some CPUs,
1295 : // when there is a (v)mulss depending on the result.
1296 0 : __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1297 0 : break;
1298 : case kSSEFloat32Abs: {
1299 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1300 0 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1301 0 : __ psrlq(kScratchDoubleReg, 33);
1302 0 : __ andps(i.OutputDoubleRegister(), kScratchDoubleReg);
1303 0 : break;
1304 : }
1305 : case kSSEFloat32Neg: {
1306 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1307 0 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1308 0 : __ psllq(kScratchDoubleReg, 31);
1309 0 : __ xorps(i.OutputDoubleRegister(), kScratchDoubleReg);
1310 0 : break;
1311 : }
1312 : case kSSEFloat32Sqrt:
1313 627 : ASSEMBLE_SSE_UNOP(sqrtss);
1314 : break;
1315 : case kSSEFloat32ToFloat64:
1316 72489 : ASSEMBLE_SSE_UNOP(Cvtss2sd);
1317 : break;
1318 : case kSSEFloat32Round: {
1319 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
1320 : RoundingMode const mode =
1321 : static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1322 : __ Roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1323 : break;
1324 : }
1325 : case kSSEFloat32ToInt32:
1326 1050 : if (instr->InputAt(0)->IsFPRegister()) {
1327 525 : __ Cvttss2si(i.OutputRegister(), i.InputDoubleRegister(0));
1328 : } else {
1329 0 : __ Cvttss2si(i.OutputRegister(), i.InputOperand(0));
1330 : }
1331 : break;
1332 : case kSSEFloat32ToUint32: {
1333 150 : if (instr->InputAt(0)->IsFPRegister()) {
1334 75 : __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1335 : } else {
1336 0 : __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
1337 : }
1338 : break;
1339 : }
1340 : case kSSEFloat64Cmp:
1341 3448 : ASSEMBLE_SSE_BINOP(Ucomisd);
1342 : break;
1343 : case kSSEFloat64Add:
1344 960 : ASSEMBLE_SSE_BINOP(addsd);
1345 : break;
1346 : case kSSEFloat64Sub:
1347 660 : ASSEMBLE_SSE_BINOP(subsd);
1348 : break;
1349 : case kSSEFloat64Mul:
1350 126 : ASSEMBLE_SSE_BINOP(mulsd);
1351 : break;
1352 : case kSSEFloat64Div:
1353 78 : ASSEMBLE_SSE_BINOP(divsd);
1354 : // Don't delete this mov. It may improve performance on some CPUs,
1355 : // when there is a (v)mulsd depending on the result.
1356 26 : __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1357 : break;
1358 : case kSSEFloat64Mod: {
1359 1628 : __ subq(rsp, Immediate(kDoubleSize));
1360 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1361 3256 : kDoubleSize);
1362 : // Move values to st(0) and st(1).
1363 4884 : __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(1));
1364 1628 : __ fld_d(Operand(rsp, 0));
1365 4884 : __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
1366 1628 : __ fld_d(Operand(rsp, 0));
1367 : // Loop while fprem isn't done.
1368 1628 : Label mod_loop;
1369 1628 : __ bind(&mod_loop);
1370 : // This instructions traps on all kinds inputs, but we are assuming the
1371 : // floating point control word is set to ignore them all.
1372 1628 : __ fprem();
1373 : // The following 2 instruction implicitly use rax.
1374 1628 : __ fnstsw_ax();
1375 1628 : if (CpuFeatures::IsSupported(SAHF)) {
1376 : CpuFeatureScope sahf_scope(tasm(), SAHF);
1377 1596 : __ sahf();
1378 : } else {
1379 : __ shrl(rax, Immediate(8));
1380 32 : __ andl(rax, Immediate(0xFF));
1381 32 : __ pushq(rax);
1382 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1383 64 : kSystemPointerSize);
1384 32 : __ popfq();
1385 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1386 64 : -kSystemPointerSize);
1387 : }
1388 1628 : __ j(parity_even, &mod_loop);
1389 : // Move output to stack and clean up.
1390 1628 : __ fstp(1);
1391 1628 : __ fstp_d(Operand(rsp, 0));
1392 3256 : __ Movsd(i.OutputDoubleRegister(), Operand(rsp, 0));
1393 1628 : __ addq(rsp, Immediate(kDoubleSize));
1394 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1395 3256 : -kDoubleSize);
1396 : break;
1397 : }
1398 : case kSSEFloat32Max: {
1399 89 : Label compare_nan, compare_swap, done_compare;
1400 178 : if (instr->InputAt(1)->IsFPRegister()) {
1401 : __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1402 : } else {
1403 0 : __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1404 : }
1405 : auto ool =
1406 89 : new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
1407 89 : __ j(parity_even, ool->entry());
1408 89 : __ j(above, &done_compare, Label::kNear);
1409 89 : __ j(below, &compare_swap, Label::kNear);
1410 89 : __ Movmskps(kScratchRegister, i.InputDoubleRegister(0));
1411 : __ testl(kScratchRegister, Immediate(1));
1412 89 : __ j(zero, &done_compare, Label::kNear);
1413 89 : __ bind(&compare_swap);
1414 178 : if (instr->InputAt(1)->IsFPRegister()) {
1415 89 : __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1416 : } else {
1417 0 : __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
1418 : }
1419 89 : __ bind(&done_compare);
1420 89 : __ bind(ool->exit());
1421 : break;
1422 : }
1423 : case kSSEFloat32Min: {
1424 94 : Label compare_swap, done_compare;
1425 188 : if (instr->InputAt(1)->IsFPRegister()) {
1426 : __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1427 : } else {
1428 0 : __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1429 : }
1430 : auto ool =
1431 94 : new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
1432 94 : __ j(parity_even, ool->entry());
1433 94 : __ j(below, &done_compare, Label::kNear);
1434 94 : __ j(above, &compare_swap, Label::kNear);
1435 188 : if (instr->InputAt(1)->IsFPRegister()) {
1436 94 : __ Movmskps(kScratchRegister, i.InputDoubleRegister(1));
1437 : } else {
1438 0 : __ Movss(kScratchDoubleReg, i.InputOperand(1));
1439 : __ Movmskps(kScratchRegister, kScratchDoubleReg);
1440 : }
1441 : __ testl(kScratchRegister, Immediate(1));
1442 94 : __ j(zero, &done_compare, Label::kNear);
1443 94 : __ bind(&compare_swap);
1444 188 : if (instr->InputAt(1)->IsFPRegister()) {
1445 94 : __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1446 : } else {
1447 0 : __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
1448 : }
1449 94 : __ bind(&done_compare);
1450 94 : __ bind(ool->exit());
1451 : break;
1452 : }
1453 : case kSSEFloat64Max: {
1454 282 : Label compare_nan, compare_swap, done_compare;
1455 564 : if (instr->InputAt(1)->IsFPRegister()) {
1456 : __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1457 : } else {
1458 0 : __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1459 : }
1460 : auto ool =
1461 282 : new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
1462 282 : __ j(parity_even, ool->entry());
1463 282 : __ j(above, &done_compare, Label::kNear);
1464 282 : __ j(below, &compare_swap, Label::kNear);
1465 282 : __ Movmskpd(kScratchRegister, i.InputDoubleRegister(0));
1466 : __ testl(kScratchRegister, Immediate(1));
1467 282 : __ j(zero, &done_compare, Label::kNear);
1468 282 : __ bind(&compare_swap);
1469 564 : if (instr->InputAt(1)->IsFPRegister()) {
1470 282 : __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1471 : } else {
1472 0 : __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1473 : }
1474 282 : __ bind(&done_compare);
1475 282 : __ bind(ool->exit());
1476 : break;
1477 : }
1478 : case kSSEFloat64Min: {
1479 365 : Label compare_swap, done_compare;
1480 730 : if (instr->InputAt(1)->IsFPRegister()) {
1481 : __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1482 : } else {
1483 0 : __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1484 : }
1485 : auto ool =
1486 365 : new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
1487 365 : __ j(parity_even, ool->entry());
1488 365 : __ j(below, &done_compare, Label::kNear);
1489 365 : __ j(above, &compare_swap, Label::kNear);
1490 730 : if (instr->InputAt(1)->IsFPRegister()) {
1491 365 : __ Movmskpd(kScratchRegister, i.InputDoubleRegister(1));
1492 : } else {
1493 0 : __ Movsd(kScratchDoubleReg, i.InputOperand(1));
1494 : __ Movmskpd(kScratchRegister, kScratchDoubleReg);
1495 : }
1496 : __ testl(kScratchRegister, Immediate(1));
1497 365 : __ j(zero, &done_compare, Label::kNear);
1498 365 : __ bind(&compare_swap);
1499 730 : if (instr->InputAt(1)->IsFPRegister()) {
1500 365 : __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1501 : } else {
1502 0 : __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1503 : }
1504 365 : __ bind(&done_compare);
1505 365 : __ bind(ool->exit());
1506 : break;
1507 : }
1508 : case kSSEFloat64Abs: {
1509 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1510 6 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1511 6 : __ psrlq(kScratchDoubleReg, 1);
1512 12 : __ andpd(i.OutputDoubleRegister(), kScratchDoubleReg);
1513 6 : break;
1514 : }
1515 : case kSSEFloat64Neg: {
1516 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1517 76 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1518 76 : __ psllq(kScratchDoubleReg, 63);
1519 152 : __ xorpd(i.OutputDoubleRegister(), kScratchDoubleReg);
1520 76 : break;
1521 : }
1522 : case kSSEFloat64Sqrt:
1523 954 : ASSEMBLE_SSE_UNOP(Sqrtsd);
1524 : break;
1525 : case kSSEFloat64Round: {
1526 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
1527 : RoundingMode const mode =
1528 : static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1529 : __ Roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1530 : break;
1531 : }
1532 : case kSSEFloat64ToFloat32:
1533 61212 : ASSEMBLE_SSE_UNOP(Cvtsd2ss);
1534 : break;
1535 : case kSSEFloat64ToInt32:
1536 265016 : if (instr->InputAt(0)->IsFPRegister()) {
1537 108459 : __ Cvttsd2si(i.OutputRegister(), i.InputDoubleRegister(0));
1538 : } else {
1539 48098 : __ Cvttsd2si(i.OutputRegister(), i.InputOperand(0));
1540 : }
1541 : break;
1542 : case kSSEFloat64ToUint32: {
1543 1790 : if (instr->InputAt(0)->IsFPRegister()) {
1544 895 : __ Cvttsd2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1545 : } else {
1546 0 : __ Cvttsd2siq(i.OutputRegister(), i.InputOperand(0));
1547 : }
1548 1790 : if (MiscField::decode(instr->opcode())) {
1549 1628 : __ AssertZeroExtended(i.OutputRegister());
1550 : }
1551 : break;
1552 : }
1553 : case kSSEFloat32ToInt64:
1554 142 : if (instr->InputAt(0)->IsFPRegister()) {
1555 71 : __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1556 : } else {
1557 0 : __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
1558 : }
1559 71 : if (instr->OutputCount() > 1) {
1560 132 : __ Set(i.OutputRegister(1), 1);
1561 66 : Label done;
1562 66 : Label fail;
1563 : __ Move(kScratchDoubleReg, static_cast<float>(INT64_MIN));
1564 132 : if (instr->InputAt(0)->IsFPRegister()) {
1565 66 : __ Ucomiss(kScratchDoubleReg, i.InputDoubleRegister(0));
1566 : } else {
1567 0 : __ Ucomiss(kScratchDoubleReg, i.InputOperand(0));
1568 : }
1569 : // If the input is NaN, then the conversion fails.
1570 66 : __ j(parity_even, &fail);
1571 : // If the input is INT64_MIN, then the conversion succeeds.
1572 66 : __ j(equal, &done);
1573 132 : __ cmpq(i.OutputRegister(0), Immediate(1));
1574 : // If the conversion results in INT64_MIN, but the input was not
1575 : // INT64_MIN, then the conversion fails.
1576 66 : __ j(no_overflow, &done);
1577 66 : __ bind(&fail);
1578 132 : __ Set(i.OutputRegister(1), 0);
1579 66 : __ bind(&done);
1580 : }
1581 : break;
1582 : case kSSEFloat64ToInt64:
1583 1606 : if (instr->InputAt(0)->IsFPRegister()) {
1584 802 : __ Cvttsd2siq(i.OutputRegister(0), i.InputDoubleRegister(0));
1585 : } else {
1586 2 : __ Cvttsd2siq(i.OutputRegister(0), i.InputOperand(0));
1587 : }
1588 806 : if (instr->OutputCount() > 1) {
1589 1378 : __ Set(i.OutputRegister(1), 1);
1590 694 : Label done;
1591 694 : Label fail;
1592 : __ Move(kScratchDoubleReg, static_cast<double>(INT64_MIN));
1593 1392 : if (instr->InputAt(0)->IsFPRegister()) {
1594 696 : __ Ucomisd(kScratchDoubleReg, i.InputDoubleRegister(0));
1595 : } else {
1596 0 : __ Ucomisd(kScratchDoubleReg, i.InputOperand(0));
1597 : }
1598 : // If the input is NaN, then the conversion fails.
1599 693 : __ j(parity_even, &fail);
1600 : // If the input is INT64_MIN, then the conversion succeeds.
1601 686 : __ j(equal, &done);
1602 1384 : __ cmpq(i.OutputRegister(0), Immediate(1));
1603 : // If the conversion results in INT64_MIN, but the input was not
1604 : // INT64_MIN, then the conversion fails.
1605 693 : __ j(no_overflow, &done);
1606 692 : __ bind(&fail);
1607 1366 : __ Set(i.OutputRegister(1), 0);
1608 690 : __ bind(&done);
1609 : }
1610 : break;
1611 : case kSSEFloat32ToUint64: {
1612 71 : Label fail;
1613 137 : if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
1614 142 : if (instr->InputAt(0)->IsFPRegister()) {
1615 142 : __ Cvttss2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
1616 : } else {
1617 0 : __ Cvttss2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
1618 : }
1619 137 : if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
1620 71 : __ bind(&fail);
1621 : break;
1622 : }
1623 : case kSSEFloat64ToUint64: {
1624 3888 : Label fail;
1625 3963 : if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
1626 7776 : if (instr->InputAt(0)->IsFPRegister()) {
1627 7776 : __ Cvttsd2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
1628 : } else {
1629 0 : __ Cvttsd2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
1630 : }
1631 3963 : if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
1632 3888 : __ bind(&fail);
1633 : break;
1634 : }
1635 : case kSSEInt32ToFloat64:
1636 799556 : if (instr->InputAt(0)->IsRegister()) {
1637 395608 : __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1638 : } else {
1639 8340 : __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1640 : }
1641 : break;
1642 : case kSSEInt32ToFloat32:
1643 2190 : if (instr->InputAt(0)->IsRegister()) {
1644 1086 : __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1645 : } else {
1646 18 : __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1647 : }
1648 : break;
1649 : case kSSEInt64ToFloat32:
1650 84 : if (instr->InputAt(0)->IsRegister()) {
1651 42 : __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1652 : } else {
1653 0 : __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1654 : }
1655 : break;
1656 : case kSSEInt64ToFloat64:
1657 6762 : if (instr->InputAt(0)->IsRegister()) {
1658 1254 : __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1659 : } else {
1660 4254 : __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1661 : }
1662 : break;
1663 : case kSSEUint64ToFloat32:
1664 84 : if (instr->InputAt(0)->IsRegister()) {
1665 42 : __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1666 : } else {
1667 0 : __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1668 : }
1669 : break;
1670 : case kSSEUint64ToFloat64:
1671 7262 : if (instr->InputAt(0)->IsRegister()) {
1672 2399 : __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1673 : } else {
1674 2464 : __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1675 : }
1676 : break;
1677 : case kSSEUint32ToFloat64:
1678 21594 : if (instr->InputAt(0)->IsRegister()) {
1679 445 : __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1680 : } else {
1681 20704 : __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1682 : }
1683 : break;
1684 : case kSSEUint32ToFloat32:
1685 224 : if (instr->InputAt(0)->IsRegister()) {
1686 112 : __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1687 : } else {
1688 0 : __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1689 : }
1690 : break;
1691 : case kSSEFloat64ExtractLowWord32:
1692 234 : if (instr->InputAt(0)->IsFPStackSlot()) {
1693 0 : __ movl(i.OutputRegister(), i.InputOperand(0));
1694 : } else {
1695 : __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
1696 : }
1697 : break;
1698 : case kSSEFloat64ExtractHighWord32:
1699 200434 : if (instr->InputAt(0)->IsFPStackSlot()) {
1700 126990 : __ movl(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2));
1701 : } else {
1702 36722 : __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1);
1703 : }
1704 : break;
1705 : case kSSEFloat64InsertLowWord32:
1706 10 : if (instr->InputAt(1)->IsRegister()) {
1707 5 : __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 0);
1708 : } else {
1709 0 : __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
1710 : }
1711 : break;
1712 : case kSSEFloat64InsertHighWord32:
1713 234 : if (instr->InputAt(1)->IsRegister()) {
1714 117 : __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 1);
1715 : } else {
1716 0 : __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
1717 : }
1718 : break;
1719 : case kSSEFloat64LoadLowWord32:
1720 224 : if (instr->InputAt(0)->IsRegister()) {
1721 : __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
1722 : } else {
1723 0 : __ Movd(i.OutputDoubleRegister(), i.InputOperand(0));
1724 : }
1725 : break;
1726 : case kAVXFloat32Cmp: {
1727 : CpuFeatureScope avx_scope(tasm(), AVX);
1728 5686 : if (instr->InputAt(1)->IsFPRegister()) {
1729 2822 : __ vucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1730 : } else {
1731 42 : __ vucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1732 : }
1733 : break;
1734 : }
1735 : case kAVXFloat32Add:
1736 6618 : ASSEMBLE_AVX_BINOP(vaddss);
1737 : break;
1738 : case kAVXFloat32Sub:
1739 9561 : ASSEMBLE_AVX_BINOP(vsubss);
1740 : break;
1741 : case kAVXFloat32Mul:
1742 3156 : ASSEMBLE_AVX_BINOP(vmulss);
1743 : break;
1744 : case kAVXFloat32Div:
1745 1248 : ASSEMBLE_AVX_BINOP(vdivss);
1746 : // Don't delete this mov. It may improve performance on some CPUs,
1747 : // when there is a (v)mulss depending on the result.
1748 416 : __ Movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1749 : break;
1750 : case kAVXFloat64Cmp: {
1751 : CpuFeatureScope avx_scope(tasm(), AVX);
1752 528294 : if (instr->InputAt(1)->IsFPRegister()) {
1753 239669 : __ vucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1754 : } else {
1755 48956 : __ vucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1756 : }
1757 : break;
1758 : }
1759 : case kAVXFloat64Add:
1760 241191 : ASSEMBLE_AVX_BINOP(vaddsd);
1761 : break;
1762 : case kAVXFloat64Sub:
1763 47571 : ASSEMBLE_AVX_BINOP(vsubsd);
1764 : break;
1765 : case kAVXFloat64Mul:
1766 42759 : ASSEMBLE_AVX_BINOP(vmulsd);
1767 : break;
1768 : case kAVXFloat64Div:
1769 36357 : ASSEMBLE_AVX_BINOP(vdivsd);
1770 : // Don't delete this mov. It may improve performance on some CPUs,
1771 : // when there is a (v)mulsd depending on the result.
1772 12119 : __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1773 : break;
1774 : case kAVXFloat32Abs: {
1775 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1776 : CpuFeatureScope avx_scope(tasm(), AVX);
1777 99 : __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
1778 : __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 33);
1779 198 : if (instr->InputAt(0)->IsFPRegister()) {
1780 : __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg,
1781 99 : i.InputDoubleRegister(0));
1782 : } else {
1783 : __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg,
1784 0 : i.InputOperand(0));
1785 : }
1786 : break;
1787 : }
1788 : case kAVXFloat32Neg: {
1789 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1790 : CpuFeatureScope avx_scope(tasm(), AVX);
1791 208 : __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
1792 : __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 31);
1793 416 : if (instr->InputAt(0)->IsFPRegister()) {
1794 : __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg,
1795 208 : i.InputDoubleRegister(0));
1796 : } else {
1797 : __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg,
1798 0 : i.InputOperand(0));
1799 : }
1800 : break;
1801 : }
1802 : case kAVXFloat64Abs: {
1803 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1804 : CpuFeatureScope avx_scope(tasm(), AVX);
1805 657 : __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
1806 : __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 1);
1807 1314 : if (instr->InputAt(0)->IsFPRegister()) {
1808 : __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg,
1809 657 : i.InputDoubleRegister(0));
1810 : } else {
1811 : __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg,
1812 0 : i.InputOperand(0));
1813 : }
1814 : break;
1815 : }
1816 : case kAVXFloat64Neg: {
1817 : // TODO(bmeurer): Use RIP relative 128-bit constants.
1818 : CpuFeatureScope avx_scope(tasm(), AVX);
1819 9895 : __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
1820 : __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 63);
1821 19790 : if (instr->InputAt(0)->IsFPRegister()) {
1822 : __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg,
1823 9805 : i.InputDoubleRegister(0));
1824 : } else {
1825 : __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg,
1826 90 : i.InputOperand(0));
1827 : }
1828 : break;
1829 : }
1830 : case kSSEFloat64SilenceNaN:
1831 : __ Xorpd(kScratchDoubleReg, kScratchDoubleReg);
1832 5818 : __ Subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
1833 : break;
1834 : case kX64Movsxbl:
1835 104512 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1836 157502 : ASSEMBLE_MOVX(movsxbl);
1837 104512 : __ AssertZeroExtended(i.OutputRegister());
1838 52256 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1839 52256 : break;
1840 : case kX64Movzxbl:
1841 360668 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1842 543202 : ASSEMBLE_MOVX(movzxbl);
1843 360668 : __ AssertZeroExtended(i.OutputRegister());
1844 180334 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1845 180334 : break;
1846 : case kX64Movsxbq:
1847 27250 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1848 40885 : ASSEMBLE_MOVX(movsxbq);
1849 13625 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1850 13625 : break;
1851 : case kX64Movzxbq:
1852 28722 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1853 43083 : ASSEMBLE_MOVX(movzxbq);
1854 28722 : __ AssertZeroExtended(i.OutputRegister());
1855 14361 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1856 14361 : break;
1857 : case kX64Movb: {
1858 177842 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1859 88920 : size_t index = 0;
1860 88920 : Operand operand = i.MemoryOperand(&index);
1861 177842 : if (HasImmediateInput(instr, index)) {
1862 13792 : __ movb(operand, Immediate(i.InputInt8(index)));
1863 : } else {
1864 164050 : __ movb(operand, i.InputRegister(index));
1865 : }
1866 88921 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1867 : break;
1868 : }
1869 : case kX64Movsxwl:
1870 24208 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1871 36889 : ASSEMBLE_MOVX(movsxwl);
1872 24208 : __ AssertZeroExtended(i.OutputRegister());
1873 12104 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1874 12104 : break;
1875 : case kX64Movzxwl:
1876 305840 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1877 465580 : ASSEMBLE_MOVX(movzxwl);
1878 305840 : __ AssertZeroExtended(i.OutputRegister());
1879 152920 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1880 152920 : break;
1881 : case kX64Movsxwq:
1882 18066 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1883 27109 : ASSEMBLE_MOVX(movsxwq);
1884 : break;
1885 : case kX64Movzxwq:
1886 1344 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1887 2016 : ASSEMBLE_MOVX(movzxwq);
1888 1344 : __ AssertZeroExtended(i.OutputRegister());
1889 672 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1890 672 : break;
1891 : case kX64Movw: {
1892 25752 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1893 12876 : size_t index = 0;
1894 12876 : Operand operand = i.MemoryOperand(&index);
1895 25752 : if (HasImmediateInput(instr, index)) {
1896 1940 : __ movw(operand, Immediate(i.InputInt16(index)));
1897 : } else {
1898 23812 : __ movw(operand, i.InputRegister(index));
1899 : }
1900 12876 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1901 : break;
1902 : }
1903 : case kX64Movl:
1904 5128658 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1905 2564350 : if (instr->HasOutput()) {
1906 725818 : if (instr->addressing_mode() == kMode_None) {
1907 147654 : if (instr->InputAt(0)->IsRegister()) {
1908 139880 : __ movl(i.OutputRegister(), i.InputRegister(0));
1909 : } else {
1910 7772 : __ movl(i.OutputRegister(), i.InputOperand(0));
1911 : }
1912 : } else {
1913 1304004 : __ movl(i.OutputRegister(), i.MemoryOperand());
1914 : }
1915 1451684 : __ AssertZeroExtended(i.OutputRegister());
1916 : } else {
1917 1838532 : size_t index = 0;
1918 1838532 : Operand operand = i.MemoryOperand(&index);
1919 3677104 : if (HasImmediateInput(instr, index)) {
1920 525765 : __ movl(operand, i.InputImmediate(index));
1921 : } else {
1922 2625582 : __ movl(operand, i.InputRegister(index));
1923 : }
1924 : }
1925 2564361 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1926 2564343 : break;
1927 : case kX64Movsxlq:
1928 273374 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1929 495906 : ASSEMBLE_MOVX(movsxlq);
1930 136687 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1931 136687 : break;
1932 : case kX64MovqDecompressTaggedSigned: {
1933 0 : CHECK(instr->HasOutput());
1934 : __ DecompressTaggedSigned(i.OutputRegister(), i.MemoryOperand(),
1935 0 : DEBUG_BOOL ? i.TempRegister(0) : no_reg);
1936 0 : break;
1937 : }
1938 : case kX64MovqDecompressTaggedPointer: {
1939 0 : CHECK(instr->HasOutput());
1940 : __ DecompressTaggedPointer(i.OutputRegister(), i.MemoryOperand(),
1941 0 : DEBUG_BOOL ? i.TempRegister(0) : no_reg);
1942 0 : break;
1943 : }
1944 : case kX64MovqDecompressAnyTagged: {
1945 0 : CHECK(instr->HasOutput());
1946 : __ DecompressAnyTagged(i.OutputRegister(), i.MemoryOperand(),
1947 : i.TempRegister(0),
1948 0 : DEBUG_BOOL ? i.TempRegister(1) : no_reg);
1949 0 : break;
1950 : }
1951 : case kX64Movq:
1952 13577648 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1953 6788826 : if (instr->HasOutput()) {
1954 9190896 : __ movq(i.OutputRegister(), i.MemoryOperand());
1955 : } else {
1956 2193416 : size_t index = 0;
1957 2193416 : Operand operand = i.MemoryOperand(&index);
1958 4386836 : if (HasImmediateInput(instr, index)) {
1959 21161 : __ movq(operand, i.InputImmediate(index));
1960 : } else {
1961 4344514 : __ movq(operand, i.InputRegister(index));
1962 : }
1963 : }
1964 6788933 : EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1965 6788920 : break;
1966 : case kX64Movss:
1967 65914 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1968 32958 : if (instr->HasOutput()) {
1969 40414 : __ movss(i.OutputDoubleRegister(), i.MemoryOperand());
1970 : } else {
1971 12751 : size_t index = 0;
1972 12751 : Operand operand = i.MemoryOperand(&index);
1973 25502 : __ movss(operand, i.InputDoubleRegister(index));
1974 : }
1975 : break;
1976 : case kX64Movsd: {
1977 1252080 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1978 626073 : if (instr->HasOutput()) {
1979 : const MemoryAccessMode access_mode =
1980 : static_cast<MemoryAccessMode>(MiscField::decode(opcode));
1981 433649 : if (access_mode == kMemoryAccessPoisoned) {
1982 : // If we have to poison the loaded value, we load into a general
1983 : // purpose register first, mask it with the poison, and move the
1984 : // value from the general purpose register into the double register.
1985 0 : __ movq(kScratchRegister, i.MemoryOperand());
1986 0 : __ andq(kScratchRegister, kSpeculationPoisonRegister);
1987 0 : __ Movq(i.OutputDoubleRegister(), kScratchRegister);
1988 : } else {
1989 433653 : __ Movsd(i.OutputDoubleRegister(), i.MemoryOperand());
1990 : }
1991 : } else {
1992 192424 : size_t index = 0;
1993 192424 : Operand operand = i.MemoryOperand(&index);
1994 192425 : __ Movsd(operand, i.InputDoubleRegister(index));
1995 : }
1996 : break;
1997 : }
1998 : case kX64Movdqu: {
1999 : CpuFeatureScope sse_scope(tasm(), SSSE3);
2000 51040 : EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
2001 25520 : if (instr->HasOutput()) {
2002 34930 : __ movdqu(i.OutputSimd128Register(), i.MemoryOperand());
2003 : } else {
2004 8055 : size_t index = 0;
2005 8055 : Operand operand = i.MemoryOperand(&index);
2006 16110 : __ movdqu(operand, i.InputSimd128Register(index));
2007 : }
2008 : break;
2009 : }
2010 : case kX64BitcastFI:
2011 1336 : if (instr->InputAt(0)->IsFPStackSlot()) {
2012 0 : __ movl(i.OutputRegister(), i.InputOperand(0));
2013 : } else {
2014 : __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
2015 : }
2016 : break;
2017 : case kX64BitcastDL:
2018 1230 : if (instr->InputAt(0)->IsFPStackSlot()) {
2019 0 : __ movq(i.OutputRegister(), i.InputOperand(0));
2020 : } else {
2021 : __ Movq(i.OutputRegister(), i.InputDoubleRegister(0));
2022 : }
2023 : break;
2024 : case kX64BitcastIF:
2025 740 : if (instr->InputAt(0)->IsRegister()) {
2026 : __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
2027 : } else {
2028 0 : __ movss(i.OutputDoubleRegister(), i.InputOperand(0));
2029 : }
2030 : break;
2031 : case kX64BitcastLD:
2032 386 : if (instr->InputAt(0)->IsRegister()) {
2033 : __ Movq(i.OutputDoubleRegister(), i.InputRegister(0));
2034 : } else {
2035 0 : __ Movsd(i.OutputDoubleRegister(), i.InputOperand(0));
2036 : }
2037 : break;
2038 : case kX64Lea32: {
2039 : AddressingMode mode = AddressingModeField::decode(instr->opcode());
2040 : // Shorten "leal" to "addl", "subl" or "shll" if the register allocation
2041 : // and addressing mode just happens to work out. The "addl"/"subl" forms
2042 : // in these cases are faster based on measurements.
2043 404087 : if (i.InputRegister(0) == i.OutputRegister()) {
2044 157635 : if (mode == kMode_MRI) {
2045 78443 : int32_t constant_summand = i.InputInt32(1);
2046 : DCHECK_NE(0, constant_summand);
2047 78446 : if (constant_summand > 0) {
2048 110152 : __ addl(i.OutputRegister(), Immediate(constant_summand));
2049 : } else {
2050 : __ subl(i.OutputRegister(),
2051 46740 : Immediate(base::NegateWithWraparound(constant_summand)));
2052 : }
2053 79192 : } else if (mode == kMode_MR1) {
2054 13542 : if (i.InputRegister(1) == i.OutputRegister()) {
2055 1069 : __ shll(i.OutputRegister(), Immediate(1));
2056 : } else {
2057 12473 : __ addl(i.OutputRegister(), i.InputRegister(1));
2058 : }
2059 65650 : } else if (mode == kMode_M2) {
2060 0 : __ shll(i.OutputRegister(), Immediate(1));
2061 65650 : } else if (mode == kMode_M4) {
2062 398 : __ shll(i.OutputRegister(), Immediate(2));
2063 65252 : } else if (mode == kMode_M8) {
2064 3300 : __ shll(i.OutputRegister(), Immediate(3));
2065 : } else {
2066 123904 : __ leal(i.OutputRegister(), i.MemoryOperand());
2067 : }
2068 279610 : } else if (mode == kMode_MR1 &&
2069 : i.InputRegister(1) == i.OutputRegister()) {
2070 20695 : __ addl(i.OutputRegister(), i.InputRegister(0));
2071 : } else {
2072 451508 : __ leal(i.OutputRegister(), i.MemoryOperand());
2073 : }
2074 808216 : __ AssertZeroExtended(i.OutputRegister());
2075 404110 : break;
2076 : }
2077 : case kX64Lea: {
2078 : AddressingMode mode = AddressingModeField::decode(instr->opcode());
2079 : // Shorten "leaq" to "addq", "subq" or "shlq" if the register allocation
2080 : // and addressing mode just happens to work out. The "addq"/"subq" forms
2081 : // in these cases are faster based on measurements.
2082 2113927 : if (i.InputRegister(0) == i.OutputRegister()) {
2083 631884 : if (mode == kMode_MRI) {
2084 520643 : int32_t constant_summand = i.InputInt32(1);
2085 520643 : if (constant_summand > 0) {
2086 854758 : __ addq(i.OutputRegister(), Immediate(constant_summand));
2087 93264 : } else if (constant_summand < 0) {
2088 279669 : __ subq(i.OutputRegister(), Immediate(-constant_summand));
2089 : }
2090 111241 : } else if (mode == kMode_MR1) {
2091 52470 : if (i.InputRegister(1) == i.OutputRegister()) {
2092 2055 : __ shlq(i.OutputRegister(), Immediate(1));
2093 : } else {
2094 50415 : __ addq(i.OutputRegister(), i.InputRegister(1));
2095 : }
2096 58771 : } else if (mode == kMode_M2) {
2097 0 : __ shlq(i.OutputRegister(), Immediate(1));
2098 58771 : } else if (mode == kMode_M4) {
2099 336 : __ shlq(i.OutputRegister(), Immediate(2));
2100 58435 : } else if (mode == kMode_M8) {
2101 11679 : __ shlq(i.OutputRegister(), Immediate(3));
2102 : } else {
2103 93512 : __ leaq(i.OutputRegister(), i.MemoryOperand());
2104 : }
2105 1751160 : } else if (mode == kMode_MR1 &&
2106 : i.InputRegister(1) == i.OutputRegister()) {
2107 193233 : __ addq(i.OutputRegister(), i.InputRegister(0));
2108 : } else {
2109 2577630 : __ leaq(i.OutputRegister(), i.MemoryOperand());
2110 : }
2111 : break;
2112 : }
2113 : case kX64Dec32:
2114 0 : __ decl(i.OutputRegister());
2115 : break;
2116 : case kX64Inc32:
2117 0 : __ incl(i.OutputRegister());
2118 : break;
2119 : case kX64Push:
2120 3058816 : if (AddressingModeField::decode(instr->opcode()) != kMode_None) {
2121 25846 : size_t index = 0;
2122 25846 : Operand operand = i.MemoryOperand(&index);
2123 25846 : __ pushq(operand);
2124 : frame_access_state()->IncreaseSPDelta(1);
2125 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2126 51692 : kSystemPointerSize);
2127 3032970 : } else if (HasImmediateInput(instr, 0)) {
2128 201411 : __ pushq(i.InputImmediate(0));
2129 : frame_access_state()->IncreaseSPDelta(1);
2130 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2131 402822 : kSystemPointerSize);
2132 2831559 : } else if (instr->InputAt(0)->IsRegister()) {
2133 2145814 : __ pushq(i.InputRegister(0));
2134 : frame_access_state()->IncreaseSPDelta(1);
2135 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2136 4291662 : kSystemPointerSize);
2137 1362101 : } else if (instr->InputAt(0)->IsFloatRegister() ||
2138 : instr->InputAt(0)->IsDoubleRegister()) {
2139 : // TODO(titzer): use another machine instruction?
2140 17983 : __ subq(rsp, Immediate(kDoubleSize));
2141 : frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize);
2142 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2143 35966 : kDoubleSize);
2144 53949 : __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
2145 667762 : } else if (instr->InputAt(0)->IsSimd128Register()) {
2146 : // TODO(titzer): use another machine instruction?
2147 150 : __ subq(rsp, Immediate(kSimd128Size));
2148 : frame_access_state()->IncreaseSPDelta(kSimd128Size /
2149 : kSystemPointerSize);
2150 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2151 300 : kSimd128Size);
2152 450 : __ Movups(Operand(rsp, 0), i.InputSimd128Register(0));
2153 685079 : } else if (instr->InputAt(0)->IsStackSlot() ||
2154 680417 : instr->InputAt(0)->IsFloatStackSlot() ||
2155 : instr->InputAt(0)->IsDoubleStackSlot()) {
2156 667282 : __ pushq(i.InputOperand(0));
2157 : frame_access_state()->IncreaseSPDelta(1);
2158 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2159 1334566 : kSystemPointerSize);
2160 : } else {
2161 : DCHECK(instr->InputAt(0)->IsSimd128StackSlot());
2162 330 : __ Movups(kScratchDoubleReg, i.InputOperand(0));
2163 : // TODO(titzer): use another machine instruction?
2164 330 : __ subq(rsp, Immediate(kSimd128Size));
2165 : frame_access_state()->IncreaseSPDelta(kSimd128Size /
2166 : kSystemPointerSize);
2167 : unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2168 660 : kSimd128Size);
2169 660 : __ Movups(Operand(rsp, 0), kScratchDoubleReg);
2170 : }
2171 : break;
2172 : case kX64Poke: {
2173 : int slot = MiscField::decode(instr->opcode());
2174 3400 : if (HasImmediateInput(instr, 0)) {
2175 2270 : __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputImmediate(0));
2176 : } else {
2177 4530 : __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputRegister(0));
2178 : }
2179 : break;
2180 : }
2181 : case kX64Peek: {
2182 6338 : int reverse_slot = i.InputInt32(0);
2183 : int offset =
2184 6338 : FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
2185 6338 : if (instr->OutputAt(0)->IsFPRegister()) {
2186 : LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
2187 3160 : if (op->representation() == MachineRepresentation::kFloat64) {
2188 3160 : __ Movsd(i.OutputDoubleRegister(), Operand(rbp, offset));
2189 : } else {
2190 : DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
2191 3160 : __ Movss(i.OutputFloatRegister(), Operand(rbp, offset));
2192 : }
2193 : } else {
2194 9534 : __ movq(i.OutputRegister(), Operand(rbp, offset));
2195 : }
2196 : break;
2197 : }
2198 : // TODO(gdeepti): Get rid of redundant moves for F32x4Splat/Extract below
2199 : case kX64F32x4Splat: {
2200 350 : XMMRegister dst = i.OutputSimd128Register();
2201 700 : if (instr->InputAt(0)->IsFPRegister()) {
2202 350 : __ movss(dst, i.InputDoubleRegister(0));
2203 : } else {
2204 0 : __ movss(dst, i.InputOperand(0));
2205 : }
2206 350 : __ shufps(dst, dst, 0x0);
2207 : break;
2208 : }
2209 : case kX64F32x4ExtractLane: {
2210 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2211 1360 : __ extractps(kScratchRegister, i.InputSimd128Register(0), i.InputInt8(1));
2212 1360 : __ movd(i.OutputDoubleRegister(), kScratchRegister);
2213 : break;
2214 : }
2215 : case kX64F32x4ReplaceLane: {
2216 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2217 : // The insertps instruction uses imm8[5:4] to indicate the lane
2218 : // that needs to be replaced.
2219 80 : byte select = i.InputInt8(1) << 4 & 0x30;
2220 160 : __ insertps(i.OutputSimd128Register(), i.InputDoubleRegister(2), select);
2221 : break;
2222 : }
2223 : case kX64F32x4SConvertI32x4: {
2224 5 : __ cvtdq2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2225 5 : break;
2226 : }
2227 : case kX64F32x4UConvertI32x4: {
2228 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2229 : DCHECK_NE(i.OutputSimd128Register(), kScratchDoubleReg);
2230 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2231 : XMMRegister dst = i.OutputSimd128Register();
2232 5 : __ pxor(kScratchDoubleReg, kScratchDoubleReg); // zeros
2233 5 : __ pblendw(kScratchDoubleReg, dst, 0x55); // get lo 16 bits
2234 : __ psubd(dst, kScratchDoubleReg); // get hi 16 bits
2235 5 : __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly
2236 5 : __ psrld(dst, 1); // divide by 2 to get in unsigned range
2237 5 : __ cvtdq2ps(dst, dst); // convert hi exactly
2238 5 : __ addps(dst, dst); // double hi, exactly
2239 5 : __ addps(dst, kScratchDoubleReg); // add hi and lo, may round.
2240 : break;
2241 : }
2242 : case kX64F32x4Abs: {
2243 : XMMRegister dst = i.OutputSimd128Register();
2244 : XMMRegister src = i.InputSimd128Register(0);
2245 10 : if (dst == src) {
2246 10 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2247 10 : __ psrld(kScratchDoubleReg, 1);
2248 20 : __ andps(i.OutputSimd128Register(), kScratchDoubleReg);
2249 : } else {
2250 0 : __ pcmpeqd(dst, dst);
2251 0 : __ psrld(dst, 1);
2252 0 : __ andps(dst, i.InputSimd128Register(0));
2253 : }
2254 : break;
2255 : }
2256 : case kX64F32x4Neg: {
2257 : XMMRegister dst = i.OutputSimd128Register();
2258 : XMMRegister src = i.InputSimd128Register(0);
2259 10 : if (dst == src) {
2260 10 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2261 10 : __ pslld(kScratchDoubleReg, 31);
2262 20 : __ xorps(i.OutputSimd128Register(), kScratchDoubleReg);
2263 : } else {
2264 0 : __ pcmpeqd(dst, dst);
2265 0 : __ pslld(dst, 31);
2266 0 : __ xorps(dst, i.InputSimd128Register(0));
2267 : }
2268 : break;
2269 : }
2270 : case kX64F32x4RecipApprox: {
2271 10 : __ rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2272 10 : break;
2273 : }
2274 : case kX64F32x4RecipSqrtApprox: {
2275 10 : __ rsqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2276 10 : break;
2277 : }
2278 : case kX64F32x4Add: {
2279 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2280 30 : __ addps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2281 30 : break;
2282 : }
2283 : case kX64F32x4AddHoriz: {
2284 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2285 : CpuFeatureScope sse_scope(tasm(), SSE3);
2286 10 : __ haddps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2287 : break;
2288 : }
2289 : case kX64F32x4Sub: {
2290 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2291 10 : __ subps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2292 10 : break;
2293 : }
2294 : case kX64F32x4Mul: {
2295 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2296 10 : __ mulps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2297 10 : break;
2298 : }
2299 : case kX64F32x4Min: {
2300 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2301 10 : __ minps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2302 10 : break;
2303 : }
2304 : case kX64F32x4Max: {
2305 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2306 10 : __ maxps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2307 10 : break;
2308 : }
2309 : case kX64F32x4Eq: {
2310 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2311 10 : __ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x0);
2312 10 : break;
2313 : }
2314 : case kX64F32x4Ne: {
2315 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2316 10 : __ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x4);
2317 10 : break;
2318 : }
2319 : case kX64F32x4Lt: {
2320 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2321 20 : __ cmpltps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2322 : break;
2323 : }
2324 : case kX64F32x4Le: {
2325 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2326 20 : __ cmpleps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2327 : break;
2328 : }
2329 : case kX64I32x4Splat: {
2330 2090 : XMMRegister dst = i.OutputSimd128Register();
2331 2090 : __ movd(dst, i.InputRegister(0));
2332 2090 : __ pshufd(dst, dst, 0x0);
2333 : break;
2334 : }
2335 : case kX64I32x4ExtractLane: {
2336 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2337 15384 : __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
2338 : break;
2339 : }
2340 : case kX64I32x4ReplaceLane: {
2341 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2342 4560 : if (instr->InputAt(2)->IsRegister()) {
2343 : __ Pinsrd(i.OutputSimd128Register(), i.InputRegister(2),
2344 840 : i.InputInt8(1));
2345 : } else {
2346 3720 : __ Pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
2347 : }
2348 : break;
2349 : }
2350 : case kX64I32x4SConvertF32x4: {
2351 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2352 : XMMRegister dst = i.OutputSimd128Register();
2353 : // NAN->0
2354 10 : __ movaps(kScratchDoubleReg, dst);
2355 : __ cmpeqps(kScratchDoubleReg, kScratchDoubleReg);
2356 : __ pand(dst, kScratchDoubleReg);
2357 : // Set top bit if >= 0 (but not -0.0!)
2358 : __ pxor(kScratchDoubleReg, dst);
2359 : // Convert
2360 10 : __ cvttps2dq(dst, dst);
2361 : // Set top bit if >=0 is now < 0
2362 : __ pand(kScratchDoubleReg, dst);
2363 10 : __ psrad(kScratchDoubleReg, 31);
2364 : // Set positive overflow lanes to 0x7FFFFFFF
2365 : __ pxor(dst, kScratchDoubleReg);
2366 : break;
2367 : }
2368 : case kX64I32x4SConvertI16x8Low: {
2369 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2370 10 : __ pmovsxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2371 : break;
2372 : }
2373 : case kX64I32x4SConvertI16x8High: {
2374 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2375 : XMMRegister dst = i.OutputSimd128Register();
2376 10 : __ palignr(dst, i.InputSimd128Register(0), 8);
2377 : __ pmovsxwd(dst, dst);
2378 : break;
2379 : }
2380 : case kX64I32x4Neg: {
2381 : CpuFeatureScope sse_scope(tasm(), SSSE3);
2382 : XMMRegister dst = i.OutputSimd128Register();
2383 : XMMRegister src = i.InputSimd128Register(0);
2384 10 : if (dst == src) {
2385 10 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2386 : __ psignd(dst, kScratchDoubleReg);
2387 : } else {
2388 0 : __ pxor(dst, dst);
2389 : __ psubd(dst, src);
2390 : }
2391 : break;
2392 : }
2393 : case kX64I32x4Shl: {
2394 620 : __ pslld(i.OutputSimd128Register(), i.InputInt8(1));
2395 310 : break;
2396 : }
2397 : case kX64I32x4ShrS: {
2398 620 : __ psrad(i.OutputSimd128Register(), i.InputInt8(1));
2399 310 : break;
2400 : }
2401 : case kX64I32x4Add: {
2402 30 : __ paddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2403 : break;
2404 : }
2405 : case kX64I32x4AddHoriz: {
2406 : CpuFeatureScope sse_scope(tasm(), SSSE3);
2407 10 : __ phaddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2408 : break;
2409 : }
2410 : case kX64I32x4Sub: {
2411 10 : __ psubd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2412 : break;
2413 : }
2414 : case kX64I32x4Mul: {
2415 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2416 10 : __ pmulld(i.OutputSimd128Register(), i.InputSimd128Register(1));
2417 : break;
2418 : }
2419 : case kX64I32x4MinS: {
2420 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2421 10 : __ pminsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2422 : break;
2423 : }
2424 : case kX64I32x4MaxS: {
2425 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2426 10 : __ pmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2427 : break;
2428 : }
2429 : case kX64I32x4Eq: {
2430 30 : __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2431 : break;
2432 : }
2433 : case kX64I32x4Ne: {
2434 35 : __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2435 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2436 35 : __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
2437 : break;
2438 : }
2439 : case kX64I32x4GtS: {
2440 20 : __ pcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2441 : break;
2442 : }
2443 : case kX64I32x4GeS: {
2444 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2445 : XMMRegister dst = i.OutputSimd128Register();
2446 : XMMRegister src = i.InputSimd128Register(1);
2447 20 : __ pminsd(dst, src);
2448 : __ pcmpeqd(dst, src);
2449 : break;
2450 : }
2451 : case kX64I32x4UConvertF32x4: {
2452 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2453 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2454 : XMMRegister dst = i.OutputSimd128Register();
2455 10 : XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
2456 : // NAN->0, negative->0
2457 10 : __ pxor(kScratchDoubleReg, kScratchDoubleReg);
2458 10 : __ maxps(dst, kScratchDoubleReg);
2459 : // scratch: float representation of max_signed
2460 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2461 10 : __ psrld(kScratchDoubleReg, 1); // 0x7fffffff
2462 10 : __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // 0x4f000000
2463 : // tmp: convert (src-max_signed).
2464 : // Positive overflow lanes -> 0x7FFFFFFF
2465 : // Negative lanes -> 0
2466 10 : __ movaps(tmp, dst);
2467 10 : __ subps(tmp, kScratchDoubleReg);
2468 : __ cmpleps(kScratchDoubleReg, tmp);
2469 10 : __ cvttps2dq(tmp, tmp);
2470 : __ pxor(tmp, kScratchDoubleReg);
2471 : __ pxor(kScratchDoubleReg, kScratchDoubleReg);
2472 : __ pmaxsd(tmp, kScratchDoubleReg);
2473 : // convert. Overflow lanes above max_signed will be 0x80000000
2474 10 : __ cvttps2dq(dst, dst);
2475 : // Add (src-max_signed) for overflow lanes.
2476 : __ paddd(dst, tmp);
2477 : break;
2478 : }
2479 : case kX64I32x4UConvertI16x8Low: {
2480 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2481 10 : __ pmovzxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2482 : break;
2483 : }
2484 : case kX64I32x4UConvertI16x8High: {
2485 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2486 : XMMRegister dst = i.OutputSimd128Register();
2487 10 : __ palignr(dst, i.InputSimd128Register(0), 8);
2488 : __ pmovzxwd(dst, dst);
2489 : break;
2490 : }
2491 : case kX64I32x4ShrU: {
2492 620 : __ psrld(i.OutputSimd128Register(), i.InputInt8(1));
2493 310 : break;
2494 : }
2495 : case kX64I32x4MinU: {
2496 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2497 10 : __ pminud(i.OutputSimd128Register(), i.InputSimd128Register(1));
2498 : break;
2499 : }
2500 : case kX64I32x4MaxU: {
2501 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2502 10 : __ pmaxud(i.OutputSimd128Register(), i.InputSimd128Register(1));
2503 : break;
2504 : }
2505 : case kX64I32x4GtU: {
2506 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2507 : XMMRegister dst = i.OutputSimd128Register();
2508 : XMMRegister src = i.InputSimd128Register(1);
2509 20 : __ pmaxud(dst, src);
2510 : __ pcmpeqd(dst, src);
2511 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2512 : __ pxor(dst, kScratchDoubleReg);
2513 : break;
2514 : }
2515 : case kX64I32x4GeU: {
2516 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2517 : XMMRegister dst = i.OutputSimd128Register();
2518 : XMMRegister src = i.InputSimd128Register(1);
2519 20 : __ pminud(dst, src);
2520 : __ pcmpeqd(dst, src);
2521 : break;
2522 : }
2523 : case kX64S128Zero: {
2524 18 : XMMRegister dst = i.OutputSimd128Register();
2525 18 : __ xorps(dst, dst);
2526 : break;
2527 : }
2528 : case kX64I16x8Splat: {
2529 980 : XMMRegister dst = i.OutputSimd128Register();
2530 980 : __ movd(dst, i.InputRegister(0));
2531 980 : __ pshuflw(dst, dst, 0x0);
2532 980 : __ pshufd(dst, dst, 0x0);
2533 : break;
2534 : }
2535 : case kX64I16x8ExtractLane: {
2536 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2537 6600 : Register dst = i.OutputRegister();
2538 13200 : __ pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
2539 6600 : __ movsxwl(dst, dst);
2540 : break;
2541 : }
2542 : case kX64I16x8ReplaceLane: {
2543 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2544 240 : if (instr->InputAt(2)->IsRegister()) {
2545 : __ pinsrw(i.OutputSimd128Register(), i.InputRegister(2),
2546 240 : i.InputInt8(1));
2547 : } else {
2548 0 : __ pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
2549 : }
2550 : break;
2551 : }
2552 : case kX64I16x8SConvertI8x16Low: {
2553 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2554 10 : __ pmovsxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
2555 : break;
2556 : }
2557 : case kX64I16x8SConvertI8x16High: {
2558 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2559 : XMMRegister dst = i.OutputSimd128Register();
2560 10 : __ palignr(dst, i.InputSimd128Register(0), 8);
2561 : __ pmovsxbw(dst, dst);
2562 : break;
2563 : }
2564 : case kX64I16x8Neg: {
2565 : CpuFeatureScope sse_scope(tasm(), SSSE3);
2566 : XMMRegister dst = i.OutputSimd128Register();
2567 : XMMRegister src = i.InputSimd128Register(0);
2568 10 : if (dst == src) {
2569 10 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2570 : __ psignw(dst, kScratchDoubleReg);
2571 : } else {
2572 0 : __ pxor(dst, dst);
2573 : __ psubw(dst, src);
2574 : }
2575 : break;
2576 : }
2577 : case kX64I16x8Shl: {
2578 300 : __ psllw(i.OutputSimd128Register(), i.InputInt8(1));
2579 150 : break;
2580 : }
2581 : case kX64I16x8ShrS: {
2582 300 : __ psraw(i.OutputSimd128Register(), i.InputInt8(1));
2583 150 : break;
2584 : }
2585 : case kX64I16x8SConvertI32x4: {
2586 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2587 10 : __ packssdw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2588 : break;
2589 : }
2590 : case kX64I16x8Add: {
2591 10 : __ paddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2592 : break;
2593 : }
2594 : case kX64I16x8AddSaturateS: {
2595 10 : __ paddsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2596 : break;
2597 : }
2598 : case kX64I16x8AddHoriz: {
2599 : CpuFeatureScope sse_scope(tasm(), SSSE3);
2600 10 : __ phaddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2601 : break;
2602 : }
2603 : case kX64I16x8Sub: {
2604 10 : __ psubw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2605 : break;
2606 : }
2607 : case kX64I16x8SubSaturateS: {
2608 10 : __ psubsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2609 : break;
2610 : }
2611 : case kX64I16x8Mul: {
2612 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2613 10 : __ pmullw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2614 : break;
2615 : }
2616 : case kX64I16x8MinS: {
2617 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2618 10 : __ pminsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2619 : break;
2620 : }
2621 : case kX64I16x8MaxS: {
2622 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2623 10 : __ pmaxsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2624 : break;
2625 : }
2626 : case kX64I16x8Eq: {
2627 30 : __ pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2628 : break;
2629 : }
2630 : case kX64I16x8Ne: {
2631 35 : __ pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2632 : __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2633 35 : __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
2634 : break;
2635 : }
2636 : case kX64I16x8GtS: {
2637 20 : __ pcmpgtw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2638 : break;
2639 : }
2640 : case kX64I16x8GeS: {
2641 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2642 : XMMRegister dst = i.OutputSimd128Register();
2643 : XMMRegister src = i.InputSimd128Register(1);
2644 20 : __ pminsw(dst, src);
2645 : __ pcmpeqw(dst, src);
2646 : break;
2647 : }
2648 : case kX64I16x8UConvertI8x16Low: {
2649 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2650 10 : __ pmovzxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
2651 : break;
2652 : }
2653 : case kX64I16x8UConvertI8x16High: {
2654 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2655 : XMMRegister dst = i.OutputSimd128Register();
2656 10 : __ palignr(dst, i.InputSimd128Register(0), 8);
2657 : __ pmovzxbw(dst, dst);
2658 : break;
2659 : }
2660 : case kX64I16x8ShrU: {
2661 300 : __ psrlw(i.OutputSimd128Register(), i.InputInt8(1));
2662 150 : break;
2663 : }
2664 : case kX64I16x8UConvertI32x4: {
2665 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2666 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2667 : XMMRegister dst = i.OutputSimd128Register();
2668 : // Change negative lanes to 0x7FFFFFFF
2669 10 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2670 10 : __ psrld(kScratchDoubleReg, 1);
2671 : __ pminud(dst, kScratchDoubleReg);
2672 10 : __ pminud(kScratchDoubleReg, i.InputSimd128Register(1));
2673 : __ packusdw(dst, kScratchDoubleReg);
2674 : break;
2675 : }
2676 : case kX64I16x8AddSaturateU: {
2677 10 : __ paddusw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2678 : break;
2679 : }
2680 : case kX64I16x8SubSaturateU: {
2681 10 : __ psubusw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2682 : break;
2683 : }
2684 : case kX64I16x8MinU: {
2685 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2686 10 : __ pminuw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2687 : break;
2688 : }
2689 : case kX64I16x8MaxU: {
2690 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2691 10 : __ pmaxuw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2692 : break;
2693 : }
2694 : case kX64I16x8GtU: {
2695 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2696 : XMMRegister dst = i.OutputSimd128Register();
2697 : XMMRegister src = i.InputSimd128Register(1);
2698 20 : __ pmaxuw(dst, src);
2699 : __ pcmpeqw(dst, src);
2700 : __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2701 : __ pxor(dst, kScratchDoubleReg);
2702 : break;
2703 : }
2704 : case kX64I16x8GeU: {
2705 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2706 : XMMRegister dst = i.OutputSimd128Register();
2707 : XMMRegister src = i.InputSimd128Register(1);
2708 40 : __ pminuw(dst, src);
2709 : __ pcmpeqw(dst, src);
2710 : break;
2711 : }
2712 : case kX64I8x16Splat: {
2713 : CpuFeatureScope sse_scope(tasm(), SSSE3);
2714 : XMMRegister dst = i.OutputSimd128Register();
2715 720 : __ movd(dst, i.InputRegister(0));
2716 720 : __ xorps(kScratchDoubleReg, kScratchDoubleReg);
2717 : __ pshufb(dst, kScratchDoubleReg);
2718 : break;
2719 : }
2720 : case kX64I8x16ExtractLane: {
2721 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2722 9880 : Register dst = i.OutputRegister();
2723 19760 : __ pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
2724 9880 : __ movsxbl(dst, dst);
2725 : break;
2726 : }
2727 : case kX64I8x16ReplaceLane: {
2728 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2729 400 : if (instr->InputAt(2)->IsRegister()) {
2730 : __ pinsrb(i.OutputSimd128Register(), i.InputRegister(2),
2731 400 : i.InputInt8(1));
2732 : } else {
2733 0 : __ pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
2734 : }
2735 : break;
2736 : }
2737 : case kX64I8x16SConvertI16x8: {
2738 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2739 10 : __ packsswb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2740 : break;
2741 : }
2742 : case kX64I8x16Neg: {
2743 : CpuFeatureScope sse_scope(tasm(), SSSE3);
2744 : XMMRegister dst = i.OutputSimd128Register();
2745 : XMMRegister src = i.InputSimd128Register(0);
2746 10 : if (dst == src) {
2747 10 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2748 : __ psignb(dst, kScratchDoubleReg);
2749 : } else {
2750 0 : __ pxor(dst, dst);
2751 : __ psubb(dst, src);
2752 : }
2753 : break;
2754 : }
2755 : case kX64I8x16Shl: {
2756 : XMMRegister dst = i.OutputSimd128Register();
2757 : DCHECK_EQ(dst, i.InputSimd128Register(0));
2758 70 : int8_t shift = i.InputInt8(1) & 0x7;
2759 70 : if (shift < 4) {
2760 : // For small shifts, doubling is faster.
2761 60 : for (int i = 0; i < shift; ++i) {
2762 60 : __ paddb(dst, dst);
2763 : }
2764 : } else {
2765 : // Mask off the unwanted bits before word-shifting.
2766 40 : __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2767 40 : __ psrlw(kScratchDoubleReg, 8 + shift);
2768 : __ packuswb(kScratchDoubleReg, kScratchDoubleReg);
2769 : __ pand(dst, kScratchDoubleReg);
2770 40 : __ psllw(dst, shift);
2771 : }
2772 : break;
2773 : }
2774 : case kX64I8x16ShrS: {
2775 : XMMRegister dst = i.OutputSimd128Register();
2776 : XMMRegister src = i.InputSimd128Register(0);
2777 70 : int8_t shift = i.InputInt8(1) & 0x7;
2778 : // Unpack the bytes into words, do arithmetic shifts, and repack.
2779 70 : __ punpckhbw(kScratchDoubleReg, src);
2780 : __ punpcklbw(dst, src);
2781 70 : __ psraw(kScratchDoubleReg, 8 + shift);
2782 70 : __ psraw(dst, 8 + shift);
2783 : __ packsswb(dst, kScratchDoubleReg);
2784 : break;
2785 : }
2786 : case kX64I8x16Add: {
2787 10 : __ paddb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2788 : break;
2789 : }
2790 : case kX64I8x16AddSaturateS: {
2791 10 : __ paddsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2792 : break;
2793 : }
2794 : case kX64I8x16Sub: {
2795 10 : __ psubb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2796 : break;
2797 : }
2798 : case kX64I8x16SubSaturateS: {
2799 10 : __ psubsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2800 : break;
2801 : }
2802 : case kX64I8x16Mul: {
2803 : XMMRegister dst = i.OutputSimd128Register();
2804 : DCHECK_EQ(dst, i.InputSimd128Register(0));
2805 : XMMRegister right = i.InputSimd128Register(1);
2806 10 : XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
2807 : // I16x8 view of I8x16
2808 : // left = AAaa AAaa ... AAaa AAaa
2809 : // right= BBbb BBbb ... BBbb BBbb
2810 : // t = 00AA 00AA ... 00AA 00AA
2811 : // s = 00BB 00BB ... 00BB 00BB
2812 10 : __ movaps(tmp, dst);
2813 10 : __ movaps(kScratchDoubleReg, right);
2814 10 : __ psrlw(tmp, 8);
2815 10 : __ psrlw(kScratchDoubleReg, 8);
2816 : // dst = left * 256
2817 10 : __ psllw(dst, 8);
2818 : // t = I16x8Mul(t, s)
2819 : // => __PP __PP ... __PP __PP
2820 : __ pmullw(tmp, kScratchDoubleReg);
2821 : // dst = I16x8Mul(left * 256, right)
2822 : // => pp__ pp__ ... pp__ pp__
2823 : __ pmullw(dst, right);
2824 : // t = I16x8Shl(t, 8)
2825 : // => PP00 PP00 ... PP00 PP00
2826 10 : __ psllw(tmp, 8);
2827 : // dst = I16x8Shr(dst, 8)
2828 : // => 00pp 00pp ... 00pp 00pp
2829 10 : __ psrlw(dst, 8);
2830 : // dst = I16x8Or(dst, t)
2831 : // => PPpp PPpp ... PPpp PPpp
2832 : __ por(dst, tmp);
2833 : break;
2834 : }
2835 : case kX64I8x16MinS: {
2836 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2837 10 : __ pminsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2838 : break;
2839 : }
2840 : case kX64I8x16MaxS: {
2841 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2842 10 : __ pmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2843 : break;
2844 : }
2845 : case kX64I8x16Eq: {
2846 30 : __ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2847 : break;
2848 : }
2849 : case kX64I8x16Ne: {
2850 35 : __ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2851 : __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
2852 35 : __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
2853 : break;
2854 : }
2855 : case kX64I8x16GtS: {
2856 20 : __ pcmpgtb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2857 : break;
2858 : }
2859 : case kX64I8x16GeS: {
2860 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2861 : XMMRegister dst = i.OutputSimd128Register();
2862 : XMMRegister src = i.InputSimd128Register(1);
2863 20 : __ pminsb(dst, src);
2864 : __ pcmpeqb(dst, src);
2865 : break;
2866 : }
2867 : case kX64I8x16UConvertI16x8: {
2868 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2869 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2870 : XMMRegister dst = i.OutputSimd128Register();
2871 : // Change negative lanes to 0x7FFF
2872 10 : __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2873 10 : __ psrlw(kScratchDoubleReg, 1);
2874 : __ pminuw(dst, kScratchDoubleReg);
2875 10 : __ pminuw(kScratchDoubleReg, i.InputSimd128Register(1));
2876 : __ packuswb(dst, kScratchDoubleReg);
2877 : break;
2878 : }
2879 : case kX64I8x16ShrU: {
2880 : XMMRegister dst = i.OutputSimd128Register();
2881 : XMMRegister src = i.InputSimd128Register(0);
2882 70 : int8_t shift = i.InputInt8(1) & 0x7;
2883 : // Unpack the bytes into words, do logical shifts, and repack.
2884 70 : __ punpckhbw(kScratchDoubleReg, src);
2885 : __ punpcklbw(dst, src);
2886 70 : __ psrlw(kScratchDoubleReg, 8 + shift);
2887 70 : __ psrlw(dst, 8 + shift);
2888 : __ packuswb(dst, kScratchDoubleReg);
2889 : break;
2890 : }
2891 : case kX64I8x16AddSaturateU: {
2892 10 : __ paddusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2893 : break;
2894 : }
2895 : case kX64I8x16SubSaturateU: {
2896 10 : __ psubusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2897 : break;
2898 : }
2899 : case kX64I8x16MinU: {
2900 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2901 10 : __ pminub(i.OutputSimd128Register(), i.InputSimd128Register(1));
2902 : break;
2903 : }
2904 : case kX64I8x16MaxU: {
2905 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2906 10 : __ pmaxub(i.OutputSimd128Register(), i.InputSimd128Register(1));
2907 : break;
2908 : }
2909 : case kX64I8x16GtU: {
2910 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2911 : XMMRegister dst = i.OutputSimd128Register();
2912 : XMMRegister src = i.InputSimd128Register(1);
2913 20 : __ pmaxub(dst, src);
2914 : __ pcmpeqb(dst, src);
2915 : __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
2916 : __ pxor(dst, kScratchDoubleReg);
2917 : break;
2918 : }
2919 : case kX64I8x16GeU: {
2920 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
2921 : XMMRegister dst = i.OutputSimd128Register();
2922 : XMMRegister src = i.InputSimd128Register(1);
2923 0 : __ pminub(dst, src);
2924 : __ pcmpeqb(dst, src);
2925 : break;
2926 : }
2927 : case kX64S128And: {
2928 10 : __ pand(i.OutputSimd128Register(), i.InputSimd128Register(1));
2929 : break;
2930 : }
2931 : case kX64S128Or: {
2932 10 : __ por(i.OutputSimd128Register(), i.InputSimd128Register(1));
2933 : break;
2934 : }
2935 : case kX64S128Xor: {
2936 10 : __ pxor(i.OutputSimd128Register(), i.InputSimd128Register(1));
2937 : break;
2938 : }
2939 : case kX64S128Not: {
2940 : XMMRegister dst = i.OutputSimd128Register();
2941 : XMMRegister src = i.InputSimd128Register(0);
2942 10 : if (dst == src) {
2943 10 : __ movaps(kScratchDoubleReg, dst);
2944 : __ pcmpeqd(dst, dst);
2945 : __ pxor(dst, kScratchDoubleReg);
2946 : } else {
2947 0 : __ pcmpeqd(dst, dst);
2948 : __ pxor(dst, src);
2949 : }
2950 :
2951 : break;
2952 : }
2953 : case kX64S128Select: {
2954 : // Mask used here is stored in dst.
2955 35 : XMMRegister dst = i.OutputSimd128Register();
2956 35 : __ movaps(kScratchDoubleReg, i.InputSimd128Register(1));
2957 70 : __ xorps(kScratchDoubleReg, i.InputSimd128Register(2));
2958 35 : __ andps(dst, kScratchDoubleReg);
2959 70 : __ xorps(dst, i.InputSimd128Register(2));
2960 : break;
2961 : }
2962 : case kX64S8x16Shuffle: {
2963 : XMMRegister dst = i.OutputSimd128Register();
2964 : Register tmp = i.TempRegister(0);
2965 : // Prepare 16 byte aligned buffer for shuffle control mask
2966 3830 : __ movq(tmp, rsp);
2967 3830 : __ andq(rsp, Immediate(-16));
2968 3830 : if (instr->InputCount() == 5) { // only one input operand
2969 1700 : uint32_t mask[4] = {};
2970 : DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2971 10200 : for (int j = 4; j > 0; j--) {
2972 13600 : mask[j - 1] = i.InputUint32(j);
2973 : }
2974 :
2975 1700 : SetupShuffleMaskOnStack(tasm(), mask);
2976 3400 : __ pshufb(dst, Operand(rsp, 0));
2977 : } else { // two input operands
2978 : DCHECK_EQ(6, instr->InputCount());
2979 6390 : ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 0);
2980 2130 : uint32_t mask[4] = {};
2981 10650 : for (int j = 5; j > 1; j--) {
2982 8520 : uint32_t lanes = i.InputUint32(j);
2983 42600 : for (int k = 0; k < 32; k += 8) {
2984 34080 : uint8_t lane = lanes >> k;
2985 34080 : mask[j - 2] |= (lane < kSimd128Size ? lane : 0x80) << k;
2986 : }
2987 : }
2988 2130 : SetupShuffleMaskOnStack(tasm(), mask);
2989 4260 : __ pshufb(kScratchDoubleReg, Operand(rsp, 0));
2990 2130 : uint32_t mask1[4] = {};
2991 4260 : if (instr->InputAt(1)->IsSimd128Register()) {
2992 2130 : XMMRegister src1 = i.InputSimd128Register(1);
2993 2130 : if (src1 != dst) __ movups(dst, src1);
2994 : } else {
2995 0 : __ movups(dst, i.InputOperand(1));
2996 : }
2997 8520 : for (int j = 5; j > 1; j--) {
2998 8520 : uint32_t lanes = i.InputUint32(j);
2999 42600 : for (int k = 0; k < 32; k += 8) {
3000 34080 : uint8_t lane = lanes >> k;
3001 34080 : mask1[j - 2] |= (lane >= kSimd128Size ? (lane & 0x0F) : 0x80) << k;
3002 : }
3003 : }
3004 2130 : SetupShuffleMaskOnStack(tasm(), mask1);
3005 4260 : __ pshufb(dst, Operand(rsp, 0));
3006 : __ por(dst, kScratchDoubleReg);
3007 : }
3008 : __ movq(rsp, tmp);
3009 : break;
3010 : }
3011 : case kX64S32x4Swizzle: {
3012 : DCHECK_EQ(2, instr->InputCount());
3013 3860 : ASSEMBLE_SIMD_IMM_INSTR(pshufd, i.OutputSimd128Register(), 0,
3014 : i.InputInt8(1));
3015 : break;
3016 : }
3017 : case kX64S32x4Shuffle: {
3018 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
3019 : DCHECK_EQ(4, instr->InputCount()); // Swizzles should be handled above.
3020 : int8_t shuffle = i.InputInt8(2);
3021 : DCHECK_NE(0xe4, shuffle); // A simple blend should be handled below.
3022 5655 : ASSEMBLE_SIMD_IMM_INSTR(pshufd, kScratchDoubleReg, 1, shuffle);
3023 5685 : ASSEMBLE_SIMD_IMM_INSTR(pshufd, i.OutputSimd128Register(), 0, shuffle);
3024 3790 : __ pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputInt8(3));
3025 : break;
3026 : }
3027 : case kX64S16x8Blend: {
3028 280 : ASSEMBLE_SIMD_IMM_SHUFFLE(pblendw, SSE4_1, i.InputInt8(2));
3029 140 : break;
3030 : }
3031 : case kX64S16x8HalfShuffle1: {
3032 920 : XMMRegister dst = i.OutputSimd128Register();
3033 3680 : ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, i.InputInt8(1));
3034 920 : __ pshufhw(dst, dst, i.InputInt8(2));
3035 : break;
3036 : }
3037 : case kX64S16x8HalfShuffle2: {
3038 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
3039 610 : XMMRegister dst = i.OutputSimd128Register();
3040 2440 : ASSEMBLE_SIMD_IMM_INSTR(pshuflw, kScratchDoubleReg, 1, i.InputInt8(2));
3041 610 : __ pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputInt8(3));
3042 2440 : ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, i.InputInt8(2));
3043 610 : __ pshufhw(dst, dst, i.InputInt8(3));
3044 610 : __ pblendw(dst, kScratchDoubleReg, i.InputInt8(4));
3045 : break;
3046 : }
3047 : case kX64S8x16Alignr: {
3048 1200 : ASSEMBLE_SIMD_IMM_SHUFFLE(palignr, SSSE3, i.InputInt8(2));
3049 600 : break;
3050 : }
3051 : case kX64S16x8Dup: {
3052 475 : XMMRegister dst = i.OutputSimd128Register();
3053 475 : int8_t lane = i.InputInt8(1) & 0x7;
3054 475 : int8_t lane4 = lane & 0x3;
3055 475 : int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
3056 475 : if (lane < 4) {
3057 1425 : ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, half_dup);
3058 475 : __ pshufd(dst, dst, 0);
3059 : } else {
3060 0 : ASSEMBLE_SIMD_IMM_INSTR(pshufhw, dst, 0, half_dup);
3061 0 : __ pshufd(dst, dst, 0xaa);
3062 : }
3063 : break;
3064 : }
3065 : case kX64S8x16Dup: {
3066 : XMMRegister dst = i.OutputSimd128Register();
3067 610 : int8_t lane = i.InputInt8(1) & 0xf;
3068 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3069 610 : if (lane < 8) {
3070 570 : __ punpcklbw(dst, dst);
3071 : } else {
3072 40 : __ punpckhbw(dst, dst);
3073 : }
3074 610 : lane &= 0x7;
3075 610 : int8_t lane4 = lane & 0x3;
3076 610 : int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
3077 610 : if (lane < 4) {
3078 570 : __ pshuflw(dst, dst, half_dup);
3079 570 : __ pshufd(dst, dst, 0);
3080 : } else {
3081 40 : __ pshufhw(dst, dst, half_dup);
3082 40 : __ pshufd(dst, dst, 0xaa);
3083 : }
3084 : break;
3085 : }
3086 : case kX64S64x2UnpackHigh:
3087 0 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhqdq);
3088 : break;
3089 : case kX64S32x4UnpackHigh:
3090 2060 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhdq);
3091 : break;
3092 : case kX64S16x8UnpackHigh:
3093 2420 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhwd);
3094 : break;
3095 : case kX64S8x16UnpackHigh:
3096 1340 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhbw);
3097 : break;
3098 : case kX64S64x2UnpackLow:
3099 160 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklqdq);
3100 : break;
3101 : case kX64S32x4UnpackLow:
3102 1180 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckldq);
3103 : break;
3104 : case kX64S16x8UnpackLow:
3105 1180 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklwd);
3106 : break;
3107 : case kX64S8x16UnpackLow:
3108 1660 : ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklbw);
3109 : break;
3110 : case kX64S16x8UnzipHigh: {
3111 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
3112 : XMMRegister dst = i.OutputSimd128Register();
3113 : XMMRegister src2 = dst;
3114 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3115 265 : if (instr->InputCount() == 2) {
3116 735 : ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
3117 245 : __ psrld(kScratchDoubleReg, 16);
3118 : src2 = kScratchDoubleReg;
3119 : }
3120 265 : __ psrld(dst, 16);
3121 : __ packusdw(dst, src2);
3122 : break;
3123 : }
3124 : case kX64S16x8UnzipLow: {
3125 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
3126 : XMMRegister dst = i.OutputSimd128Register();
3127 : XMMRegister src2 = dst;
3128 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3129 355 : __ pxor(kScratchDoubleReg, kScratchDoubleReg);
3130 355 : if (instr->InputCount() == 2) {
3131 1005 : ASSEMBLE_SIMD_IMM_INSTR(pblendw, kScratchDoubleReg, 1, 0x55);
3132 : src2 = kScratchDoubleReg;
3133 : }
3134 355 : __ pblendw(dst, kScratchDoubleReg, 0xaa);
3135 : __ packusdw(dst, src2);
3136 : break;
3137 : }
3138 : case kX64S8x16UnzipHigh: {
3139 : XMMRegister dst = i.OutputSimd128Register();
3140 : XMMRegister src2 = dst;
3141 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3142 370 : if (instr->InputCount() == 2) {
3143 1035 : ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
3144 350 : __ psrlw(kScratchDoubleReg, 8);
3145 : src2 = kScratchDoubleReg;
3146 : }
3147 370 : __ psrlw(dst, 8);
3148 : __ packuswb(dst, src2);
3149 : break;
3150 : }
3151 : case kX64S8x16UnzipLow: {
3152 : XMMRegister dst = i.OutputSimd128Register();
3153 : XMMRegister src2 = dst;
3154 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3155 395 : if (instr->InputCount() == 2) {
3156 1065 : ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
3157 355 : __ psllw(kScratchDoubleReg, 8);
3158 355 : __ psrlw(kScratchDoubleReg, 8);
3159 : src2 = kScratchDoubleReg;
3160 : }
3161 395 : __ psllw(dst, 8);
3162 395 : __ psrlw(dst, 8);
3163 : __ packuswb(dst, src2);
3164 : break;
3165 : }
3166 : case kX64S8x16TransposeLow: {
3167 : XMMRegister dst = i.OutputSimd128Register();
3168 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3169 220 : __ psllw(dst, 8);
3170 220 : if (instr->InputCount() == 1) {
3171 20 : __ movups(kScratchDoubleReg, dst);
3172 : } else {
3173 : DCHECK_EQ(2, instr->InputCount());
3174 600 : ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
3175 200 : __ psllw(kScratchDoubleReg, 8);
3176 : }
3177 220 : __ psrlw(dst, 8);
3178 : __ por(dst, kScratchDoubleReg);
3179 : break;
3180 : }
3181 : case kX64S8x16TransposeHigh: {
3182 : XMMRegister dst = i.OutputSimd128Register();
3183 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3184 475 : __ psrlw(dst, 8);
3185 475 : if (instr->InputCount() == 1) {
3186 20 : __ movups(kScratchDoubleReg, dst);
3187 : } else {
3188 : DCHECK_EQ(2, instr->InputCount());
3189 1365 : ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
3190 455 : __ psrlw(kScratchDoubleReg, 8);
3191 : }
3192 475 : __ psllw(kScratchDoubleReg, 8);
3193 : __ por(dst, kScratchDoubleReg);
3194 : break;
3195 : }
3196 : case kX64S8x8Reverse:
3197 : case kX64S8x4Reverse:
3198 : case kX64S8x2Reverse: {
3199 : DCHECK_EQ(1, instr->InputCount());
3200 : XMMRegister dst = i.OutputSimd128Register();
3201 : DCHECK_EQ(dst, i.InputSimd128Register(0));
3202 1050 : if (arch_opcode != kX64S8x2Reverse) {
3203 : // First shuffle words into position.
3204 605 : int8_t shuffle_mask = arch_opcode == kX64S8x4Reverse ? 0xB1 : 0x1B;
3205 605 : __ pshuflw(dst, dst, shuffle_mask);
3206 605 : __ pshufhw(dst, dst, shuffle_mask);
3207 : }
3208 1050 : __ movaps(kScratchDoubleReg, dst);
3209 1050 : __ psrlw(kScratchDoubleReg, 8);
3210 1050 : __ psllw(dst, 8);
3211 : __ por(dst, kScratchDoubleReg);
3212 : break;
3213 : }
3214 : case kX64S1x4AnyTrue:
3215 : case kX64S1x8AnyTrue:
3216 : case kX64S1x16AnyTrue: {
3217 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
3218 : Register dst = i.OutputRegister();
3219 : XMMRegister src = i.InputSimd128Register(0);
3220 : Register tmp = i.TempRegister(0);
3221 135 : __ xorq(tmp, tmp);
3222 : __ movq(dst, Immediate(1));
3223 : __ ptest(src, src);
3224 135 : __ cmovq(zero, dst, tmp);
3225 : break;
3226 : }
3227 : case kX64S1x4AllTrue:
3228 : case kX64S1x8AllTrue:
3229 : case kX64S1x16AllTrue: {
3230 : CpuFeatureScope sse_scope(tasm(), SSE4_1);
3231 : Register dst = i.OutputRegister();
3232 : XMMRegister src = i.InputSimd128Register(0);
3233 : Register tmp = i.TempRegister(0);
3234 135 : __ movq(tmp, Immediate(1));
3235 : __ xorq(dst, dst);
3236 : __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3237 : __ pxor(kScratchDoubleReg, src);
3238 : __ ptest(kScratchDoubleReg, kScratchDoubleReg);
3239 135 : __ cmovq(zero, dst, tmp);
3240 : break;
3241 : }
3242 : case kX64StackCheck:
3243 554385 : __ CompareRoot(rsp, RootIndex::kStackLimit);
3244 554404 : break;
3245 : case kWord32AtomicExchangeInt8: {
3246 1872 : __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
3247 1872 : __ movsxbl(i.InputRegister(0), i.InputRegister(0));
3248 936 : break;
3249 : }
3250 : case kWord32AtomicExchangeUint8: {
3251 1980 : __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
3252 990 : __ movzxbl(i.InputRegister(0), i.InputRegister(0));
3253 : break;
3254 : }
3255 : case kWord32AtomicExchangeInt16: {
3256 1540 : __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
3257 1540 : __ movsxwl(i.InputRegister(0), i.InputRegister(0));
3258 770 : break;
3259 : }
3260 : case kWord32AtomicExchangeUint16: {
3261 1316 : __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
3262 658 : __ movzxwl(i.InputRegister(0), i.InputRegister(0));
3263 : break;
3264 : }
3265 : case kWord32AtomicExchangeWord32: {
3266 2376 : __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
3267 : break;
3268 : }
3269 : case kWord32AtomicCompareExchangeInt8: {
3270 112 : __ lock();
3271 224 : __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
3272 112 : __ movsxbl(rax, rax);
3273 112 : break;
3274 : }
3275 : case kWord32AtomicCompareExchangeUint8: {
3276 137 : __ lock();
3277 274 : __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
3278 : __ movzxbl(rax, rax);
3279 : break;
3280 : }
3281 : case kWord32AtomicCompareExchangeInt16: {
3282 112 : __ lock();
3283 224 : __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
3284 112 : __ movsxwl(rax, rax);
3285 112 : break;
3286 : }
3287 : case kWord32AtomicCompareExchangeUint16: {
3288 137 : __ lock();
3289 274 : __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
3290 : __ movzxwl(rax, rax);
3291 : break;
3292 : }
3293 : case kWord32AtomicCompareExchangeWord32: {
3294 258 : __ lock();
3295 258 : __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
3296 : break;
3297 : }
3298 : #define ATOMIC_BINOP_CASE(op, inst) \
3299 : case kWord32Atomic##op##Int8: \
3300 : ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
3301 : __ movsxbl(rax, rax); \
3302 : break; \
3303 : case kWord32Atomic##op##Uint8: \
3304 : ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
3305 : __ movzxbl(rax, rax); \
3306 : break; \
3307 : case kWord32Atomic##op##Int16: \
3308 : ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
3309 : __ movsxwl(rax, rax); \
3310 : break; \
3311 : case kWord32Atomic##op##Uint16: \
3312 : ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
3313 : __ movzxwl(rax, rax); \
3314 : break; \
3315 : case kWord32Atomic##op##Word32: \
3316 : ASSEMBLE_ATOMIC_BINOP(inst, movl, cmpxchgl); \
3317 : break;
3318 11536 : ATOMIC_BINOP_CASE(Add, addl)
3319 11776 : ATOMIC_BINOP_CASE(Sub, subl)
3320 10542 : ATOMIC_BINOP_CASE(And, andl)
3321 10610 : ATOMIC_BINOP_CASE(Or, orl)
3322 12464 : ATOMIC_BINOP_CASE(Xor, xorl)
3323 : #undef ATOMIC_BINOP_CASE
3324 : case kX64Word64AtomicExchangeUint8: {
3325 4124 : __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
3326 2062 : __ movzxbq(i.InputRegister(0), i.InputRegister(0));
3327 : break;
3328 : }
3329 : case kX64Word64AtomicExchangeUint16: {
3330 1956 : __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
3331 978 : __ movzxwq(i.InputRegister(0), i.InputRegister(0));
3332 : break;
3333 : }
3334 : case kX64Word64AtomicExchangeUint32: {
3335 1700 : __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
3336 : break;
3337 : }
3338 : case kX64Word64AtomicExchangeUint64: {
3339 1948 : __ xchgq(i.InputRegister(0), i.MemoryOperand(1));
3340 : break;
3341 : }
3342 : case kX64Word64AtomicCompareExchangeUint8: {
3343 25 : __ lock();
3344 50 : __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
3345 : __ movzxbq(rax, rax);
3346 : break;
3347 : }
3348 : case kX64Word64AtomicCompareExchangeUint16: {
3349 25 : __ lock();
3350 50 : __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
3351 : __ movzxwq(rax, rax);
3352 : break;
3353 : }
3354 : case kX64Word64AtomicCompareExchangeUint32: {
3355 25 : __ lock();
3356 25 : __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
3357 : break;
3358 : }
3359 : case kX64Word64AtomicCompareExchangeUint64: {
3360 279 : __ lock();
3361 279 : __ cmpxchgq(i.MemoryOperand(2), i.InputRegister(1));
3362 : break;
3363 : }
3364 : #define ATOMIC64_BINOP_CASE(op, inst) \
3365 : case kX64Word64Atomic##op##Uint8: \
3366 : ASSEMBLE_ATOMIC64_BINOP(inst, movb, cmpxchgb); \
3367 : __ movzxbq(rax, rax); \
3368 : break; \
3369 : case kX64Word64Atomic##op##Uint16: \
3370 : ASSEMBLE_ATOMIC64_BINOP(inst, movw, cmpxchgw); \
3371 : __ movzxwq(rax, rax); \
3372 : break; \
3373 : case kX64Word64Atomic##op##Uint32: \
3374 : ASSEMBLE_ATOMIC64_BINOP(inst, movl, cmpxchgl); \
3375 : break; \
3376 : case kX64Word64Atomic##op##Uint64: \
3377 : ASSEMBLE_ATOMIC64_BINOP(inst, movq, cmpxchgq); \
3378 : break;
3379 10237 : ATOMIC64_BINOP_CASE(Add, addq)
3380 10952 : ATOMIC64_BINOP_CASE(Sub, subq)
3381 10947 : ATOMIC64_BINOP_CASE(And, andq)
3382 11217 : ATOMIC64_BINOP_CASE(Or, orq)
3383 11460 : ATOMIC64_BINOP_CASE(Xor, xorq)
3384 : #undef ATOMIC64_BINOP_CASE
3385 : case kWord32AtomicLoadInt8:
3386 : case kWord32AtomicLoadUint8:
3387 : case kWord32AtomicLoadInt16:
3388 : case kWord32AtomicLoadUint16:
3389 : case kWord32AtomicLoadWord32:
3390 : case kWord32AtomicStoreWord8:
3391 : case kWord32AtomicStoreWord16:
3392 : case kWord32AtomicStoreWord32:
3393 : case kX64Word64AtomicLoadUint8:
3394 : case kX64Word64AtomicLoadUint16:
3395 : case kX64Word64AtomicLoadUint32:
3396 : case kX64Word64AtomicLoadUint64:
3397 : case kX64Word64AtomicStoreWord8:
3398 : case kX64Word64AtomicStoreWord16:
3399 : case kX64Word64AtomicStoreWord32:
3400 : case kX64Word64AtomicStoreWord64:
3401 0 : UNREACHABLE(); // Won't be generated by instruction selector.
3402 : break;
3403 : }
3404 : return kSuccess;
}  // NOLINT(readability/fn_size)
3406 :
3407 : #undef ASSEMBLE_UNOP
3408 : #undef ASSEMBLE_BINOP
3409 : #undef ASSEMBLE_COMPARE
3410 : #undef ASSEMBLE_MULT
3411 : #undef ASSEMBLE_SHIFT
3412 : #undef ASSEMBLE_MOVX
3413 : #undef ASSEMBLE_SSE_BINOP
3414 : #undef ASSEMBLE_SSE_UNOP
3415 : #undef ASSEMBLE_AVX_BINOP
3416 : #undef ASSEMBLE_IEEE754_BINOP
3417 : #undef ASSEMBLE_IEEE754_UNOP
3418 : #undef ASSEMBLE_ATOMIC_BINOP
3419 : #undef ASSEMBLE_ATOMIC64_BINOP
3420 : #undef ASSEMBLE_SIMD_INSTR
3421 : #undef ASSEMBLE_SIMD_IMM_INSTR
3422 : #undef ASSEMBLE_SIMD_PUNPCK_SHUFFLE
3423 : #undef ASSEMBLE_SIMD_IMM_SHUFFLE
3424 :
3425 : namespace {
3426 :
3427 6185284 : Condition FlagsConditionToCondition(FlagsCondition condition) {
3428 6185284 : switch (condition) {
3429 : case kUnorderedEqual:
3430 : case kEqual:
3431 : return equal;
3432 : case kUnorderedNotEqual:
3433 : case kNotEqual:
3434 1431792 : return not_equal;
3435 : case kSignedLessThan:
3436 224293 : return less;
3437 : case kSignedGreaterThanOrEqual:
3438 56359 : return greater_equal;
3439 : case kSignedLessThanOrEqual:
3440 114447 : return less_equal;
3441 : case kSignedGreaterThan:
3442 76921 : return greater;
3443 : case kUnsignedLessThan:
3444 187857 : return below;
3445 : case kUnsignedGreaterThanOrEqual:
3446 231751 : return above_equal;
3447 : case kUnsignedLessThanOrEqual:
3448 943317 : return below_equal;
3449 : case kUnsignedGreaterThan:
3450 116487 : return above;
3451 : case kOverflow:
3452 203153 : return overflow;
3453 : case kNotOverflow:
3454 2128 : return no_overflow;
3455 : default:
3456 : break;
3457 : }
3458 0 : UNREACHABLE();
3459 : }
3460 :
3461 : } // namespace
3462 :
3463 : // Assembles branches after this instruction.
3464 5189870 : void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
3465 : Label::Distance flabel_distance =
3466 5189870 : branch->fallthru ? Label::kNear : Label::kFar;
3467 5189870 : Label* tlabel = branch->true_label;
3468 5189870 : Label* flabel = branch->false_label;
3469 5189870 : if (branch->condition == kUnorderedEqual) {
3470 49660 : __ j(parity_even, flabel, flabel_distance);
3471 5140210 : } else if (branch->condition == kUnorderedNotEqual) {
3472 116253 : __ j(parity_even, tlabel);
3473 : }
3474 5189869 : __ j(FlagsConditionToCondition(branch->condition), tlabel);
3475 :
3476 5189893 : if (!branch->fallthru) __ jmp(flabel, flabel_distance);
3477 5189893 : }
3478 :
3479 0 : void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
3480 : Instruction* instr) {
3481 : // TODO(jarin) Handle float comparisons (kUnordered[Not]Equal).
3482 0 : if (condition == kUnorderedEqual || condition == kUnorderedNotEqual) {
3483 0 : return;
3484 : }
3485 :
3486 : condition = NegateFlagsCondition(condition);
3487 0 : __ movl(kScratchRegister, Immediate(0));
3488 : __ cmovq(FlagsConditionToCondition(condition), kSpeculationPoisonRegister,
3489 0 : kScratchRegister);
3490 : }
3491 :
// Assembles a branch whose true target is a deoptimization exit. Identical
// to AssembleArchBranch, except that with --deopt-every-n-times it also
// emits a stress counter that forces a deopt every N evaluations.
void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
                                            BranchInfo* branch) {
  Label::Distance flabel_distance =
      branch->fallthru ? Label::kNear : Label::kFar;
  Label* tlabel = branch->true_label;
  Label* flabel = branch->false_label;
  Label nodeopt;
  // Route unordered (NaN, parity flag set) results first, as in
  // AssembleArchBranch.
  if (branch->condition == kUnorderedEqual) {
    __ j(parity_even, flabel, flabel_distance);
  } else if (branch->condition == kUnorderedNotEqual) {
    __ j(parity_even, tlabel);
  }
  __ j(FlagsConditionToCondition(branch->condition), tlabel);

  if (FLAG_deopt_every_n_times > 0) {
    ExternalReference counter =
        ExternalReference::stress_deopt_count(isolate());

    // Preserve the flags and rax: decl below clobbers both, and the
    // surrounding code still depends on the comparison result.
    __ pushfq();
    __ pushq(rax);
    __ load_rax(counter);
    __ decl(rax);
    __ j(not_zero, &nodeopt);

    // Counter reached zero: reset it and force the deopt path.
    __ Set(rax, FLAG_deopt_every_n_times);
    __ store_rax(counter);
    __ popq(rax);
    __ popfq();
    __ jmp(tlabel);

    // Counter still positive: store it back and continue normally.
    __ bind(&nodeopt);
    __ store_rax(counter);
    __ popq(rax);
    __ popfq();
  }

  if (!branch->fallthru) {
    __ jmp(flabel, flabel_distance);
  }
}
3532 :
3533 8034053 : void CodeGenerator::AssembleArchJump(RpoNumber target) {
3534 8034053 : if (!IsNextInAssemblyOrder(target)) __ jmp(GetLabel(target));
3535 4948683 : }
3536 :
3537 47267 : void CodeGenerator::AssembleArchTrap(Instruction* instr,
3538 47267 : FlagsCondition condition) {
3539 : auto ool = new (zone()) WasmOutOfLineTrap(this, instr);
3540 47242 : Label* tlabel = ool->entry();
3541 47242 : Label end;
3542 47242 : if (condition == kUnorderedEqual) {
3543 0 : __ j(parity_even, &end);
3544 47242 : } else if (condition == kUnorderedNotEqual) {
3545 550 : __ j(parity_even, tlabel);
3546 : }
3547 47243 : __ j(FlagsConditionToCondition(condition), tlabel);
3548 47269 : __ bind(&end);
3549 47292 : }
3550 :
3551 : // Assembles boolean materializations after this instruction.
3552 1215762 : void CodeGenerator::AssembleArchBoolean(Instruction* instr,
3553 : FlagsCondition condition) {
3554 : X64OperandConverter i(this, instr);
3555 607881 : Label done;
3556 :
3557 : // Materialize a full 64-bit 1 or 0 value. The result register is always the
3558 : // last output of the instruction.
3559 607881 : Label check;
3560 : DCHECK_NE(0u, instr->OutputCount());
3561 607881 : Register reg = i.OutputRegister(instr->OutputCount() - 1);
3562 607881 : if (condition == kUnorderedEqual) {
3563 2875 : __ j(parity_odd, &check, Label::kNear);
3564 : __ movl(reg, Immediate(0));
3565 2875 : __ jmp(&done, Label::kNear);
3566 605006 : } else if (condition == kUnorderedNotEqual) {
3567 3305 : __ j(parity_odd, &check, Label::kNear);
3568 : __ movl(reg, Immediate(1));
3569 3305 : __ jmp(&done, Label::kNear);
3570 : }
3571 607881 : __ bind(&check);
3572 607880 : __ setcc(FlagsConditionToCondition(condition), reg);
3573 : __ movzxbl(reg, reg);
3574 607881 : __ bind(&done);
3575 607881 : }
3576 :
3577 514816 : void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
3578 : X64OperandConverter i(this, instr);
3579 39506 : Register input = i.InputRegister(0);
3580 : std::vector<std::pair<int32_t, Label*>> cases;
3581 514816 : for (size_t index = 2; index < instr->InputCount(); index += 2) {
3582 653705 : cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
3583 : }
3584 : AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
3585 79014 : cases.data() + cases.size());
3586 39506 : }
3587 :
3588 0 : void CodeGenerator::AssembleArchLookupSwitch(Instruction* instr) {
3589 : X64OperandConverter i(this, instr);
3590 0 : Register input = i.InputRegister(0);
3591 0 : for (size_t index = 2; index < instr->InputCount(); index += 2) {
3592 0 : __ cmpl(input, Immediate(i.InputInt32(index + 0)));
3593 0 : __ j(equal, GetLabel(i.InputRpo(index + 1)));
3594 : }
3595 0 : AssembleArchJump(i.InputRpo(1));
3596 0 : }
3597 :
3598 225754 : void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
3599 : X64OperandConverter i(this, instr);
3600 304 : Register input = i.InputRegister(0);
3601 304 : int32_t const case_count = static_cast<int32_t>(instr->InputCount() - 2);
3602 304 : Label** cases = zone()->NewArray<Label*>(case_count);
3603 225146 : for (int32_t index = 0; index < case_count; ++index) {
3604 449684 : cases[index] = GetLabel(i.InputRpo(index + 2));
3605 : }
3606 304 : Label* const table = AddJumpTable(cases, case_count);
3607 304 : __ cmpl(input, Immediate(case_count));
3608 608 : __ j(above_equal, GetLabel(i.InputRpo(1)));
3609 608 : __ leaq(kScratchRegister, Operand(table));
3610 304 : __ jmp(Operand(kScratchRegister, input, times_8, 0));
3611 304 : }
3612 :
namespace {

// Number of bytes occupied by one XMM register when spilled to the stack
// (used below when saving/restoring callee-saved FP registers via movdqu).
static const int kQuadWordSize = 16;

}  // namespace
3618 :
3619 2949471 : void CodeGenerator::FinishFrame(Frame* frame) {
3620 5898942 : auto call_descriptor = linkage()->GetIncomingDescriptor();
3621 :
3622 : const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
3623 2949471 : if (saves_fp != 0) {
3624 : frame->AlignSavedCalleeRegisterSlots();
3625 0 : if (saves_fp != 0) { // Save callee-saved XMM registers.
3626 : const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
3627 : frame->AllocateSavedCalleeRegisterSlots(
3628 0 : saves_fp_count * (kQuadWordSize / kSystemPointerSize));
3629 : }
3630 : }
3631 : const RegList saves = call_descriptor->CalleeSavedRegisters();
3632 2949471 : if (saves != 0) { // Save callee-saved registers.
3633 : int count = 0;
3634 18082416 : for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
3635 18082416 : if (((1 << i) & saves)) {
3636 5650755 : ++count;
3637 : }
3638 : }
3639 : frame->AllocateSavedCalleeRegisterSlots(count);
3640 : }
3641 2949471 : }
3642 :
// Emits the function prologue: builds the stack frame appropriate for the
// incoming call descriptor (C, JS, stub, or wasm), performs the wasm stack
// overflow check for large frames, allocates spill slots, and saves
// callee-saved XMM and GP registers. Mirrored by AssembleReturn.
void CodeGenerator::AssembleConstructFrame() {
  auto call_descriptor = linkage()->GetIncomingDescriptor();
  if (frame_access_state()->has_frame()) {
    // Remember where frame construction starts for the unwinding info.
    int pc_base = __ pc_offset();

    if (call_descriptor->IsCFunctionCall()) {
      // Plain C frame: push the caller's frame pointer and link rbp.
      __ pushq(rbp);
      __ movq(rbp, rsp);
    } else if (call_descriptor->IsJSFunctionCall()) {
      __ Prologue();
      if (call_descriptor->PushArgumentCount()) {
        __ pushq(kJavaScriptCallArgCountRegister);
      }
    } else {
      __ StubPrologue(info()->GetOutputStackFrameType());
      if (call_descriptor->IsWasmFunctionCall()) {
        __ pushq(kWasmInstanceRegister);
      } else if (call_descriptor->IsWasmImportWrapper()) {
        // WASM import wrappers are passed a tuple in the place of the instance.
        // Unpack the tuple into the instance and the target callable.
        // This must be done here in the codegen because it cannot be expressed
        // properly in the graph.
        __ LoadTaggedPointerField(
            kJSFunctionRegister,
            FieldOperand(kWasmInstanceRegister, Tuple2::kValue2Offset));
        __ LoadTaggedPointerField(
            kWasmInstanceRegister,
            FieldOperand(kWasmInstanceRegister, Tuple2::kValue1Offset));
        __ pushq(kWasmInstanceRegister);
      }
    }

    unwinding_info_writer_.MarkFrameConstructed(pc_base);
  }
  // Slots still to be allocated beyond the fixed frame (spill area etc.).
  int shrink_slots = frame()->GetTotalFrameSlotCount() -
                     call_descriptor->CalculateFixedFrameSize();

  if (info()->is_osr()) {
    // TurboFan OSR-compiled functions cannot be entered directly.
    __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);

    // Unoptimized code jumps directly to this entrypoint while the unoptimized
    // frame is still on the stack. Optimized code uses OSR values directly from
    // the unoptimized frame. Thus, all that needs to be done is to allocate the
    // remaining stack slots.
    if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
    osr_pc_offset_ = __ pc_offset();
    shrink_slots -= static_cast<int>(osr_helper()->UnoptimizedFrameSlots());
    ResetSpeculationPoison();
  }

  const RegList saves = call_descriptor->CalleeSavedRegisters();
  const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();

  if (shrink_slots > 0) {
    DCHECK(frame_access_state()->has_frame());
    if (info()->IsWasm() && shrink_slots > 128) {
      // For WebAssembly functions with big frames we have to do the stack
      // overflow check before we construct the frame. Otherwise we may not
      // have enough space on the stack to call the runtime for the stack
      // overflow.
      Label done;

      // If the frame is bigger than the stack, we throw the stack overflow
      // exception unconditionally. Thereby we can avoid the integer overflow
      // check in the condition code.
      if (shrink_slots * kSystemPointerSize < FLAG_stack_size * 1024) {
        __ movq(kScratchRegister,
                FieldOperand(kWasmInstanceRegister,
                             WasmInstanceObject::kRealStackLimitAddressOffset));
        __ movq(kScratchRegister, Operand(kScratchRegister, 0));
        __ addq(kScratchRegister, Immediate(shrink_slots * kSystemPointerSize));
        __ cmpq(rsp, kScratchRegister);
        __ j(above_equal, &done);
      }
      // Stack limit would be exceeded: call the runtime to throw.
      __ LoadTaggedPointerField(
          rcx, FieldOperand(kWasmInstanceRegister,
                            WasmInstanceObject::kCEntryStubOffset));
      __ Move(rsi, Smi::zero());
      __ CallRuntimeWithCEntry(Runtime::kThrowWasmStackOverflow, rcx);
      // An empty reference map suffices: the throw never returns here.
      ReferenceMap* reference_map = new (zone()) ReferenceMap(zone());
      RecordSafepoint(reference_map, Safepoint::kSimple, 0,
                      Safepoint::kNoLazyDeopt);
      __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
      __ bind(&done);
    }

    // Skip callee-saved and return slots, which are created below.
    shrink_slots -= base::bits::CountPopulation(saves);
    shrink_slots -= base::bits::CountPopulation(saves_fp) *
                    (kQuadWordSize / kSystemPointerSize);
    shrink_slots -= frame()->GetReturnSlotCount();
    if (shrink_slots > 0) {
      __ subq(rsp, Immediate(shrink_slots * kSystemPointerSize));
    }
  }

  if (saves_fp != 0) {  // Save callee-saved XMM registers.
    const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
    const int stack_size = saves_fp_count * kQuadWordSize;
    // Adjust the stack pointer.
    __ subp(rsp, Immediate(stack_size));
    // Store the registers on the stack.
    int slot_idx = 0;
    for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
      if (!((1 << i) & saves_fp)) continue;
      __ movdqu(Operand(rsp, kQuadWordSize * slot_idx),
                XMMRegister::from_code(i));
      slot_idx++;
    }
  }

  if (saves != 0) {  // Save callee-saved registers.
    // Pushed from highest to lowest code; AssembleReturn pops in the
    // opposite order.
    for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
      if (!((1 << i) & saves)) continue;
      __ pushq(Register::from_code(i));
    }
  }

  // Allocate return slots (located after callee-saved).
  if (frame()->GetReturnSlotCount() > 0) {
    __ subq(rsp, Immediate(frame()->GetReturnSlotCount() * kSystemPointerSize));
  }
}
3767 :
// Emits the function epilogue: restores callee-saved registers, tears down
// the frame, and returns, popping |pop| additional stack arguments (either
// an immediate count or a count held in a register). Mirrors
// AssembleConstructFrame.
void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
  auto call_descriptor = linkage()->GetIncomingDescriptor();

  // Restore registers.
  const RegList saves = call_descriptor->CalleeSavedRegisters();
  if (saves != 0) {
    // Return slots sit below the callee-saved area; drop them first.
    const int returns = frame()->GetReturnSlotCount();
    if (returns != 0) {
      __ addq(rsp, Immediate(returns * kSystemPointerSize));
    }
    // Pop lowest to highest code — reverse of the prologue's push order.
    for (int i = 0; i < Register::kNumRegisters; i++) {
      if (!((1 << i) & saves)) continue;
      __ popq(Register::from_code(i));
    }
  }
  const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
  if (saves_fp != 0) {
    const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
    const int stack_size = saves_fp_count * kQuadWordSize;
    // Load the registers from the stack.
    int slot_idx = 0;
    for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
      if (!((1 << i) & saves_fp)) continue;
      __ movdqu(XMMRegister::from_code(i),
                Operand(rsp, kQuadWordSize * slot_idx));
      slot_idx++;
    }
    // Adjust the stack pointer.
    __ addp(rsp, Immediate(stack_size));
  }

  unwinding_info_writer_.MarkBlockWillExit();

  // Might need rcx for scratch if pop_size is too big or if there is a variable
  // pop count.
  DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rcx.bit());
  DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rdx.bit());
  size_t pop_size = call_descriptor->StackParameterCount() * kSystemPointerSize;
  X64OperandConverter g(this, nullptr);
  if (call_descriptor->IsCFunctionCall()) {
    AssembleDeconstructFrame();
  } else if (frame_access_state()->has_frame()) {
    if (pop->IsImmediate() && g.ToConstant(pop).ToInt32() == 0) {
      // Canonicalize JSFunction return sites for now.
      if (return_label_.is_bound()) {
        // A shared return sequence was already emitted; jump to it.
        __ jmp(&return_label_);
        return;
      } else {
        __ bind(&return_label_);
        AssembleDeconstructFrame();
      }
    } else {
      AssembleDeconstructFrame();
    }
  }

  if (pop->IsImmediate()) {
    // Constant argument count: fold it into the ret immediate.
    pop_size += g.ToConstant(pop).ToInt32() * kSystemPointerSize;
    CHECK_LT(pop_size, static_cast<size_t>(std::numeric_limits<int>::max()));
    __ Ret(static_cast<int>(pop_size), rcx);
  } else {
    // Variable argument count: pop the return address into a scratch
    // register, drop the arguments, then jump back.
    Register pop_reg = g.ToRegister(pop);
    Register scratch_reg = pop_reg == rcx ? rdx : rcx;
    __ popq(scratch_reg);
    __ leaq(rsp, Operand(rsp, pop_reg, times_8, static_cast<int>(pop_size)));
    __ jmp(scratch_reg);
  }
}
3836 :
3837 2949296 : void CodeGenerator::FinishCode() { tasm()->PatchConstPool(); }
3838 :
// Assembles a single parallel move: copies |source| (register, stack slot,
// or constant) into |destination| (register or stack slot), dispatching on
// the operand kinds. Uses kScratchRegister / kScratchDoubleReg for
// memory-to-memory copies.
void CodeGenerator::AssembleMove(InstructionOperand* source,
                                 InstructionOperand* destination) {
  X64OperandConverter g(this, nullptr);
  // Helper function to write the given constant to the dst register.
  auto MoveConstantToRegister = [&](Register dst, Constant src) {
    switch (src.type()) {
      case Constant::kInt32: {
        if (RelocInfo::IsWasmReference(src.rmode())) {
          __ movq(dst, src.ToInt64(), src.rmode());
        } else {
          int32_t value = src.ToInt32();
          if (value == 0) {
            // xor is a shorter encoding for loading zero.
            __ xorl(dst, dst);
          } else {
            __ movl(dst, Immediate(value));
          }
        }
        break;
      }
      case Constant::kInt64:
        if (RelocInfo::IsWasmReference(src.rmode())) {
          __ movq(dst, src.ToInt64(), src.rmode());
        } else {
          __ Set(dst, src.ToInt64());
        }
        break;
      case Constant::kFloat32:
        // Float constants destined for a GP register are boxed as numbers.
        __ MoveNumber(dst, src.ToFloat32());
        break;
      case Constant::kFloat64:
        __ MoveNumber(dst, src.ToFloat64().value());
        break;
      case Constant::kExternalReference:
        __ Move(dst, src.ToExternalReference());
        break;
      case Constant::kHeapObject: {
        Handle<HeapObject> src_object = src.ToHeapObject();
        RootIndex index;
        if (IsMaterializableFromRoot(src_object, &index)) {
          // Load well-known objects from the roots table instead of
          // embedding the handle.
          __ LoadRoot(dst, index);
        } else {
          __ Move(dst, src_object);
        }
        break;
      }
      case Constant::kDelayedStringConstant: {
        const StringConstantBase* src_constant = src.ToDelayedStringConstant();
        __ MoveStringConstant(dst, src_constant);
        break;
      }
      case Constant::kRpoNumber:
        UNREACHABLE();  // TODO(dcarney): load of labels on x64.
        break;
    }
  };
  // Helper function to write the given constant to the stack.
  auto MoveConstantToSlot = [&](Operand dst, Constant src) {
    if (!RelocInfo::IsWasmReference(src.rmode())) {
      switch (src.type()) {
        case Constant::kInt32:
          // Small integers can be stored directly as immediates.
          __ movq(dst, Immediate(src.ToInt32()));
          return;
        case Constant::kInt64:
          __ Set(dst, src.ToInt64());
          return;
        default:
          break;
      }
    }
    // Everything else is materialized in the scratch register first.
    MoveConstantToRegister(kScratchRegister, src);
    __ movq(dst, kScratchRegister);
  };
  // Dispatch on the source and destination operand kinds.
  switch (MoveType::InferMove(source, destination)) {
    case MoveType::kRegisterToRegister:
      if (source->IsRegister()) {
        __ movq(g.ToRegister(destination), g.ToRegister(source));
      } else {
        DCHECK(source->IsFPRegister());
        __ Movapd(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
      }
      return;
    case MoveType::kRegisterToStack: {
      Operand dst = g.ToOperand(destination);
      if (source->IsRegister()) {
        __ movq(dst, g.ToRegister(source));
      } else {
        DCHECK(source->IsFPRegister());
        XMMRegister src = g.ToDoubleRegister(source);
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        // SIMD values need the full 128-bit store; others fit in 64 bits.
        if (rep != MachineRepresentation::kSimd128) {
          __ Movsd(dst, src);
        } else {
          __ Movups(dst, src);
        }
      }
      return;
    }
    case MoveType::kStackToRegister: {
      Operand src = g.ToOperand(source);
      if (source->IsStackSlot()) {
        __ movq(g.ToRegister(destination), src);
      } else {
        DCHECK(source->IsFPStackSlot());
        XMMRegister dst = g.ToDoubleRegister(destination);
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        if (rep != MachineRepresentation::kSimd128) {
          __ Movsd(dst, src);
        } else {
          __ Movups(dst, src);
        }
      }
      return;
    }
    case MoveType::kStackToStack: {
      Operand src = g.ToOperand(source);
      Operand dst = g.ToOperand(destination);
      if (source->IsStackSlot()) {
        // Spill on demand to use a temporary register for memory-to-memory
        // moves.
        __ movq(kScratchRegister, src);
        __ movq(dst, kScratchRegister);
      } else {
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        if (rep != MachineRepresentation::kSimd128) {
          __ Movsd(kScratchDoubleReg, src);
          __ Movsd(dst, kScratchDoubleReg);
        } else {
          DCHECK(source->IsSimd128StackSlot());
          __ Movups(kScratchDoubleReg, src);
          __ Movups(dst, kScratchDoubleReg);
        }
      }
      return;
    }
    case MoveType::kConstantToRegister: {
      Constant src = g.ToConstant(source);
      if (destination->IsRegister()) {
        MoveConstantToRegister(g.ToRegister(destination), src);
      } else {
        DCHECK(destination->IsFPRegister());
        XMMRegister dst = g.ToDoubleRegister(destination);
        if (src.type() == Constant::kFloat32) {
          // TODO(turbofan): Can we do better here?
          __ Move(dst, bit_cast<uint32_t>(src.ToFloat32()));
        } else {
          DCHECK_EQ(src.type(), Constant::kFloat64);
          __ Move(dst, src.ToFloat64().AsUint64());
        }
      }
      return;
    }
    case MoveType::kConstantToStack: {
      Constant src = g.ToConstant(source);
      Operand dst = g.ToOperand(destination);
      if (destination->IsStackSlot()) {
        MoveConstantToSlot(dst, src);
      } else {
        DCHECK(destination->IsFPStackSlot());
        if (src.type() == Constant::kFloat32) {
          __ movl(dst, Immediate(bit_cast<uint32_t>(src.ToFloat32())));
        } else {
          DCHECK_EQ(src.type(), Constant::kFloat64);
          __ movq(kScratchRegister, src.ToFloat64().AsUint64());
          __ movq(dst, kScratchRegister);
        }
      }
      return;
    }
  }
  UNREACHABLE();
}
4014 :
// Assembles a swap of two operands (registers and/or stack slots) using the
// scratch register/XMM register, or push/pop sequences for stack operands.
// The IncreaseSPDelta / MaybeIncreaseBaseOffsetAt calls keep frame-access
// and unwinding bookkeeping consistent across the temporary SP changes.
void CodeGenerator::AssembleSwap(InstructionOperand* source,
                                 InstructionOperand* destination) {
  X64OperandConverter g(this, nullptr);
  // Dispatch on the source and destination operand kinds. Not all
  // combinations are possible.
  switch (MoveType::InferSwap(source, destination)) {
    case MoveType::kRegisterToRegister: {
      if (source->IsRegister()) {
        // Three-move swap through the scratch register.
        Register src = g.ToRegister(source);
        Register dst = g.ToRegister(destination);
        __ movq(kScratchRegister, src);
        __ movq(src, dst);
        __ movq(dst, kScratchRegister);
      } else {
        DCHECK(source->IsFPRegister());
        XMMRegister src = g.ToDoubleRegister(source);
        XMMRegister dst = g.ToDoubleRegister(destination);
        __ Movapd(kScratchDoubleReg, src);
        __ Movapd(src, dst);
        __ Movapd(dst, kScratchDoubleReg);
      }
      return;
    }
    case MoveType::kRegisterToStack: {
      if (source->IsRegister()) {
        // Park the register value on the stack, load the slot into the
        // register, then pop the parked value into the slot.
        Register src = g.ToRegister(source);
        __ pushq(src);
        frame_access_state()->IncreaseSPDelta(1);
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         kSystemPointerSize);
        __ movq(src, g.ToOperand(destination));
        frame_access_state()->IncreaseSPDelta(-1);
        __ popq(g.ToOperand(destination));
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         -kSystemPointerSize);
      } else {
        DCHECK(source->IsFPRegister());
        XMMRegister src = g.ToDoubleRegister(source);
        Operand dst = g.ToOperand(destination);
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        if (rep != MachineRepresentation::kSimd128) {
          __ Movsd(kScratchDoubleReg, src);
          __ Movsd(src, dst);
          __ Movsd(dst, kScratchDoubleReg);
        } else {
          __ Movups(kScratchDoubleReg, src);
          __ Movups(src, dst);
          __ Movups(dst, kScratchDoubleReg);
        }
      }
      return;
    }
    case MoveType::kStackToStack: {
      Operand src = g.ToOperand(source);
      Operand dst = g.ToOperand(destination);
      MachineRepresentation rep =
          LocationOperand::cast(source)->representation();
      if (rep != MachineRepresentation::kSimd128) {
        Register tmp = kScratchRegister;
        __ movq(tmp, dst);
        __ pushq(src);  // Then use stack to copy src to destination.
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         kSystemPointerSize);
        __ popq(dst);
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         -kSystemPointerSize);
        __ movq(src, tmp);
      } else {
        // Without AVX, misaligned reads and writes will trap. Move using the
        // stack, in two parts.
        __ movups(kScratchDoubleReg, dst);  // Save dst in scratch register.
        __ pushq(src);  // Then use stack to copy src to destination.
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         kSystemPointerSize);
        __ popq(dst);
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         -kSystemPointerSize);
        // Second 8-byte half of the 128-bit value.
        __ pushq(g.ToOperand(source, kSystemPointerSize));
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         kSystemPointerSize);
        __ popq(g.ToOperand(destination, kSystemPointerSize));
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         -kSystemPointerSize);
        __ movups(src, kScratchDoubleReg);
      }
      return;
    }
    default:
      UNREACHABLE();
      break;
  }
}
4108 :
4109 304 : void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
4110 225146 : for (size_t index = 0; index < target_count; ++index) {
4111 224842 : __ dq(targets[index]);
4112 : }
4113 304 : }
4114 :
4115 : #undef __
4116 :
4117 : } // namespace compiler
4118 : } // namespace internal
4119 183867 : } // namespace v8
|