LCOV - code coverage report
Current view: top level - src/compiler/backend/x64 - code-generator-x64.cc (source / functions)
Test: app.info
Date: 2019-04-18

                    Hit     Total   Coverage
  Lines:           1400      1585     88.3 %
  Functions:         41        58     70.7 %

          Line data    Source code
       1             : // Copyright 2013 the V8 project authors. All rights reserved.
       2             : // Use of this source code is governed by a BSD-style license that can be
       3             : // found in the LICENSE file.
       4             : 
       5             : #include "src/compiler/backend/code-generator.h"
       6             : 
       7             : #include <limits>
       8             : 
       9             : #include "src/base/overflowing-math.h"
      10             : #include "src/compiler/backend/code-generator-impl.h"
      11             : #include "src/compiler/backend/gap-resolver.h"
      12             : #include "src/compiler/node-matchers.h"
      13             : #include "src/compiler/osr.h"
      14             : #include "src/heap/heap-inl.h"  // crbug.com/v8/8499
      15             : #include "src/macro-assembler.h"
      16             : #include "src/objects/smi.h"
      17             : #include "src/optimized-compilation-info.h"
      18             : #include "src/wasm/wasm-code-manager.h"
      19             : #include "src/wasm/wasm-objects.h"
      20             : #include "src/x64/assembler-x64.h"
      21             : 
      22             : namespace v8 {
      23             : namespace internal {
      24             : namespace compiler {
      25             : 
      26             : #define __ tasm()->
      27             : 
      28             : // Adds X64 specific methods for decoding operands.
      29             : class X64OperandConverter : public InstructionOperandConverter {
      30             :  public:
      31             :   X64OperandConverter(CodeGenerator* gen, Instruction* instr)
      32             :       : InstructionOperandConverter(gen, instr) {}
      33             : 
      34             :   Immediate InputImmediate(size_t index) {
      35     5335996 :     return ToImmediate(instr_->InputAt(index));
      36             :   }
      37             : 
      38     1056256 :   Operand InputOperand(size_t index, int extra = 0) {
      39     2112523 :     return ToOperand(instr_->InputAt(index), extra);
      40             :   }
      41             : 
      42           0 :   Operand OutputOperand() { return ToOperand(instr_->Output()); }
      43             : 
      44     4382334 :   Immediate ToImmediate(InstructionOperand* operand) {
      45     4382334 :     Constant constant = ToConstant(operand);
      46     4382452 :     if (constant.type() == Constant::kFloat64) {
      47             :       DCHECK_EQ(0, constant.ToFloat64().AsUint64());
      48      365963 :       return Immediate(0);
      49             :     }
      50     4016489 :     if (RelocInfo::IsWasmReference(constant.rmode())) {
      51           0 :       return Immediate(constant.ToInt32(), constant.rmode());
      52             :     }
      53     4016489 :     return Immediate(constant.ToInt32());
      54             :   }
      55             : 
      56             :   Operand ToOperand(InstructionOperand* op, int extra = 0) {
      57             :     DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
      58    15785324 :     return SlotToOperand(AllocatedOperand::cast(op)->index(), extra);
      59             :   }
      60             : 
      61    15798210 :   Operand SlotToOperand(int slot_index, int extra = 0) {
      62    15798210 :     FrameOffset offset = frame_access_state()->GetFrameOffset(slot_index);
      63             :     return Operand(offset.from_stack_pointer() ? rsp : rbp,
      64    31596262 :                    offset.offset() + extra);
      65             :   }
      66             : 
      67             :   static size_t NextOffset(size_t* offset) {
      68    16339771 :     size_t i = *offset;
      69    29251325 :     (*offset)++;
      70             :     return i;
      71             :   }
      72             : 
      73             :   static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) {
      74             :     STATIC_ASSERT(0 == static_cast<int>(times_1));
      75             :     STATIC_ASSERT(1 == static_cast<int>(times_2));
      76             :     STATIC_ASSERT(2 == static_cast<int>(times_4));
      77             :     STATIC_ASSERT(3 == static_cast<int>(times_8));
      78     1302136 :     int scale = static_cast<int>(mode - one);
      79             :     DCHECK(scale >= 0 && scale < 4);
      80     1302136 :     return static_cast<ScaleFactor>(scale);
      81             :   }
      82             : 
      83    16339771 :   Operand MemoryOperand(size_t* offset) {
      84    16339771 :     AddressingMode mode = AddressingModeField::decode(instr_->opcode());
      85    16339771 :     switch (mode) {
      86             :       case kMode_MR: {
      87     2362025 :         Register base = InputRegister(NextOffset(offset));
      88             :         int32_t disp = 0;
      89     2362025 :         return Operand(base, disp);
      90             :       }
      91             :       case kMode_MRI: {
      92    11173551 :         Register base = InputRegister(NextOffset(offset));
      93             :         int32_t disp = InputInt32(NextOffset(offset));
      94    11173380 :         return Operand(base, disp);
      95             :       }
      96             :       case kMode_MR1:
      97             :       case kMode_MR2:
      98             :       case kMode_MR4:
      99             :       case kMode_MR8: {
     100      696983 :         Register base = InputRegister(NextOffset(offset));
     101      696983 :         Register index = InputRegister(NextOffset(offset));
     102             :         ScaleFactor scale = ScaleFor(kMode_MR1, mode);
     103             :         int32_t disp = 0;
     104      696983 :         return Operand(base, index, scale, disp);
     105             :       }
     106             :       case kMode_MR1I:
     107             :       case kMode_MR2I:
     108             :       case kMode_MR4I:
     109             :       case kMode_MR8I: {
     110      455620 :         Register base = InputRegister(NextOffset(offset));
     111      455620 :         Register index = InputRegister(NextOffset(offset));
     112             :         ScaleFactor scale = ScaleFor(kMode_MR1I, mode);
     113             :         int32_t disp = InputInt32(NextOffset(offset));
     114      455618 :         return Operand(base, index, scale, disp);
     115             :       }
     116             :       case kMode_M1: {
     117           0 :         Register base = InputRegister(NextOffset(offset));
     118             :         int32_t disp = 0;
     119           0 :         return Operand(base, disp);
     120             :       }
     121             :       case kMode_M2:
     122           0 :         UNREACHABLE();  // Should use kModeMR with more compact encoding instead
     123             :         return Operand(no_reg, 0);
     124             :       case kMode_M4:
     125             :       case kMode_M8: {
     126       19753 :         Register index = InputRegister(NextOffset(offset));
     127             :         ScaleFactor scale = ScaleFor(kMode_M1, mode);
     128             :         int32_t disp = 0;
     129       19753 :         return Operand(index, scale, disp);
     130             :       }
     131             :       case kMode_M1I:
     132             :       case kMode_M2I:
     133             :       case kMode_M4I:
     134             :       case kMode_M8I: {
     135      129780 :         Register index = InputRegister(NextOffset(offset));
     136             :         ScaleFactor scale = ScaleFor(kMode_M1I, mode);
     137             :         int32_t disp = InputInt32(NextOffset(offset));
     138      129780 :         return Operand(index, scale, disp);
     139             :       }
     140             :       case kMode_Root: {
     141     1502059 :         Register base = kRootRegister;
     142             :         int32_t disp = InputInt32(NextOffset(offset));
     143     1502060 :         return Operand(base, disp);
     144             :       }
     145             :       case kMode_None:
     146           0 :         UNREACHABLE();
     147             :     }
     148           0 :     UNREACHABLE();
     149             :   }
     150             : 
     151             :   Operand MemoryOperand(size_t first_input = 0) {
     152     9239072 :     return MemoryOperand(&first_input);
     153             :   }
     154             : };
     155             : 
     156             : namespace {
     157             : 
     158             : bool HasImmediateInput(Instruction* instr, size_t index) {
     159             :   return instr->InputAt(index)->IsImmediate();
     160             : }
     161             : 
     162           0 : class OutOfLineLoadFloat32NaN final : public OutOfLineCode {
     163             :  public:
     164             :   OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result)
     165         132 :       : OutOfLineCode(gen), result_(result) {}
     166             : 
     167         132 :   void Generate() final {
     168             :     __ Xorps(result_, result_);
     169             :     __ Divss(result_, result_);
     170         132 :   }
     171             : 
     172             :  private:
     173             :   XMMRegister const result_;
     174             : };
     175             : 
     176           0 : class OutOfLineLoadFloat64NaN final : public OutOfLineCode {
     177             :  public:
     178             :   OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result)
     179         592 :       : OutOfLineCode(gen), result_(result) {}
     180             : 
     181         592 :   void Generate() final {
     182             :     __ Xorpd(result_, result_);
     183             :     __ Divsd(result_, result_);
     184         592 :   }
     185             : 
     186             :  private:
     187             :   XMMRegister const result_;
     188             : };
     189             : 
     190           0 : class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
     191             :  public:
     192             :   OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
     193             :                              XMMRegister input, StubCallMode stub_mode,
     194             :                              UnwindingInfoWriter* unwinding_info_writer)
     195             :       : OutOfLineCode(gen),
     196             :         result_(result),
     197             :         input_(input),
     198             :         stub_mode_(stub_mode),
     199             :         unwinding_info_writer_(unwinding_info_writer),
     200             :         isolate_(gen->isolate()),
     201       53313 :         zone_(gen->zone()) {}
     202             : 
     203       53310 :   void Generate() final {
     204       53310 :     __ subq(rsp, Immediate(kDoubleSize));
     205       53312 :     unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
     206       53312 :                                                       kDoubleSize);
     207      106626 :     __ Movsd(MemOperand(rsp, 0), input_);
     208       53312 :     if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
     209             :       // A direct call to a wasm runtime stub defined in this module.
     210             :       // Just encode the stub index. This will be patched when the code
     211             :       // is added to the native module and copied into wasm code space.
     212        1625 :       __ near_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
     213             :     } else {
     214      103374 :       __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET);
     215             :     }
     216      106632 :     __ movl(result_, MemOperand(rsp, 0));
     217       53316 :     __ addq(rsp, Immediate(kDoubleSize));
     218       53316 :     unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
     219       53316 :                                                       -kDoubleSize);
     220       53316 :   }
     221             : 
     222             :  private:
     223             :   Register const result_;
     224             :   XMMRegister const input_;
     225             :   StubCallMode stub_mode_;
     226             :   UnwindingInfoWriter* const unwinding_info_writer_;
     227             :   Isolate* isolate_;
     228             :   Zone* zone_;
     229             : };
     230             : 
     231           0 : class OutOfLineRecordWrite final : public OutOfLineCode {
     232             :  public:
     233             :   OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand,
     234             :                        Register value, Register scratch0, Register scratch1,
     235             :                        RecordWriteMode mode, StubCallMode stub_mode)
     236             :       : OutOfLineCode(gen),
     237             :         object_(object),
     238             :         operand_(operand),
     239             :         value_(value),
     240             :         scratch0_(scratch0),
     241             :         scratch1_(scratch1),
     242             :         mode_(mode),
     243             :         stub_mode_(stub_mode),
     244      319799 :         zone_(gen->zone()) {}
     245             : 
     246      319798 :   void Generate() final {
     247      319798 :     if (mode_ > RecordWriteMode::kValueIsPointer) {
     248      252921 :       __ JumpIfSmi(value_, exit());
     249             :     }
     250             :     if (COMPRESS_POINTERS_BOOL) {
     251             :       __ DecompressTaggedPointer(value_, value_);
     252             :     }
     253             :     __ CheckPageFlag(value_, scratch0_,
     254             :                      MemoryChunk::kPointersToHereAreInterestingMask, zero,
     255      319799 :                      exit());
     256      319799 :     __ leaq(scratch1_, operand_);
     257             : 
     258             :     RememberedSetAction const remembered_set_action =
     259      319799 :         mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
     260      319799 :                                              : OMIT_REMEMBERED_SET;
     261             :     SaveFPRegsMode const save_fp_mode =
     262      319799 :         frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
     263             : 
     264      319799 :     if (mode_ == RecordWriteMode::kValueIsEphemeronKey) {
     265         112 :       __ CallEphemeronKeyBarrier(object_, scratch1_, save_fp_mode);
     266      319687 :     } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
     267             :       // A direct call to a wasm runtime stub defined in this module.
     268             :       // Just encode the stub index. This will be patched when the code
     269             :       // is added to the native module and copied into wasm code space.
     270             :       __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
     271         286 :                              save_fp_mode, wasm::WasmCode::kWasmRecordWrite);
     272             :     } else {
     273             :       __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
     274      319401 :                              save_fp_mode);
     275             :     }
     276      319799 :   }
     277             : 
     278             :  private:
     279             :   Register const object_;
     280             :   Operand const operand_;
     281             :   Register const value_;
     282             :   Register const scratch0_;
     283             :   Register const scratch1_;
     284             :   RecordWriteMode const mode_;
     285             :   StubCallMode const stub_mode_;
     286             :   Zone* zone_;
     287             : };
     288             : 
     289           0 : class WasmOutOfLineTrap : public OutOfLineCode {
     290             :  public:
     291             :   WasmOutOfLineTrap(CodeGenerator* gen, Instruction* instr)
     292      365784 :       : OutOfLineCode(gen), gen_(gen), instr_(instr) {}
     293             : 
     294      142017 :   void Generate() override {
     295      142017 :     X64OperandConverter i(gen_, instr_);
     296             :     TrapId trap_id =
     297      284085 :         static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
     298             :     GenerateWithTrapId(trap_id);
     299      142115 :   }
     300             : 
     301             :  protected:
     302             :   CodeGenerator* gen_;
     303             : 
     304      365611 :   void GenerateWithTrapId(TrapId trap_id) { GenerateCallToTrap(trap_id); }
     305             : 
     306             :  private:
     307      365475 :   void GenerateCallToTrap(TrapId trap_id) {
     308      365475 :     if (!gen_->wasm_runtime_exception_support()) {
     309             :       // We cannot test calls to the runtime in cctest/test-run-wasm.
     310             :       // Therefore we emit a call to C here instead of a call to the runtime.
     311      153836 :       __ PrepareCallCFunction(0);
     312      153836 :       __ CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(),
     313      153836 :                        0);
     314      153836 :       __ LeaveFrame(StackFrame::WASM_COMPILED);
     315      153836 :       auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
     316             :       size_t pop_size =
     317      153836 :           call_descriptor->StackParameterCount() * kSystemPointerSize;
      318             :       // Use rcx as a scratch register; we return immediately anyway.
     319      153836 :       __ Ret(static_cast<int>(pop_size), rcx);
     320             :     } else {
     321      211719 :       gen_->AssembleSourcePosition(instr_);
     322             :       // A direct call to a wasm runtime stub defined in this module.
     323             :       // Just encode the stub index. This will be patched when the code
     324             :       // is added to the native module and copied into wasm code space.
     325      211976 :       __ near_call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
     326             :       ReferenceMap* reference_map =
     327      211796 :           new (gen_->zone()) ReferenceMap(gen_->zone());
     328      211600 :       gen_->RecordSafepoint(reference_map, Safepoint::kSimple,
     329      211600 :                             Safepoint::kNoLazyDeopt);
     330      212115 :       __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
     331             :     }
     332      365643 :   }
     333             : 
     334             :   Instruction* instr_;
     335             : };
     336             : 
     337           0 : class WasmProtectedInstructionTrap final : public WasmOutOfLineTrap {
     338             :  public:
     339             :   WasmProtectedInstructionTrap(CodeGenerator* gen, int pc, Instruction* instr)
     340      223397 :       : WasmOutOfLineTrap(gen, instr), pc_(pc) {}
     341             : 
     342      223342 :   void Generate() final {
     343      223342 :     gen_->AddProtectedInstructionLanding(pc_, __ pc_offset());
     344      223543 :     GenerateWithTrapId(TrapId::kTrapMemOutOfBounds);
     345      223570 :   }
     346             : 
     347             :  private:
     348             :   int pc_;
     349             : };
     350             : 
     351    12520123 : void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen,
     352             :                          InstructionCode opcode, Instruction* instr,
     353             :                          X64OperandConverter& i, int pc) {
     354             :   const MemoryAccessMode access_mode =
     355    12520123 :       static_cast<MemoryAccessMode>(MiscField::decode(opcode));
     356    12520123 :   if (access_mode == kMemoryAccessProtected) {
     357             :     new (zone) WasmProtectedInstructionTrap(codegen, pc, instr);
     358             :   }
     359    12519801 : }
     360             : 
     361    11855842 : void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
     362             :                                    InstructionCode opcode, Instruction* instr,
     363             :                                    X64OperandConverter& i) {
     364             :   const MemoryAccessMode access_mode =
     365    11855842 :       static_cast<MemoryAccessMode>(MiscField::decode(opcode));
     366    11855842 :   if (access_mode == kMemoryAccessPoisoned) {
     367             :     Register value = i.OutputRegister();
     368           0 :     codegen->tasm()->andq(value, kSpeculationPoisonRegister);
     369             :   }
     370    11855842 : }
     371             : 
     372             : }  // namespace
     373             : 
     374             : #define ASSEMBLE_UNOP(asm_instr)         \
     375             :   do {                                   \
     376             :     if (instr->Output()->IsRegister()) { \
     377             :       __ asm_instr(i.OutputRegister());  \
     378             :     } else {                             \
     379             :       __ asm_instr(i.OutputOperand());   \
     380             :     }                                    \
     381             :   } while (false)
     382             : 
     383             : #define ASSEMBLE_BINOP(asm_instr)                                     \
     384             :   do {                                                                \
     385             :     if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
     386             :       size_t index = 1;                                               \
     387             :       Operand right = i.MemoryOperand(&index);                        \
     388             :       __ asm_instr(i.InputRegister(0), right);                        \
     389             :     } else {                                                          \
     390             :       if (HasImmediateInput(instr, 1)) {                              \
     391             :         if (instr->InputAt(0)->IsRegister()) {                        \
     392             :           __ asm_instr(i.InputRegister(0), i.InputImmediate(1));      \
     393             :         } else {                                                      \
     394             :           __ asm_instr(i.InputOperand(0), i.InputImmediate(1));       \
     395             :         }                                                             \
     396             :       } else {                                                        \
     397             :         if (instr->InputAt(1)->IsRegister()) {                        \
     398             :           __ asm_instr(i.InputRegister(0), i.InputRegister(1));       \
     399             :         } else {                                                      \
     400             :           __ asm_instr(i.InputRegister(0), i.InputOperand(1));        \
     401             :         }                                                             \
     402             :       }                                                               \
     403             :     }                                                                 \
     404             :   } while (false)
     405             : 
     406             : #define ASSEMBLE_COMPARE(asm_instr)                                   \
     407             :   do {                                                                \
     408             :     if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
     409             :       size_t index = 0;                                               \
     410             :       Operand left = i.MemoryOperand(&index);                         \
     411             :       if (HasImmediateInput(instr, index)) {                          \
     412             :         __ asm_instr(left, i.InputImmediate(index));                  \
     413             :       } else {                                                        \
     414             :         __ asm_instr(left, i.InputRegister(index));                   \
     415             :       }                                                               \
     416             :     } else {                                                          \
     417             :       if (HasImmediateInput(instr, 1)) {                              \
     418             :         if (instr->InputAt(0)->IsRegister()) {                        \
     419             :           __ asm_instr(i.InputRegister(0), i.InputImmediate(1));      \
     420             :         } else {                                                      \
     421             :           __ asm_instr(i.InputOperand(0), i.InputImmediate(1));       \
     422             :         }                                                             \
     423             :       } else {                                                        \
     424             :         if (instr->InputAt(1)->IsRegister()) {                        \
     425             :           __ asm_instr(i.InputRegister(0), i.InputRegister(1));       \
     426             :         } else {                                                      \
     427             :           __ asm_instr(i.InputRegister(0), i.InputOperand(1));        \
     428             :         }                                                             \
     429             :       }                                                               \
     430             :     }                                                                 \
     431             :   } while (false)
     432             : 
     433             : #define ASSEMBLE_MULT(asm_instr)                              \
     434             :   do {                                                        \
     435             :     if (HasImmediateInput(instr, 1)) {                        \
     436             :       if (instr->InputAt(0)->IsRegister()) {                  \
     437             :         __ asm_instr(i.OutputRegister(), i.InputRegister(0),  \
     438             :                      i.InputImmediate(1));                    \
     439             :       } else {                                                \
     440             :         __ asm_instr(i.OutputRegister(), i.InputOperand(0),   \
     441             :                      i.InputImmediate(1));                    \
     442             :       }                                                       \
     443             :     } else {                                                  \
     444             :       if (instr->InputAt(1)->IsRegister()) {                  \
     445             :         __ asm_instr(i.OutputRegister(), i.InputRegister(1)); \
     446             :       } else {                                                \
     447             :         __ asm_instr(i.OutputRegister(), i.InputOperand(1));  \
     448             :       }                                                       \
     449             :     }                                                         \
     450             :   } while (false)
     451             : 
     452             : #define ASSEMBLE_SHIFT(asm_instr, width)                                   \
     453             :   do {                                                                     \
     454             :     if (HasImmediateInput(instr, 1)) {                                     \
     455             :       if (instr->Output()->IsRegister()) {                                 \
     456             :         __ asm_instr(i.OutputRegister(), Immediate(i.InputInt##width(1))); \
     457             :       } else {                                                             \
     458             :         __ asm_instr(i.OutputOperand(), Immediate(i.InputInt##width(1)));  \
     459             :       }                                                                    \
     460             :     } else {                                                               \
     461             :       if (instr->Output()->IsRegister()) {                                 \
     462             :         __ asm_instr##_cl(i.OutputRegister());                             \
     463             :       } else {                                                             \
     464             :         __ asm_instr##_cl(i.OutputOperand());                              \
     465             :       }                                                                    \
     466             :     }                                                                      \
     467             :   } while (false)
     468             : 
     469             : #define ASSEMBLE_MOVX(asm_instr)                            \
     470             :   do {                                                      \
     471             :     if (instr->addressing_mode() != kMode_None) {           \
     472             :       __ asm_instr(i.OutputRegister(), i.MemoryOperand());  \
     473             :     } else if (instr->InputAt(0)->IsRegister()) {           \
     474             :       __ asm_instr(i.OutputRegister(), i.InputRegister(0)); \
     475             :     } else {                                                \
     476             :       __ asm_instr(i.OutputRegister(), i.InputOperand(0));  \
     477             :     }                                                       \
     478             :   } while (false)
     479             : 
     480             : #define ASSEMBLE_SSE_BINOP(asm_instr)                                   \
     481             :   do {                                                                  \
     482             :     if (instr->InputAt(1)->IsFPRegister()) {                            \
     483             :       __ asm_instr(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); \
     484             :     } else {                                                            \
     485             :       __ asm_instr(i.InputDoubleRegister(0), i.InputOperand(1));        \
     486             :     }                                                                   \
     487             :   } while (false)
     488             : 
     489             : #define ASSEMBLE_SSE_UNOP(asm_instr)                                    \
     490             :   do {                                                                  \
     491             :     if (instr->InputAt(0)->IsFPRegister()) {                            \
     492             :       __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); \
     493             :     } else {                                                            \
     494             :       __ asm_instr(i.OutputDoubleRegister(), i.InputOperand(0));        \
     495             :     }                                                                   \
     496             :   } while (false)
     497             : 
     498             : #define ASSEMBLE_AVX_BINOP(asm_instr)                                  \
     499             :   do {                                                                 \
     500             :     CpuFeatureScope avx_scope(tasm(), AVX);                            \
     501             :     if (instr->InputAt(1)->IsFPRegister()) {                           \
     502             :       __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
     503             :                    i.InputDoubleRegister(1));                          \
     504             :     } else {                                                           \
     505             :       __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
     506             :                    i.InputOperand(1));                                 \
     507             :     }                                                                  \
     508             :   } while (false)
     509             : 
     510             : #define ASSEMBLE_IEEE754_BINOP(name)                                     \
     511             :   do {                                                                   \
     512             :     __ PrepareCallCFunction(2);                                          \
     513             :     __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \
     514             :   } while (false)
     515             : 
     516             : #define ASSEMBLE_IEEE754_UNOP(name)                                      \
     517             :   do {                                                                   \
     518             :     __ PrepareCallCFunction(1);                                          \
     519             :     __ CallCFunction(ExternalReference::ieee754_##name##_function(), 1); \
     520             :   } while (false)
     521             : 
     522             : #define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
     523             :   do {                                                          \
     524             :     Label binop;                                                \
     525             :     __ bind(&binop);                                            \
     526             :     __ mov_inst(rax, i.MemoryOperand(1));                       \
     527             :     __ movl(i.TempRegister(0), rax);                            \
     528             :     __ bin_inst(i.TempRegister(0), i.InputRegister(0));         \
     529             :     __ lock();                                                  \
     530             :     __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));     \
     531             :     __ j(not_equal, &binop);                                    \
     532             :   } while (false)
     533             : 
     534             : #define ASSEMBLE_ATOMIC64_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
     535             :   do {                                                            \
     536             :     Label binop;                                                  \
     537             :     __ bind(&binop);                                              \
     538             :     __ mov_inst(rax, i.MemoryOperand(1));                         \
     539             :     __ movq(i.TempRegister(0), rax);                              \
     540             :     __ bin_inst(i.TempRegister(0), i.InputRegister(0));           \
     541             :     __ lock();                                                    \
     542             :     __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));       \
     543             :     __ j(not_equal, &binop);                                      \
     544             :   } while (false)
     545             : 
     546             : #define ASSEMBLE_SIMD_INSTR(opcode, dst_operand, index)      \
     547             :   do {                                                       \
     548             :     if (instr->InputAt(index)->IsSimd128Register()) {        \
     549             :       __ opcode(dst_operand, i.InputSimd128Register(index)); \
     550             :     } else {                                                 \
     551             :       __ opcode(dst_operand, i.InputOperand(index));         \
     552             :     }                                                        \
     553             :   } while (false)
     554             : 
     555             : #define ASSEMBLE_SIMD_IMM_INSTR(opcode, dst_operand, index, imm)  \
     556             :   do {                                                            \
     557             :     if (instr->InputAt(index)->IsSimd128Register()) {             \
     558             :       __ opcode(dst_operand, i.InputSimd128Register(index), imm); \
     559             :     } else {                                                      \
     560             :       __ opcode(dst_operand, i.InputOperand(index), imm);         \
     561             :     }                                                             \
     562             :   } while (false)
     563             : 
     564             : #define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode)             \
     565             :   do {                                                   \
     566             :     XMMRegister dst = i.OutputSimd128Register();         \
     567             :     DCHECK_EQ(dst, i.InputSimd128Register(0));           \
     568             :     byte input_index = instr->InputCount() == 2 ? 1 : 0; \
     569             :     ASSEMBLE_SIMD_INSTR(opcode, dst, input_index);       \
     570             :   } while (false)
     571             : 
     572             : #define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, SSELevel, imm)                  \
     573             :   do {                                                                    \
     574             :     CpuFeatureScope sse_scope(tasm(), SSELevel);                          \
     575             :     DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));      \
     576             :     __ opcode(i.OutputSimd128Register(), i.InputSimd128Register(1), imm); \
     577             :   } while (false)
     578             : 
     579     2637246 : void CodeGenerator::AssembleDeconstructFrame() {
     580     2637246 :   unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
     581     2637517 :   __ movq(rsp, rbp);
     582     2638544 :   __ popq(rbp);
     583     2638260 : }
     584             : 
     585      119850 : void CodeGenerator::AssemblePrepareTailCall() {
     586      119850 :   if (frame_access_state()->has_frame()) {
     587      137125 :     __ movq(rbp, MemOperand(rbp, 0));
     588             :   }
     589             :   frame_access_state()->SetFrameAccessToSP();
     590      119851 : }
     591             : 
     592        1288 : void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
     593             :                                                      Register scratch1,
     594             :                                                      Register scratch2,
     595             :                                                      Register scratch3) {
     596             :   DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
     597        1288 :   Label done;
     598             : 
     599             :   // Check if current frame is an arguments adaptor frame.
     600        2576 :   __ cmpq(Operand(rbp, CommonFrameConstants::kContextOrFrameTypeOffset),
     601        1288 :           Immediate(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
     602        1288 :   __ j(not_equal, &done, Label::kNear);
     603             : 
      604             :   // Load the arguments count from the current arguments adaptor frame (note:
      605             :   // it does not include the receiver).
     606        1288 :   Register caller_args_count_reg = scratch1;
     607        2576 :   __ SmiUntag(caller_args_count_reg,
     608        1288 :               Operand(rbp, ArgumentsAdaptorFrameConstants::kLengthOffset));
     609             : 
     610             :   ParameterCount callee_args_count(args_reg);
     611             :   __ PrepareForTailCall(callee_args_count, caller_args_count_reg, scratch2,
     612        1288 :                         scratch3);
     613        1288 :   __ bind(&done);
     614        1288 : }
     615             : 
     616             : namespace {
     617             : 
     618      271393 : void AdjustStackPointerForTailCall(Assembler* assembler,
     619             :                                    FrameAccessState* state,
     620             :                                    int new_slot_above_sp,
     621             :                                    bool allow_shrinkage = true) {
     622             :   int current_sp_offset = state->GetSPToFPSlotCount() +
     623      271393 :                           StandardFrameConstants::kFixedSlotCountAboveFp;
     624      271393 :   int stack_slot_delta = new_slot_above_sp - current_sp_offset;
     625      271393 :   if (stack_slot_delta > 0) {
     626         780 :     assembler->subq(rsp, Immediate(stack_slot_delta * kSystemPointerSize));
     627             :     state->IncreaseSPDelta(stack_slot_delta);
     628      270613 :   } else if (allow_shrinkage && stack_slot_delta < 0) {
     629       68215 :     assembler->addq(rsp, Immediate(-stack_slot_delta * kSystemPointerSize));
     630             :     state->IncreaseSPDelta(stack_slot_delta);
     631             :   }
     632      271393 : }
     633             : 
     634        2096 : void SetupShuffleMaskOnStack(TurboAssembler* assembler, uint32_t* mask) {
     635        2096 :   int64_t shuffle_mask = (mask[2]) | (static_cast<uint64_t>(mask[3]) << 32);
     636        2096 :   assembler->movq(kScratchRegister, shuffle_mask);
     637        2096 :   assembler->Push(kScratchRegister);
     638        2096 :   shuffle_mask = (mask[0]) | (static_cast<uint64_t>(mask[1]) << 32);
     639             :   assembler->movq(kScratchRegister, shuffle_mask);
     640        2096 :   assembler->Push(kScratchRegister);
     641        2096 : }
     642             : 
     643             : }  // namespace
     644             : 
     645      119862 : void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
     646             :                                               int first_unused_stack_slot) {
     647      119862 :   CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
     648             :   ZoneVector<MoveOperands*> pushes(zone());
     649      119862 :   GetPushCompatibleMoves(instr, flags, &pushes);
     650             : 
     651      132903 :   if (!pushes.empty() &&
     652       26080 :       (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
     653             :        first_unused_stack_slot)) {
     654             :     X64OperandConverter g(this, instr);
     655       44708 :     for (auto move : pushes) {
     656             :       LocationOperand destination_location(
     657             :           LocationOperand::cast(move->destination()));
     658       31668 :       InstructionOperand source(move->source());
     659             :       AdjustStackPointerForTailCall(tasm(), frame_access_state(),
     660       31668 :                                     destination_location.index());
     661       31668 :       if (source.IsStackSlot()) {
     662             :         LocationOperand source_location(LocationOperand::cast(source));
     663       13052 :         __ Push(g.SlotToOperand(source_location.index()));
     664       18616 :       } else if (source.IsRegister()) {
     665             :         LocationOperand source_location(LocationOperand::cast(source));
     666       18616 :         __ Push(source_location.GetRegister());
     667           0 :       } else if (source.IsImmediate()) {
     668           0 :         __ Push(Immediate(ImmediateOperand::cast(source).inline_value()));
     669             :       } else {
     670             :         // Pushes of non-scalar data types is not supported.
     671           0 :         UNIMPLEMENTED();
     672             :       }
     673             :       frame_access_state()->IncreaseSPDelta(1);
     674             :       move->Eliminate();
     675             :     }
     676             :   }
     677             :   AdjustStackPointerForTailCall(tasm(), frame_access_state(),
     678      119863 :                                 first_unused_stack_slot, false);
     679      119862 : }
     680             : 
     681      119863 : void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
     682             :                                              int first_unused_stack_slot) {
     683             :   AdjustStackPointerForTailCall(tasm(), frame_access_state(),
     684      119863 :                                 first_unused_stack_slot);
     685      119863 : }
     686             : 
     687             : // Check that {kJavaScriptCallCodeStartRegister} is correct.
     688         114 : void CodeGenerator::AssembleCodeStartRegisterCheck() {
     689         114 :   __ ComputeCodeStartAddress(rbx);
     690         114 :   __ cmpq(rbx, kJavaScriptCallCodeStartRegister);
     691         114 :   __ Assert(equal, AbortReason::kWrongFunctionCodeStart);
     692         114 : }
     693             : 
     694             : // Check if the code object is marked for deoptimization. If it is, then it
     695             : // jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need
     696             : // to:
     697             : //    1. read from memory the word that contains that bit, which can be found in
     698             : //       the flags in the referenced {CodeDataContainer} object;
     699             : //    2. test kMarkedForDeoptimizationBit in those flags; and
     700             : //    3. if it is not zero then it jumps to the builtin.
     701      463882 : void CodeGenerator::BailoutIfDeoptimized() {
     702             :   int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
     703      927762 :   __ LoadTaggedPointerField(rbx,
     704      463886 :                             Operand(kJavaScriptCallCodeStartRegister, offset));
     705      463890 :   __ testl(FieldOperand(rbx, CodeDataContainer::kKindSpecificFlagsOffset),
     706             :            Immediate(1 << Code::kMarkedForDeoptimizationBit));
     707      463880 :   __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
     708      463888 :           RelocInfo::CODE_TARGET, not_zero);
     709      463885 : }
     710             : 
     711           0 : void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
     712             :   // Set a mask which has all bits set in the normal case, but has all
     713             :   // bits cleared if we are speculatively executing the wrong PC.
     714           0 :   __ ComputeCodeStartAddress(rbx);
     715           0 :   __ xorq(kSpeculationPoisonRegister, kSpeculationPoisonRegister);
     716             :   __ cmpq(kJavaScriptCallCodeStartRegister, rbx);
     717             :   __ movq(rbx, Immediate(-1));
     718           0 :   __ cmovq(equal, kSpeculationPoisonRegister, rbx);
     719           0 : }
     720             : 
     721           0 : void CodeGenerator::AssembleRegisterArgumentPoisoning() {
     722           0 :   __ andq(kJSFunctionRegister, kSpeculationPoisonRegister);
     723             :   __ andq(kContextRegister, kSpeculationPoisonRegister);
     724             :   __ andq(rsp, kSpeculationPoisonRegister);
     725           0 : }
     726             : 
     727             : // Assembles an instruction after register allocation, producing machine code.
     728    68646744 : CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     729             :     Instruction* instr) {
     730             :   X64OperandConverter i(this, instr);
     731             :   InstructionCode opcode = instr->opcode();
     732    68646744 :   ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
     733    68646744 :   switch (arch_opcode) {
     734             :     case kArchCallCodeObject: {
     735     4827589 :       if (HasImmediateInput(instr, 0)) {
     736     4451090 :         Handle<Code> code = i.InputCode(0);
     737     4451090 :         __ Call(code, RelocInfo::CODE_TARGET);
     738             :       } else {
     739      376508 :         Register reg = i.InputRegister(0);
     740             :         DCHECK_IMPLIES(
     741             :             HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
     742             :             reg == kJavaScriptCallCodeStartRegister);
     743      376508 :         __ LoadCodeObjectEntry(reg, reg);
     744      376513 :         if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
     745           0 :           __ RetpolineCall(reg);
     746             :         } else {
     747      376513 :           __ call(reg);
     748             :         }
     749             :       }
     750     4827621 :       RecordCallPosition(instr);
     751             :       frame_access_state()->ClearSPDelta();
     752             :       break;
     753             :     }
     754             :     case kArchCallBuiltinPointer: {
     755             :       DCHECK(!HasImmediateInput(instr, 0));
     756        3608 :       Register builtin_pointer = i.InputRegister(0);
     757        3608 :       __ CallBuiltinPointer(builtin_pointer);
     758        3608 :       RecordCallPosition(instr);
     759             :       frame_access_state()->ClearSPDelta();
     760             :       break;
     761             :     }
     762             :     case kArchCallWasmFunction: {
     763     1181915 :       if (HasImmediateInput(instr, 0)) {
     764      192442 :         Constant constant = i.ToConstant(instr->InputAt(0));
     765      192491 :         Address wasm_code = static_cast<Address>(constant.ToInt64());
     766      192491 :         if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
     767      192499 :           __ near_call(wasm_code, constant.rmode());
     768             :         } else {
     769           0 :           if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
     770           0 :             __ RetpolineCall(wasm_code, constant.rmode());
     771             :           } else {
     772           0 :             __ Call(wasm_code, constant.rmode());
     773             :           }
     774             :         }
     775             :       } else {
     776      989473 :         Register reg = i.InputRegister(0);
     777      989473 :         if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
     778           0 :           __ RetpolineCall(reg);
     779             :         } else {
     780      989473 :           __ call(reg);
     781             :         }
     782             :       }
     783     1181915 :       RecordCallPosition(instr);
     784             :       frame_access_state()->ClearSPDelta();
     785             :       break;
     786             :     }
     787             :     case kArchTailCallCodeObjectFromJSFunction:
     788             :     case kArchTailCallCodeObject: {
     789       36864 :       if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) {
     790             :         AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
     791             :                                          i.TempRegister(0), i.TempRegister(1),
     792        1288 :                                          i.TempRegister(2));
     793             :       }
     794       36864 :       if (HasImmediateInput(instr, 0)) {
     795       31240 :         Handle<Code> code = i.InputCode(0);
     796       31240 :         __ Jump(code, RelocInfo::CODE_TARGET);
     797             :       } else {
     798        5624 :         Register reg = i.InputRegister(0);
     799             :         DCHECK_IMPLIES(
     800             :             HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
     801             :             reg == kJavaScriptCallCodeStartRegister);
     802        5624 :         __ LoadCodeObjectEntry(reg, reg);
     803        5624 :         if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
     804           0 :           __ RetpolineJump(reg);
     805             :         } else {
     806        5624 :           __ jmp(reg);
     807             :         }
     808             :       }
     809             :       unwinding_info_writer_.MarkBlockWillExit();
     810             :       frame_access_state()->ClearSPDelta();
     811       36864 :       frame_access_state()->SetFrameAccessToDefault();
     812       36864 :       break;
     813             :     }
     814             :     case kArchTailCallWasm: {
     815         219 :       if (HasImmediateInput(instr, 0)) {
     816         129 :         Constant constant = i.ToConstant(instr->InputAt(0));
     817             :         Address wasm_code = static_cast<Address>(constant.ToInt64());
     818         129 :         if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
     819         129 :           __ near_jmp(wasm_code, constant.rmode());
     820             :         } else {
     821             :           __ Move(kScratchRegister, wasm_code, constant.rmode());
     822           0 :           __ jmp(kScratchRegister);
     823             :         }
     824             :       } else {
     825          90 :         Register reg = i.InputRegister(0);
     826          90 :         if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
     827           0 :           __ RetpolineJump(reg);
     828             :         } else {
     829          90 :           __ jmp(reg);
     830             :         }
     831             :       }
     832             :       unwinding_info_writer_.MarkBlockWillExit();
     833             :       frame_access_state()->ClearSPDelta();
     834         219 :       frame_access_state()->SetFrameAccessToDefault();
     835         219 :       break;
     836             :     }
     837             :     case kArchTailCallAddress: {
     838       82768 :       CHECK(!HasImmediateInput(instr, 0));
     839       82768 :       Register reg = i.InputRegister(0);
     840             :       DCHECK_IMPLIES(
     841             :           HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
     842             :           reg == kJavaScriptCallCodeStartRegister);
     843       82768 :       if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
     844           0 :         __ RetpolineJump(reg);
     845             :       } else {
     846       82768 :         __ jmp(reg);
     847             :       }
     848             :       unwinding_info_writer_.MarkBlockWillExit();
     849             :       frame_access_state()->ClearSPDelta();
     850       82768 :       frame_access_state()->SetFrameAccessToDefault();
     851             :       break;
     852             :     }
     853             :     case kArchCallJSFunction: {
     854             :       Register func = i.InputRegister(0);
     855       23863 :       if (FLAG_debug_code) {
     856             :         // Check the function's context matches the context argument.
     857           8 :         __ cmp_tagged(rsi, FieldOperand(func, JSFunction::kContextOffset));
     858           8 :         __ Assert(equal, AbortReason::kWrongFunctionContext);
     859             :       }
     860             :       static_assert(kJavaScriptCallCodeStartRegister == rcx, "ABI mismatch");
     861             :       __ LoadTaggedPointerField(rcx,
     862       23863 :                                 FieldOperand(func, JSFunction::kCodeOffset));
     863       23863 :       __ CallCodeObject(rcx);
     864             :       frame_access_state()->ClearSPDelta();
     865       23863 :       RecordCallPosition(instr);
     866             :       break;
     867             :     }
     868             :     case kArchPrepareCallCFunction: {
     869             :       // Frame alignment requires using FP-relative frame addressing.
     870             :       frame_access_state()->SetFrameAccessToFP();
     871       25882 :       int const num_parameters = MiscField::decode(instr->opcode());
     872       25882 :       __ PrepareCallCFunction(num_parameters);
     873       25882 :       break;
     874             :     }
     875             :     case kArchSaveCallerRegisters: {
     876             :       fp_mode_ =
     877         900 :           static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
     878             :       DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
     879             :       // kReturnRegister0 should have been saved before entering the stub.
     880         900 :       int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
     881             :       DCHECK(IsAligned(bytes, kSystemPointerSize));
     882             :       DCHECK_EQ(0, frame_access_state()->sp_delta());
     883         900 :       frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
     884             :       DCHECK(!caller_registers_saved_);
     885         900 :       caller_registers_saved_ = true;
     886         900 :       break;
     887             :     }
     888             :     case kArchRestoreCallerRegisters: {
     889             :       DCHECK(fp_mode_ ==
     890             :              static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
     891             :       DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
     892             :       // Don't overwrite the returned value.
     893        1800 :       int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
     894         900 :       frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
     895             :       DCHECK_EQ(0, frame_access_state()->sp_delta());
     896             :       DCHECK(caller_registers_saved_);
     897         900 :       caller_registers_saved_ = false;
     898         900 :       break;
     899             :     }
     900             :     case kArchPrepareTailCall:
     901      119850 :       AssemblePrepareTailCall();
     902      119850 :       break;
     903             :     case kArchCallCFunction: {
     904             :       int const num_parameters = MiscField::decode(instr->opcode());
     905       25882 :       if (HasImmediateInput(instr, 0)) {
     906       24742 :         ExternalReference ref = i.InputExternalReference(0);
     907       24742 :         __ CallCFunction(ref, num_parameters);
     908             :       } else {
     909        1140 :         Register func = i.InputRegister(0);
     910        1140 :         __ CallCFunction(func, num_parameters);
     911             :       }
     912       25882 :       frame_access_state()->SetFrameAccessToDefault();
     913             :       // Ideally, we should decrement the SP delta to match the change of
     914             :       // the stack pointer in CallCFunction. However, on certain
     915             :       // architectures (e.g. ARM) stricter alignment requirements may cause
     916             :       // the old SP to be saved on the stack. In those cases we cannot
     917             :       // calculate the SP delta statically.
     918             :       frame_access_state()->ClearSPDelta();
     919       25882 :       if (caller_registers_saved_) {
     920             :         // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
     921             :         // Here, we assume the sequence to be:
     922             :         //   kArchSaveCallerRegisters;
     923             :         //   kArchCallCFunction;
     924             :         //   kArchRestoreCallerRegisters;
     925             :         int bytes =
     926        1800 :             __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
     927         900 :         frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
     928             :       }
     929             :       // TODO(tebbi): Do we need an lfence here?
     930             :       break;
     931             :     }
     932             :     case kArchJmp:
     933     4959087 :       AssembleArchJump(i.InputRpo(0));
     934     4959183 :       break;
     935             :     case kArchBinarySearchSwitch:
     936       34090 :       AssembleArchBinarySearchSwitch(instr);
     937       34091 :       break;
     938             :     case kArchLookupSwitch:
     939           0 :       AssembleArchLookupSwitch(instr);
     940           0 :       break;
     941             :     case kArchTableSwitch:
     942         315 :       AssembleArchTableSwitch(instr);
     943         315 :       break;
     944             :     case kArchComment:
     945           4 :       __ RecordComment(reinterpret_cast<const char*>(i.InputInt64(0)));
     946           4 :       break;
     947             :     case kArchDebugAbort:
     948             :       DCHECK(i.InputRegister(0) == rdx);
     949         160 :       if (!frame_access_state()->has_frame()) {
     950             :         // We don't actually want to generate a pile of code for this, so just
     951             :         // claim there is a stack frame, without generating one.
     952          16 :         FrameScope scope(tasm(), StackFrame::NONE);
     953          16 :         __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
     954          16 :                 RelocInfo::CODE_TARGET);
     955             :       } else {
     956         144 :         __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
     957         144 :                 RelocInfo::CODE_TARGET);
     958             :       }
     959         160 :       __ int3();
     960             :       unwinding_info_writer_.MarkBlockWillExit();
     961             :       break;
     962             :     case kArchDebugBreak:
     963      256023 :       __ int3();
     964      256023 :       break;
     965             :     case kArchThrowTerminator:
     966             :       unwinding_info_writer_.MarkBlockWillExit();
     967             :       break;
     968             :     case kArchNop:
     969             :       // don't emit code for nops.
     970             :       break;
     971             :     case kArchDeoptimize: {
     972             :       int deopt_state_id =
     973       44536 :           BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
     974             :       CodeGenResult result =
     975       44536 :           AssembleDeoptimizerCall(deopt_state_id, current_source_position_);
     976       44536 :       if (result != kSuccess) return result;
     977             :       unwinding_info_writer_.MarkBlockWillExit();
     978             :       break;
     979             :     }
     980             :     case kArchRet:
     981     2988809 :       AssembleReturn(instr->InputAt(0));
     982     2988464 :       break;
     983             :     case kArchStackPointer:
     984           0 :       __ movq(i.OutputRegister(), rsp);
     985             :       break;
     986             :     case kArchFramePointer:
     987       32822 :       __ movq(i.OutputRegister(), rbp);
     988             :       break;
     989             :     case kArchParentFramePointer:
     990       58132 :       if (frame_access_state()->has_frame()) {
     991       80148 :         __ movq(i.OutputRegister(), Operand(rbp, 0));
     992             :       } else {
     993       31416 :         __ movq(i.OutputRegister(), rbp);
     994             :       }
     995             :       break;
     996             :     case kArchTruncateDoubleToI: {
     997             :       auto result = i.OutputRegister();
     998             :       auto input = i.InputDoubleRegister(0);
     999             :       auto ool = new (zone()) OutOfLineTruncateDoubleToI(
    1000             :           this, result, input, DetermineStubCallMode(),
    1001      106625 :           &unwinding_info_writer_);
     1002             :       // We use Cvttsd2siq instead of Cvttsd2si for performance reasons. The
     1003             :       // use of Cvttsd2siq requires the movl below to avoid sign extension.
    1004       53310 :       __ Cvttsd2siq(result, input);
    1005       53314 :       __ cmpq(result, Immediate(1));
    1006       53313 :       __ j(overflow, ool->entry());
    1007       53314 :       __ bind(ool->exit());
    1008             :       __ movl(result, result);
    1009             :       break;
    1010             :     }
    1011             :     case kArchStoreWithWriteBarrier: {
    1012             :       RecordWriteMode mode =
    1013             :           static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
    1014             :       Register object = i.InputRegister(0);
    1015      319798 :       size_t index = 0;
    1016      319798 :       Operand operand = i.MemoryOperand(&index);
    1017      319798 :       Register value = i.InputRegister(index);
    1018             :       Register scratch0 = i.TempRegister(0);
    1019             :       Register scratch1 = i.TempRegister(1);
    1020             :       auto ool = new (zone())
    1021             :           OutOfLineRecordWrite(this, object, operand, value, scratch0, scratch1,
    1022      639597 :                                mode, DetermineStubCallMode());
    1023      319799 :       __ StoreTaggedField(operand, value);
    1024             :       if (COMPRESS_POINTERS_BOOL) {
    1025             :         __ DecompressTaggedPointer(object, object);
    1026             :       }
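                     :       // Only objects on pages flagged as having interesting outgoing
                     :       // pointers take the out-of-line record-write path; for everything
                     :       // else the store above is all that is needed.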
    1027             :       __ CheckPageFlag(object, scratch0,
    1028             :                        MemoryChunk::kPointersFromHereAreInterestingMask,
    1029      319799 :                        not_zero, ool->entry());
    1030      319799 :       __ bind(ool->exit());
    1031             :       break;
    1032             :     }
    1033             :     case kArchWordPoisonOnSpeculation:
    1034             :       DCHECK_EQ(i.OutputRegister(), i.InputRegister(0));
    1035           0 :       __ andq(i.InputRegister(0), kSpeculationPoisonRegister);
    1036             :       break;
    1037             :     case kLFence:
    1038           0 :       __ lfence();
    1039           0 :       break;
    1040             :     case kArchStackSlot: {
    1041             :       FrameOffset offset =
    1042      367610 :           frame_access_state()->GetFrameOffset(i.InputInt32(0));
    1043      367611 :       Register base = offset.from_stack_pointer() ? rsp : rbp;
    1044     1102838 :       __ leaq(i.OutputRegister(), Operand(base, offset.offset()));
    1045             :       break;
    1046             :     }
    1047             :     case kIeee754Float64Acos:
    1048         116 :       ASSEMBLE_IEEE754_UNOP(acos);
    1049         116 :       break;
    1050             :     case kIeee754Float64Acosh:
    1051         116 :       ASSEMBLE_IEEE754_UNOP(acosh);
    1052         116 :       break;
    1053             :     case kIeee754Float64Asin:
    1054         116 :       ASSEMBLE_IEEE754_UNOP(asin);
    1055         116 :       break;
    1056             :     case kIeee754Float64Asinh:
    1057         116 :       ASSEMBLE_IEEE754_UNOP(asinh);
    1058         116 :       break;
    1059             :     case kIeee754Float64Atan:
    1060         133 :       ASSEMBLE_IEEE754_UNOP(atan);
    1061         133 :       break;
    1062             :     case kIeee754Float64Atanh:
    1063         116 :       ASSEMBLE_IEEE754_UNOP(atanh);
    1064         116 :       break;
    1065             :     case kIeee754Float64Atan2:
    1066         130 :       ASSEMBLE_IEEE754_BINOP(atan2);
    1067         130 :       break;
    1068             :     case kIeee754Float64Cbrt:
    1069         116 :       ASSEMBLE_IEEE754_UNOP(cbrt);
    1070         116 :       break;
    1071             :     case kIeee754Float64Cos:
    1072         271 :       ASSEMBLE_IEEE754_UNOP(cos);
    1073         271 :       break;
    1074             :     case kIeee754Float64Cosh:
    1075         123 :       ASSEMBLE_IEEE754_UNOP(cosh);
    1076         123 :       break;
    1077             :     case kIeee754Float64Exp:
    1078         148 :       ASSEMBLE_IEEE754_UNOP(exp);
    1079         148 :       break;
    1080             :     case kIeee754Float64Expm1:
    1081         123 :       ASSEMBLE_IEEE754_UNOP(expm1);
    1082         123 :       break;
    1083             :     case kIeee754Float64Log:
    1084         252 :       ASSEMBLE_IEEE754_UNOP(log);
    1085         252 :       break;
    1086             :     case kIeee754Float64Log1p:
    1087         116 :       ASSEMBLE_IEEE754_UNOP(log1p);
    1088         116 :       break;
    1089             :     case kIeee754Float64Log2:
    1090         116 :       ASSEMBLE_IEEE754_UNOP(log2);
    1091         116 :       break;
    1092             :     case kIeee754Float64Log10:
    1093         116 :       ASSEMBLE_IEEE754_UNOP(log10);
    1094         116 :       break;
    1095             :     case kIeee754Float64Pow:
    1096         330 :       ASSEMBLE_IEEE754_BINOP(pow);
    1097         330 :       break;
    1098             :     case kIeee754Float64Sin:
    1099         268 :       ASSEMBLE_IEEE754_UNOP(sin);
    1100         268 :       break;
    1101             :     case kIeee754Float64Sinh:
    1102         123 :       ASSEMBLE_IEEE754_UNOP(sinh);
    1103         123 :       break;
    1104             :     case kIeee754Float64Tan:
    1105         168 :       ASSEMBLE_IEEE754_UNOP(tan);
    1106         168 :       break;
    1107             :     case kIeee754Float64Tanh:
    1108         123 :       ASSEMBLE_IEEE754_UNOP(tanh);
    1109         123 :       break;
    1110             :     case kX64Add32:
    1111      292929 :       ASSEMBLE_BINOP(addl);
    1112             :       break;
    1113             :     case kX64Add:
    1114      306923 :       ASSEMBLE_BINOP(addq);
    1115             :       break;
    1116             :     case kX64Sub32:
    1117      182006 :       ASSEMBLE_BINOP(subl);
    1118             :       break;
    1119             :     case kX64Sub:
    1120      221018 :       ASSEMBLE_BINOP(subq);
    1121             :       break;
    1122             :     case kX64And32:
    1123      719880 :       ASSEMBLE_BINOP(andl);
    1124             :       break;
    1125             :     case kX64And:
    1126      926747 :       ASSEMBLE_BINOP(andq);
    1127             :       break;
    1128             :     case kX64Cmp8:
    1129       36197 :       ASSEMBLE_COMPARE(cmpb);
    1130             :       break;
    1131             :     case kX64Cmp16:
    1132     1300298 :       ASSEMBLE_COMPARE(cmpw);
    1133             :       break;
    1134             :     case kX64Cmp32:
    1135     4697284 :       ASSEMBLE_COMPARE(cmpl);
    1136             :       break;
    1137             :     case kX64Cmp:
    1138     8557002 :       ASSEMBLE_COMPARE(cmpq);
    1139             :       break;
    1140             :     case kX64Test8:
    1141      320413 :       ASSEMBLE_COMPARE(testb);
    1142             :       break;
    1143             :     case kX64Test16:
    1144       91504 :       ASSEMBLE_COMPARE(testw);
    1145             :       break;
    1146             :     case kX64Test32:
    1147      481587 :       ASSEMBLE_COMPARE(testl);
    1148             :       break;
    1149             :     case kX64Test:
    1150     2675786 :       ASSEMBLE_COMPARE(testq);
    1151             :       break;
    1152             :     case kX64Imul32:
    1153      156589 :       ASSEMBLE_MULT(imull);
    1154             :       break;
    1155             :     case kX64Imul:
    1156       61636 :       ASSEMBLE_MULT(imulq);
    1157             :       break;
    1158             :     case kX64ImulHigh32:
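                     :       // The one-operand imull multiplies implicitly by eax and leaves the
                     :       // high 32 bits of the product in edx, which is why neither register
                     :       // is named explicitly here.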
    1159        4196 :       if (instr->InputAt(1)->IsRegister()) {
    1160        4196 :         __ imull(i.InputRegister(1));
    1161             :       } else {
    1162           0 :         __ imull(i.InputOperand(1));
    1163             :       }
    1164             :       break;
    1165             :     case kX64UmulHigh32:
    1166        1428 :       if (instr->InputAt(1)->IsRegister()) {
    1167        1428 :         __ mull(i.InputRegister(1));
    1168             :       } else {
    1169           0 :         __ mull(i.InputOperand(1));
    1170             :       }
    1171             :       break;
    1172             :     case kX64Idiv32:
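                     :       // cdq sign-extends eax into edx, forming the edx:eax dividend that
                     :       // idivl expects; the unsigned divisions below clear rdx with xor
                     :       // instead.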
    1173       31993 :       __ cdq();
    1174             :       __ idivl(i.InputRegister(1));
    1175             :       break;
    1176             :     case kX64Idiv:
    1177        2732 :       __ cqo();
    1178             :       __ idivq(i.InputRegister(1));
    1179             :       break;
    1180             :     case kX64Udiv32:
    1181       29121 :       __ xorl(rdx, rdx);
    1182             :       __ divl(i.InputRegister(1));
    1183             :       break;
    1184             :     case kX64Udiv:
    1185        1768 :       __ xorq(rdx, rdx);
    1186             :       __ divq(i.InputRegister(1));
    1187             :       break;
    1188             :     case kX64Not:
    1189          88 :       ASSEMBLE_UNOP(notq);
    1190             :       break;
    1191             :     case kX64Not32:
    1192        5556 :       ASSEMBLE_UNOP(notl);
    1193             :       break;
    1194             :     case kX64Neg:
    1195       17454 :       ASSEMBLE_UNOP(negq);
    1196             :       break;
    1197             :     case kX64Neg32:
    1198       12222 :       ASSEMBLE_UNOP(negl);
    1199             :       break;
    1200             :     case kX64Or32:
    1201      315297 :       ASSEMBLE_BINOP(orl);
    1202             :       break;
    1203             :     case kX64Or:
    1204      343464 :       ASSEMBLE_BINOP(orq);
    1205             :       break;
    1206             :     case kX64Xor32:
    1207       77169 :       ASSEMBLE_BINOP(xorl);
    1208             :       break;
    1209             :     case kX64Xor:
    1210        3272 :       ASSEMBLE_BINOP(xorq);
    1211             :       break;
    1212             :     case kX64Shl32:
    1213      117985 :       ASSEMBLE_SHIFT(shll, 5);
    1214             :       break;
    1215             :     case kX64Shl:
    1216     1393730 :       ASSEMBLE_SHIFT(shlq, 6);
    1217             :       break;
    1218             :     case kX64Shr32:
    1219      392893 :       ASSEMBLE_SHIFT(shrl, 5);
    1220             :       break;
    1221             :     case kX64Shr:
    1222     1435726 :       ASSEMBLE_SHIFT(shrq, 6);
    1223             :       break;
    1224             :     case kX64Sar32:
    1225       98806 :       ASSEMBLE_SHIFT(sarl, 5);
    1226             :       break;
    1227             :     case kX64Sar:
    1228      714480 :       ASSEMBLE_SHIFT(sarq, 6);
    1229             :       break;
    1230             :     case kX64Ror32:
    1231      110780 :       ASSEMBLE_SHIFT(rorl, 5);
    1232             :       break;
    1233             :     case kX64Ror:
    1234         338 :       ASSEMBLE_SHIFT(rorq, 6);
    1235             :       break;
    1236             :     case kX64Lzcnt:
    1237          34 :       if (instr->InputAt(0)->IsRegister()) {
    1238          34 :         __ Lzcntq(i.OutputRegister(), i.InputRegister(0));
    1239             :       } else {
    1240           0 :         __ Lzcntq(i.OutputRegister(), i.InputOperand(0));
    1241             :       }
    1242             :       break;
    1243             :     case kX64Lzcnt32:
    1244         666 :       if (instr->InputAt(0)->IsRegister()) {
    1245         626 :         __ Lzcntl(i.OutputRegister(), i.InputRegister(0));
    1246             :       } else {
    1247          80 :         __ Lzcntl(i.OutputRegister(), i.InputOperand(0));
    1248             :       }
    1249             :       break;
    1250             :     case kX64Tzcnt:
    1251          34 :       if (instr->InputAt(0)->IsRegister()) {
    1252          34 :         __ Tzcntq(i.OutputRegister(), i.InputRegister(0));
    1253             :       } else {
    1254           0 :         __ Tzcntq(i.OutputRegister(), i.InputOperand(0));
    1255             :       }
    1256             :       break;
    1257             :     case kX64Tzcnt32:
    1258         332 :       if (instr->InputAt(0)->IsRegister()) {
    1259         332 :         __ Tzcntl(i.OutputRegister(), i.InputRegister(0));
    1260             :       } else {
    1261           0 :         __ Tzcntl(i.OutputRegister(), i.InputOperand(0));
    1262             :       }
    1263             :       break;
    1264             :     case kX64Popcnt:
    1265          42 :       if (instr->InputAt(0)->IsRegister()) {
    1266          42 :         __ Popcntq(i.OutputRegister(), i.InputRegister(0));
    1267             :       } else {
    1268           0 :         __ Popcntq(i.OutputRegister(), i.InputOperand(0));
    1269             :       }
    1270             :       break;
    1271             :     case kX64Popcnt32:
    1272          80 :       if (instr->InputAt(0)->IsRegister()) {
    1273          80 :         __ Popcntl(i.OutputRegister(), i.InputRegister(0));
    1274             :       } else {
    1275           0 :         __ Popcntl(i.OutputRegister(), i.InputOperand(0));
    1276             :       }
    1277             :       break;
    1278             :     case kX64Bswap:
    1279          12 :       __ bswapq(i.OutputRegister());
    1280          12 :       break;
    1281             :     case kX64Bswap32:
    1282          44 :       __ bswapl(i.OutputRegister());
    1283          44 :       break;
    1284             :     case kSSEFloat32Cmp:
    1285           0 :       ASSEMBLE_SSE_BINOP(Ucomiss);
    1286             :       break;
    1287             :     case kSSEFloat32Add:
    1288           0 :       ASSEMBLE_SSE_BINOP(addss);
    1289             :       break;
    1290             :     case kSSEFloat32Sub:
    1291           0 :       ASSEMBLE_SSE_BINOP(subss);
    1292             :       break;
    1293             :     case kSSEFloat32Mul:
    1294           0 :       ASSEMBLE_SSE_BINOP(mulss);
    1295             :       break;
    1296             :     case kSSEFloat32Div:
    1297           0 :       ASSEMBLE_SSE_BINOP(divss);
    1298             :       // Don't delete this mov. It may improve performance on some CPUs,
    1299             :       // when there is a (v)mulss depending on the result.
    1300           0 :       __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
    1301           0 :       break;
    1302             :     case kSSEFloat32Abs: {
    1303             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1304           0 :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    1305           0 :       __ psrlq(kScratchDoubleReg, 33);
    1306           0 :       __ andps(i.OutputDoubleRegister(), kScratchDoubleReg);
    1307           0 :       break;
    1308             :     }
    1309             :     case kSSEFloat32Neg: {
    1310             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1311           0 :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    1312           0 :       __ psllq(kScratchDoubleReg, 31);
    1313           0 :       __ xorps(i.OutputDoubleRegister(), kScratchDoubleReg);
    1314           0 :       break;
    1315             :     }
    1316             :     case kSSEFloat32Sqrt:
    1317         364 :       ASSEMBLE_SSE_UNOP(sqrtss);
    1318             :       break;
    1319             :     case kSSEFloat32ToFloat64:
    1320       40918 :       ASSEMBLE_SSE_UNOP(Cvtss2sd);
    1321             :       break;
    1322             :     case kSSEFloat32Round: {
    1323             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    1324             :       RoundingMode const mode =
    1325             :           static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
    1326             :       __ Roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
    1327             :       break;
    1328             :     }
    1329             :     case kSSEFloat32ToInt32:
    1330         348 :       if (instr->InputAt(0)->IsFPRegister()) {
    1331         348 :         __ Cvttss2si(i.OutputRegister(), i.InputDoubleRegister(0));
    1332             :       } else {
    1333           0 :         __ Cvttss2si(i.OutputRegister(), i.InputOperand(0));
    1334             :       }
    1335             :       break;
    1336             :     case kSSEFloat32ToUint32: {
    1337          56 :       if (instr->InputAt(0)->IsFPRegister()) {
    1338          56 :         __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
    1339             :       } else {
    1340           0 :         __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
    1341             :       }
    1342             :       break;
    1343             :     }
    1344             :     case kSSEFloat64Cmp:
    1345        1846 :       ASSEMBLE_SSE_BINOP(Ucomisd);
    1346             :       break;
    1347             :     case kSSEFloat64Add:
    1348         632 :       ASSEMBLE_SSE_BINOP(addsd);
    1349             :       break;
    1350             :     case kSSEFloat64Sub:
    1351         468 :       ASSEMBLE_SSE_BINOP(subsd);
    1352             :       break;
    1353             :     case kSSEFloat64Mul:
    1354          56 :       ASSEMBLE_SSE_BINOP(mulsd);
    1355             :       break;
    1356             :     case kSSEFloat64Div:
    1357          52 :       ASSEMBLE_SSE_BINOP(divsd);
    1358             :       // Don't delete this mov. It may improve performance on some CPUs,
    1359             :       // when there is a (v)mulsd depending on the result.
    1360             :       __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
    1361             :       break;
    1362             :     case kSSEFloat64Mod: {
    1363        1577 :       __ subq(rsp, Immediate(kDoubleSize));
    1364        1577 :       unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    1365        1577 :                                                        kDoubleSize);
    1366             :       // Move values to st(0) and st(1).
    1367        3154 :       __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(1));
    1368        1577 :       __ fld_d(Operand(rsp, 0));
    1369        3154 :       __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
    1370        1577 :       __ fld_d(Operand(rsp, 0));
    1371             :       // Loop while fprem isn't done.
    1372        1577 :       Label mod_loop;
    1373        1577 :       __ bind(&mod_loop);
     1374             :       // This instruction traps on all kinds of inputs, but we are assuming
     1375             :       // the floating point control word is set to ignore them all.
    1376        1577 :       __ fprem();
     1377             :       // The following 2 instructions implicitly use rax.
    1378        1577 :       __ fnstsw_ax();
    1379        1577 :       if (CpuFeatures::IsSupported(SAHF)) {
    1380             :         CpuFeatureScope sahf_scope(tasm(), SAHF);
    1381        1545 :         __ sahf();
    1382             :       } else {
    1383             :         __ shrl(rax, Immediate(8));
    1384             :         __ andl(rax, Immediate(0xFF));
    1385          32 :         __ pushq(rax);
    1386             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    1387          32 :                                                          kSystemPointerSize);
    1388          32 :         __ popfq();
    1389             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    1390          32 :                                                          -kSystemPointerSize);
    1391             :       }
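                     :       // The FPU condition flag C2 has been copied into the parity flag; it
                     :       // stays set while fprem's partial reduction is incomplete, so keep
                     :       // looping until it clears.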
    1392        1577 :       __ j(parity_even, &mod_loop);
    1393             :       // Move output to stack and clean up.
    1394        1577 :       __ fstp(1);
    1395        1577 :       __ fstp_d(Operand(rsp, 0));
    1396        3154 :       __ Movsd(i.OutputDoubleRegister(), Operand(rsp, 0));
    1397             :       __ addq(rsp, Immediate(kDoubleSize));
    1398             :       unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    1399        1577 :                                                        -kDoubleSize);
    1400             :       break;
    1401             :     }
    1402             :     case kSSEFloat32Max: {
    1403          66 :       Label compare_swap, done_compare;
    1404          66 :       if (instr->InputAt(1)->IsFPRegister()) {
    1405             :         __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1406             :       } else {
    1407           0 :         __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
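                     :       // The operands compared equal (possibly -0 vs +0). Check the sign
                     :       // bit of the left operand so that Max returns +0 when the inputs are
                     :       // zeros of opposite sign.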
    1408             :       }
    1409             :       auto ool =
    1410             :           new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
    1411          66 :       __ j(parity_even, ool->entry());
    1412          66 :       __ j(above, &done_compare, Label::kNear);
    1413          66 :       __ j(below, &compare_swap, Label::kNear);
    1414             :       __ Movmskps(kScratchRegister, i.InputDoubleRegister(0));
    1415             :       __ testl(kScratchRegister, Immediate(1));
    1416          66 :       __ j(zero, &done_compare, Label::kNear);
    1417          66 :       __ bind(&compare_swap);
    1418          66 :       if (instr->InputAt(1)->IsFPRegister()) {
    1419             :         __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1420             :       } else {
    1421           0 :         __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
    1422             :       }
    1423          66 :       __ bind(&done_compare);
    1424          66 :       __ bind(ool->exit());
    1425             :       break;
    1426             :     }
    1427             :     case kSSEFloat32Min: {
    1428          66 :       Label compare_swap, done_compare;
    1429          66 :       if (instr->InputAt(1)->IsFPRegister()) {
    1430             :         __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1431             :       } else {
    1432           0 :         __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
    1433             :       }
    1434             :       auto ool =
    1435             :           new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
    1436          66 :       __ j(parity_even, ool->entry());
    1437          66 :       __ j(below, &done_compare, Label::kNear);
    1438          66 :       __ j(above, &compare_swap, Label::kNear);
    1439          66 :       if (instr->InputAt(1)->IsFPRegister()) {
    1440             :         __ Movmskps(kScratchRegister, i.InputDoubleRegister(1));
    1441             :       } else {
    1442           0 :         __ Movss(kScratchDoubleReg, i.InputOperand(1));
    1443             :         __ Movmskps(kScratchRegister, kScratchDoubleReg);
    1444             :       }
    1445             :       __ testl(kScratchRegister, Immediate(1));
    1446          66 :       __ j(zero, &done_compare, Label::kNear);
    1447          66 :       __ bind(&compare_swap);
    1448          66 :       if (instr->InputAt(1)->IsFPRegister()) {
    1449             :         __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1450             :       } else {
    1451           0 :         __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
    1452             :       }
    1453          66 :       __ bind(&done_compare);
    1454          66 :       __ bind(ool->exit());
    1455             :       break;
    1456             :     }
    1457             :     case kSSEFloat64Max: {
    1458         252 :       Label compare_swap, done_compare;
    1459         252 :       if (instr->InputAt(1)->IsFPRegister()) {
    1460             :         __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1461             :       } else {
    1462           0 :         __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
    1463             :       }
    1464             :       auto ool =
    1465             :           new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
    1466         252 :       __ j(parity_even, ool->entry());
    1467         252 :       __ j(above, &done_compare, Label::kNear);
    1468         252 :       __ j(below, &compare_swap, Label::kNear);
    1469             :       __ Movmskpd(kScratchRegister, i.InputDoubleRegister(0));
    1470             :       __ testl(kScratchRegister, Immediate(1));
    1471         252 :       __ j(zero, &done_compare, Label::kNear);
    1472         252 :       __ bind(&compare_swap);
    1473         252 :       if (instr->InputAt(1)->IsFPRegister()) {
    1474             :         __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1475             :       } else {
    1476           0 :         __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
    1477             :       }
    1478         252 :       __ bind(&done_compare);
    1479         252 :       __ bind(ool->exit());
    1480             :       break;
    1481             :     }
    1482             :     case kSSEFloat64Min: {
    1483         340 :       Label compare_swap, done_compare;
    1484         340 :       if (instr->InputAt(1)->IsFPRegister()) {
    1485             :         __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1486             :       } else {
    1487           0 :         __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
    1488             :       }
    1489             :       auto ool =
    1490             :           new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
    1491         340 :       __ j(parity_even, ool->entry());
    1492         340 :       __ j(below, &done_compare, Label::kNear);
    1493         340 :       __ j(above, &compare_swap, Label::kNear);
    1494         340 :       if (instr->InputAt(1)->IsFPRegister()) {
    1495             :         __ Movmskpd(kScratchRegister, i.InputDoubleRegister(1));
    1496             :       } else {
    1497           0 :         __ Movsd(kScratchDoubleReg, i.InputOperand(1));
    1498             :         __ Movmskpd(kScratchRegister, kScratchDoubleReg);
    1499             :       }
    1500             :       __ testl(kScratchRegister, Immediate(1));
    1501         340 :       __ j(zero, &done_compare, Label::kNear);
    1502         340 :       __ bind(&compare_swap);
    1503         340 :       if (instr->InputAt(1)->IsFPRegister()) {
    1504             :         __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1505             :       } else {
    1506           0 :         __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
    1507             :       }
    1508         340 :       __ bind(&done_compare);
    1509         340 :       __ bind(ool->exit());
    1510             :       break;
    1511             :     }
    1512             :     case kSSEFloat64Abs: {
    1513             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1514           6 :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    1515           6 :       __ psrlq(kScratchDoubleReg, 1);
    1516           6 :       __ andpd(i.OutputDoubleRegister(), kScratchDoubleReg);
    1517           6 :       break;
    1518             :     }
    1519             :     case kSSEFloat64Neg: {
    1520             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1521          76 :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    1522          76 :       __ psllq(kScratchDoubleReg, 63);
    1523          76 :       __ xorpd(i.OutputDoubleRegister(), kScratchDoubleReg);
    1524          76 :       break;
    1525             :     }
    1526             :     case kSSEFloat64Sqrt:
    1527         415 :       ASSEMBLE_SSE_UNOP(Sqrtsd);
    1528             :       break;
    1529             :     case kSSEFloat64Round: {
    1530             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    1531             :       RoundingMode const mode =
    1532             :           static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
    1533             :       __ Roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
    1534             :       break;
    1535             :     }
    1536             :     case kSSEFloat64ToFloat32:
    1537       36010 :       ASSEMBLE_SSE_UNOP(Cvtsd2ss);
    1538             :       break;
    1539             :     case kSSEFloat64ToInt32:
    1540      128086 :       if (instr->InputAt(0)->IsFPRegister()) {
    1541      107818 :         __ Cvttsd2si(i.OutputRegister(), i.InputDoubleRegister(0));
    1542             :       } else {
    1543       40536 :         __ Cvttsd2si(i.OutputRegister(), i.InputOperand(0));
    1544             :       }
    1545             :       break;
    1546             :     case kSSEFloat64ToUint32: {
    1547         702 :       if (instr->InputAt(0)->IsFPRegister()) {
    1548         702 :         __ Cvttsd2siq(i.OutputRegister(), i.InputDoubleRegister(0));
    1549             :       } else {
    1550           0 :         __ Cvttsd2siq(i.OutputRegister(), i.InputOperand(0));
    1551             :       }
    1552        1404 :       if (MiscField::decode(instr->opcode())) {
    1553         642 :         __ AssertZeroExtended(i.OutputRegister());
    1554             :       }
    1555             :       break;
    1556             :     }
    1557             :     case kSSEFloat32ToInt64:
    1558          52 :       if (instr->InputAt(0)->IsFPRegister()) {
    1559          52 :         __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
    1560             :       } else {
    1561           0 :         __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
    1562             :       }
    1563          52 :       if (instr->OutputCount() > 1) {
    1564          48 :         __ Set(i.OutputRegister(1), 1);
    1565          48 :         Label done;
    1566          48 :         Label fail;
    1567             :         __ Move(kScratchDoubleReg, static_cast<float>(INT64_MIN));
    1568          48 :         if (instr->InputAt(0)->IsFPRegister()) {
    1569             :           __ Ucomiss(kScratchDoubleReg, i.InputDoubleRegister(0));
    1570             :         } else {
    1571           0 :           __ Ucomiss(kScratchDoubleReg, i.InputOperand(0));
    1572             :         }
    1573             :         // If the input is NaN, then the conversion fails.
    1574          48 :         __ j(parity_even, &fail);
    1575             :         // If the input is INT64_MIN, then the conversion succeeds.
    1576          48 :         __ j(equal, &done);
    1577             :         __ cmpq(i.OutputRegister(0), Immediate(1));
    1578             :         // If the conversion results in INT64_MIN, but the input was not
    1579             :         // INT64_MIN, then the conversion fails.
    1580          48 :         __ j(no_overflow, &done);
    1581          48 :         __ bind(&fail);
    1582          48 :         __ Set(i.OutputRegister(1), 0);
    1583          48 :         __ bind(&done);
    1584             :       }
    1585             :       break;
    1586             :     case kSSEFloat64ToInt64:
    1587        1145 :       if (instr->InputAt(0)->IsFPRegister()) {
    1588        1144 :         __ Cvttsd2siq(i.OutputRegister(0), i.InputDoubleRegister(0));
    1589             :       } else {
    1590           2 :         __ Cvttsd2siq(i.OutputRegister(0), i.InputOperand(0));
    1591             :       }
    1592        1147 :       if (instr->OutputCount() > 1) {
    1593        1016 :         __ Set(i.OutputRegister(1), 1);
    1594        1021 :         Label done;
    1595        1021 :         Label fail;
    1596             :         __ Move(kScratchDoubleReg, static_cast<double>(INT64_MIN));
    1597        1009 :         if (instr->InputAt(0)->IsFPRegister()) {
    1598             :           __ Ucomisd(kScratchDoubleReg, i.InputDoubleRegister(0));
    1599             :         } else {
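                     :         // Subtracting 1 overflows only for INT64_MIN, so no_overflow means
                     :         // the truncation produced a genuine value rather than the saturated
                     :         // sentinel.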
    1600           0 :           __ Ucomisd(kScratchDoubleReg, i.InputOperand(0));
    1601             :         }
    1602             :         // If the input is NaN, then the conversion fails.
    1603        1022 :         __ j(parity_even, &fail);
    1604             :         // If the input is INT64_MIN, then the conversion succeeds.
    1605        1020 :         __ j(equal, &done);
    1606             :         __ cmpq(i.OutputRegister(0), Immediate(1));
    1607             :         // If the conversion results in INT64_MIN, but the input was not
    1608             :         // INT64_MIN, then the conversion fails.
    1609        1007 :         __ j(no_overflow, &done);
    1610        1010 :         __ bind(&fail);
    1611        1017 :         __ Set(i.OutputRegister(1), 0);
    1612        1013 :         __ bind(&done);
    1613             :       }
    1614             :       break;
    1615             :     case kSSEFloat32ToUint64: {
    1616          52 :       Label fail;
    1617         100 :       if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
    1618          52 :       if (instr->InputAt(0)->IsFPRegister()) {
    1619          52 :         __ Cvttss2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
    1620             :       } else {
    1621           0 :         __ Cvttss2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
    1622             :       }
    1623         100 :       if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
    1624          52 :       __ bind(&fail);
    1625             :       break;
    1626             :     }
    1627             :     case kSSEFloat64ToUint64: {
    1628        2972 :       Label fail;
    1629        3028 :       if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
    1630        2972 :       if (instr->InputAt(0)->IsFPRegister()) {
    1631        2972 :         __ Cvttsd2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
    1632             :       } else {
    1633           0 :         __ Cvttsd2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
    1634             :       }
    1635        3028 :       if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
    1636        2972 :       __ bind(&fail);
    1637             :       break;
    1638             :     }
    1639             :     case kSSEInt32ToFloat64:
    1640      378786 :       if (instr->InputAt(0)->IsRegister()) {
    1641      374901 :         __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
    1642             :       } else {
    1643        7769 :         __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
    1644             :       }
    1645             :       break;
    1646             :     case kSSEInt32ToFloat32:
    1647         984 :       if (instr->InputAt(0)->IsRegister()) {
    1648         976 :         __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
    1649             :       } else {
    1650          16 :         __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
    1651             :       }
    1652             :       break;
    1653             :     case kSSEInt64ToFloat32:
    1654          47 :       if (instr->InputAt(0)->IsRegister()) {
    1655          47 :         __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
    1656             :       } else {
    1657           0 :         __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
    1658             :       }
    1659             :       break;
    1660             :     case kSSEInt64ToFloat64:
    1661        3486 :       if (instr->InputAt(0)->IsRegister()) {
    1662        1486 :         __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
    1663             :       } else {
    1664        4000 :         __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
    1665             :       }
    1666             :       break;
    1667             :     case kSSEUint64ToFloat32:
    1668          32 :       if (instr->InputAt(0)->IsRegister()) {
    1669          32 :         __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
    1670             :       } else {
    1671           0 :         __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
    1672             :       }
    1673             :       break;
    1674             :     case kSSEUint64ToFloat64:
    1675        3608 :       if (instr->InputAt(0)->IsRegister()) {
    1676        2376 :         __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
    1677             :       } else {
    1678        2464 :         __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
    1679             :       }
    1680             :       break;
    1681             :     case kSSEUint32ToFloat64:
    1682        9970 :       if (instr->InputAt(0)->IsRegister()) {
    1683         392 :         __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
    1684             :       } else {
    1685       19156 :         __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
    1686             :       }
    1687             :       break;
    1688             :     case kSSEUint32ToFloat32:
    1689          88 :       if (instr->InputAt(0)->IsRegister()) {
    1690          88 :         __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
    1691             :       } else {
    1692           0 :         __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
    1693             :       }
    1694             :       break;
    1695             :     case kSSEFloat64ExtractLowWord32:
    1696         116 :       if (instr->InputAt(0)->IsFPStackSlot()) {
    1697           0 :         __ movl(i.OutputRegister(), i.InputOperand(0));
    1698             :       } else {
    1699             :         __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
    1700             :       }
    1701             :       break;
    1702             :     case kSSEFloat64ExtractHighWord32:
    1703      101570 :       if (instr->InputAt(0)->IsFPStackSlot()) {
    1704      120164 :         __ movl(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2));
    1705             :       } else {
    1706       41488 :         __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1);
    1707             :       }
    1708             :       break;
    1709             :     case kSSEFloat64InsertLowWord32:
    1710           4 :       if (instr->InputAt(1)->IsRegister()) {
    1711           4 :         __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 0);
    1712             :       } else {
    1713           0 :         __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
    1714             :       }
    1715             :       break;
    1716             :     case kSSEFloat64InsertHighWord32:
    1717         116 :       if (instr->InputAt(1)->IsRegister()) {
    1718         116 :         __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 1);
    1719             :       } else {
    1720           0 :         __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
    1721             :       }
    1722             :       break;
    1723             :     case kSSEFloat64LoadLowWord32:
    1724         112 :       if (instr->InputAt(0)->IsRegister()) {
    1725             :         __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
    1726             :       } else {
    1727           0 :         __ Movd(i.OutputDoubleRegister(), i.InputOperand(0));
    1728             :       }
    1729             :       break;
    1730             :     case kAVXFloat32Cmp: {
    1731             :       CpuFeatureScope avx_scope(tasm(), AVX);
    1732        1539 :       if (instr->InputAt(1)->IsFPRegister()) {
    1733        1516 :         __ vucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1734             :       } else {
    1735          46 :         __ vucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
    1736             :       }
    1737             :       break;
    1738             :     }
    1739             :     case kAVXFloat32Add:
    1740        3618 :       ASSEMBLE_AVX_BINOP(vaddss);
    1741             :       break;
    1742             :     case kAVXFloat32Sub:
    1743        5242 :       ASSEMBLE_AVX_BINOP(vsubss);
    1744             :       break;
    1745             :     case kAVXFloat32Mul:
    1746        1778 :       ASSEMBLE_AVX_BINOP(vmulss);
    1747             :       break;
    1748             :     case kAVXFloat32Div:
    1749         770 :       ASSEMBLE_AVX_BINOP(vdivss);
    1750             :       // Don't delete this mov. It may improve performance on some CPUs,
    1751             :       // when there is a (v)mulss depending on the result.
    1752             :       __ Movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
    1753             :       break;
    1754             :     case kAVXFloat64Cmp: {
    1755             :       CpuFeatureScope avx_scope(tasm(), AVX);
    1756      261924 :       if (instr->InputAt(1)->IsFPRegister()) {
    1757      241295 :         __ vucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1758             :       } else {
    1759       41258 :         __ vucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
    1760             :       }
    1761             :       break;
    1762             :     }
    1763             :     case kAVXFloat64Add:
    1764      158970 :       ASSEMBLE_AVX_BINOP(vaddsd);
    1765             :       break;
    1766             :     case kAVXFloat64Sub:
    1767       31170 :       ASSEMBLE_AVX_BINOP(vsubsd);
    1768             :       break;
    1769             :     case kAVXFloat64Mul:
    1770       23718 :       ASSEMBLE_AVX_BINOP(vmulsd);
    1771             :       break;
    1772             :     case kAVXFloat64Div:
    1773       24074 :       ASSEMBLE_AVX_BINOP(vdivsd);
    1774             :       // Don't delete this mov. It may improve performance on some CPUs,
    1775             :       // when there is a (v)mulsd depending on the result.
    1776             :       __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
    1777             :       break;
    1778             :     case kAVXFloat32Abs: {
    1779             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1780             :       CpuFeatureScope avx_scope(tasm(), AVX);
    1781          66 :       __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
    1782             :       __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 33);
    1783          66 :       if (instr->InputAt(0)->IsFPRegister()) {
    1784             :         __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg,
    1785             :                   i.InputDoubleRegister(0));
    1786             :       } else {
    1787           0 :         __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg,
    1788             :                   i.InputOperand(0));
    1789             :       }
    1790             :       break;
    1791             :     }
    1792             :     case kAVXFloat32Neg: {
    1793             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1794             :       CpuFeatureScope avx_scope(tasm(), AVX);
    1795         175 :       __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
    1796             :       __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 31);
    1797         175 :       if (instr->InputAt(0)->IsFPRegister()) {
    1798             :         __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg,
    1799             :                   i.InputDoubleRegister(0));
    1800             :       } else {
    1801           0 :         __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg,
    1802             :                   i.InputOperand(0));
    1803             :       }
    1804             :       break;
    1805             :     }
    1806             :     case kAVXFloat64Abs: {
    1807             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1808             :       CpuFeatureScope avx_scope(tasm(), AVX);
    1809         623 :       __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
    1810             :       __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 1);
    1811         623 :       if (instr->InputAt(0)->IsFPRegister()) {
    1812             :         __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg,
    1813             :                   i.InputDoubleRegister(0));
    1814             :       } else {
    1815           0 :         __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg,
    1816             :                   i.InputOperand(0));
    1817             :       }
    1818             :       break;
    1819             :     }
    1820             :     case kAVXFloat64Neg: {
    1821             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1822             :       CpuFeatureScope avx_scope(tasm(), AVX);
    1823        9651 :       __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
    1824             :       __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 63);
    1825        9651 :       if (instr->InputAt(0)->IsFPRegister()) {
    1826             :         __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg,
    1827             :                   i.InputDoubleRegister(0));
    1828             :       } else {
    1829          82 :         __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg,
    1830             :                   i.InputOperand(0));
    1831             :       }
    1832             :       break;
    1833             :     }
    1834             :     case kSSEFloat64SilenceNaN:
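                     :       // Subtracting zero leaves every value unchanged except that a
                     :       // signalling NaN comes out quieted.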
    1835             :       __ Xorpd(kScratchDoubleReg, kScratchDoubleReg);
    1836             :       __ Subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
    1837             :       break;
    1838             :     case kX64Movsxbl:
    1839       45811 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1840       92241 :       ASSEMBLE_MOVX(movsxbl);
    1841       45811 :       __ AssertZeroExtended(i.OutputRegister());
    1842       45811 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1843       45811 :       break;
    1844             :     case kX64Movzxbl:
    1845      175409 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1846      351958 :       ASSEMBLE_MOVX(movzxbl);
    1847      175411 :       __ AssertZeroExtended(i.OutputRegister());
    1848      175411 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1849      175411 :       break;
    1850             :     case kX64Movsxbq:
    1851       13514 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1852       27039 :       ASSEMBLE_MOVX(movsxbq);
    1853       13514 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1854       13514 :       break;
    1855             :     case kX64Movzxbq:
    1856       14020 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1857       28040 :       ASSEMBLE_MOVX(movzxbq);
    1858       14020 :       __ AssertZeroExtended(i.OutputRegister());
    1859       14020 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1860       14020 :       break;
    1861             :     case kX64Movb: {
    1862       78645 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1863       78644 :       size_t index = 0;
    1864       78644 :       Operand operand = i.MemoryOperand(&index);
    1865      157292 :       if (HasImmediateInput(instr, index)) {
    1866       13482 :         __ movb(operand, Immediate(i.InputInt8(index)));
    1867             :       } else {
    1868       71905 :         __ movb(operand, i.InputRegister(index));
    1869             :       }
    1870       78645 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1871             :       break;
    1872             :     }
    1873             :     case kX64Movsxwl:
    1874       10506 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1875       21494 :       ASSEMBLE_MOVX(movsxwl);
    1876       10507 :       __ AssertZeroExtended(i.OutputRegister());
    1877       10506 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1878       10506 :       break;
    1879             :     case kX64Movzxwl:
    1880      160717 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1881      327897 :       ASSEMBLE_MOVX(movzxwl);
    1882      160719 :       __ AssertZeroExtended(i.OutputRegister());
    1883      160719 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1884      160719 :       break;
    1885             :     case kX64Movsxwq:
    1886        9027 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1887       18058 :       ASSEMBLE_MOVX(movsxwq);
    1888             :       break;
    1889             :     case kX64Movzxwq:
    1890         672 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1891        1344 :       ASSEMBLE_MOVX(movzxwq);
    1892         672 :       __ AssertZeroExtended(i.OutputRegister());
    1893         672 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1894         672 :       break;
    1895             :     case kX64Movw: {
    1896       14014 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1897       14014 :       size_t index = 0;
    1898       14014 :       Operand operand = i.MemoryOperand(&index);
    1899       28030 :       if (HasImmediateInput(instr, index)) {
    1900        1830 :         __ movw(operand, Immediate(i.InputInt16(index)));
    1901             :       } else {
    1902       13100 :         __ movw(operand, i.InputRegister(index));
    1903             :       }
    1904       14015 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1905             :       break;
    1906             :     }
    1907             :     case kX64Movl:
    1908     2485652 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1909     2485360 :       if (instr->HasOutput()) {
    1910      961008 :         if (instr->addressing_mode() == kMode_None) {
    1911       52799 :           if (instr->InputAt(0)->IsRegister()) {
    1912       49218 :             __ movl(i.OutputRegister(), i.InputRegister(0));
    1913             :           } else {
    1914        7163 :             __ movl(i.OutputRegister(), i.InputOperand(0));
    1915             :           }
    1916             :         } else {
    1917      908228 :           __ movl(i.OutputRegister(), i.MemoryOperand());
    1918             :         }
    1919      961062 :         __ AssertZeroExtended(i.OutputRegister());
    1920             :       } else {
    1921     1524352 :         size_t index = 0;
    1922     1524352 :         Operand operand = i.MemoryOperand(&index);
    1923     3048898 :         if (HasImmediateInput(instr, index)) {
    1924      479445 :           __ movl(operand, i.InputImmediate(index));
    1925             :         } else {
    1926     1045007 :           __ movl(operand, i.InputRegister(index));
    1927             :         }
    1928             :       }
    1929     2485490 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1930     2485529 :       break;
    1931             :     case kX64Movsxlq:
    1932      231102 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1933      547416 :       ASSEMBLE_MOVX(movsxlq);
    1934      231102 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1935      231102 :       break;
    1936             :     case kX64MovqDecompressTaggedSigned: {
    1937           0 :       CHECK(instr->HasOutput());
    1938           0 :       __ DecompressTaggedSigned(i.OutputRegister(), i.MemoryOperand());
    1939           0 :       break;
    1940             :     }
    1941             :     case kX64MovqDecompressTaggedPointer: {
    1942           0 :       CHECK(instr->HasOutput());
    1943           0 :       __ DecompressTaggedPointer(i.OutputRegister(), i.MemoryOperand());
    1944           0 :       break;
    1945             :     }
    1946             :     case kX64MovqDecompressAnyTagged: {
    1947           0 :       CHECK(instr->HasOutput());
    1948           0 :       __ DecompressAnyTagged(i.OutputRegister(), i.MemoryOperand());
    1949           0 :       break;
    1950             :     }
    1951             :     case kX64MovqCompressTagged: {
    1952           0 :       CHECK(!instr->HasOutput());
    1953           0 :       size_t index = 0;
    1954           0 :       Operand operand = i.MemoryOperand(&index);
    1955           0 :       if (HasImmediateInput(instr, index)) {
    1956           0 :         __ StoreTaggedField(operand, i.InputImmediate(index));
    1957             :       } else {
    1958           0 :         __ StoreTaggedField(operand, i.InputRegister(index));
    1959             :       }
    1960             :       break;
    1961             :     }
    1962             :     case kX64DecompressSigned: {
    1963           0 :       CHECK(instr->HasOutput());
    1964           0 :       ASSEMBLE_MOVX(movsxlq);
    1965             :       break;
    1966             :     }
    1967             :     case kX64DecompressPointer: {
    1968           0 :       CHECK(instr->HasOutput());
    1969           0 :       ASSEMBLE_MOVX(movsxlq);
    1970           0 :       __ addq(i.OutputRegister(), kRootRegister);
    1971             :       break;
    1972             :     }
    1973             :     case kX64DecompressAny: {
    1974           0 :       CHECK(instr->HasOutput());
    1975           0 :       ASSEMBLE_MOVX(movsxlq);
    1976             :       // TODO(solanes): Do branchful compute?
    1977             :       // Branchlessly compute |masked_root|:
    1978             :       STATIC_ASSERT((kSmiTagSize == 1) && (kSmiTag < 32));
    1979             :       Register masked_root = kScratchRegister;
    1980           0 :       __ movl(masked_root, i.OutputRegister());
    1981             :       __ andl(masked_root, Immediate(kSmiTagMask));
    1982             :       __ negq(masked_root);
    1983             :       __ andq(masked_root, kRootRegister);
    1984             :       // Now this add operation will either leave the value unchanged if it is a
    1985             :       // smi or add the isolate root if it is a heap object.
    1986             :       __ addq(i.OutputRegister(), masked_root);
    1987             :       break;
    1988             :     }
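
The masked_root sequence above is a branch-free select: under the usual V8 tagging scheme (kSmiTagMask == 1, Smis carry tag bit 0, heap objects tag bit 1) it yields 0 for a Smi and the isolate root for a heap object, so the final addq is either a no-op or the decompression step. A scalar sketch of the same steps; the function name is illustrative, not V8 code:

    #include <cstdint>

    uint64_t DecompressAny(uint32_t compressed, uint64_t isolate_root) {
      int64_t value = static_cast<int32_t>(compressed);      // movsxlq: sign-extend
      uint64_t masked_root = compressed & 1u;                 // andl with kSmiTagMask
      masked_root = 0 - masked_root;                          // negq: 0 or all ones
      masked_root &= isolate_root;                            // andq: 0 (Smi) or root (heap object)
      return static_cast<uint64_t>(value) + masked_root;      // addq
    }
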
    1989             :     // TODO(solanes): Combine into one Compress? They seem to be identical.
    1990             :     // TODO(solanes): We might get away with doing a no-op in these three cases.
    1991             :     // The movl instruction is the conservative way for the moment.
    1992             :     case kX64CompressSigned: {
    1993           0 :       ASSEMBLE_MOVX(movl);
    1994             :       break;
    1995             :     }
    1996             :     case kX64CompressPointer: {
    1997           0 :       ASSEMBLE_MOVX(movl);
    1998             :       break;
    1999             :     }
    2000             :     case kX64CompressAny: {
    2001           0 :       ASSEMBLE_MOVX(movl);
    2002             :       break;
    2003             :     }
    2004             :     case kX64Movq:
    2005     8624346 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    2006     8624053 :       if (instr->HasOutput()) {
    2007     5785970 :         __ movq(i.OutputRegister(), i.MemoryOperand());
    2008             :       } else {
    2009     2839578 :         size_t index = 0;
    2010     2839578 :         Operand operand = i.MemoryOperand(&index);
    2011     5679166 :         if (HasImmediateInput(instr, index)) {
    2012       21854 :           __ movq(operand, i.InputImmediate(index));
    2013             :         } else {
    2014     2817729 :           __ movq(operand, i.InputRegister(index));
    2015             :         }
    2016             :       }
    2017     8626541 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    2018     8626053 :       break;
    2019             :     case kX64Movss:
    2020       28865 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    2021       28868 :       if (instr->HasOutput()) {
    2022       15711 :         __ movss(i.OutputDoubleRegister(), i.MemoryOperand());
    2023             :       } else {
    2024       13158 :         size_t index = 0;
    2025       13158 :         Operand operand = i.MemoryOperand(&index);
    2026       26316 :         __ movss(operand, i.InputDoubleRegister(index));
    2027             :       }
    2028             :       break;
    2029             :     case kX64Movsd: {
    2030      619130 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    2031      619151 :       if (instr->HasOutput()) {
    2032             :         const MemoryAccessMode access_mode =
    2033             :             static_cast<MemoryAccessMode>(MiscField::decode(opcode));
    2034      426427 :         if (access_mode == kMemoryAccessPoisoned) {
    2035             :           // If we have to poison the loaded value, we load into a general
    2036             :           // purpose register first, mask it with the poison, and move the
    2037             :           // value from the general purpose register into the double register.
    2038           0 :           __ movq(kScratchRegister, i.MemoryOperand());
    2039             :           __ andq(kScratchRegister, kSpeculationPoisonRegister);
    2040             :           __ Movq(i.OutputDoubleRegister(), kScratchRegister);
    2041             :         } else {
    2042             :           __ Movsd(i.OutputDoubleRegister(), i.MemoryOperand());
    2043             :         }
    2044             :       } else {
    2045      192724 :         size_t index = 0;
    2046      192724 :         Operand operand = i.MemoryOperand(&index);
    2047      192726 :         __ Movsd(operand, i.InputDoubleRegister(index));
    2048             :       }
    2049             :       break;
    2050             :     }
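
For the kMemoryAccessPoisoned path above, the detour through kScratchRegister exists so the loaded bits can be and-ed with the speculation poison before they reach an XMM register; the poison register is expected to hold all ones on correctly predicted paths and zero on misspeculated ones, so a misspeculated load observes 0.0 instead of real data. A scalar model of the masking, illustration only:

    #include <cstdint>
    #include <cstring>

    double PoisonedLoadFloat64(const void* mem, uint64_t poison) {
      uint64_t bits;
      std::memcpy(&bits, mem, sizeof bits);        // movq kScratchRegister, [mem]
      bits &= poison;                              // andq kScratchRegister, poison
      double result;
      std::memcpy(&result, &bits, sizeof result);  // Movq xmm, kScratchRegister
      return result;
    }
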
    2051             :     case kX64Movdqu: {
    2052             :       CpuFeatureScope sse_scope(tasm(), SSSE3);
    2053        9500 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    2054        9500 :       if (instr->HasOutput()) {
    2055        5420 :         __ movdqu(i.OutputSimd128Register(), i.MemoryOperand());
    2056             :       } else {
    2057        4080 :         size_t index = 0;
    2058        4080 :         Operand operand = i.MemoryOperand(&index);
    2059        8160 :         __ movdqu(operand, i.InputSimd128Register(index));
    2060             :       }
    2061             :       break;
    2062             :     }
    2063             :     case kX64BitcastFI:
    2064       51108 :       if (instr->InputAt(0)->IsFPStackSlot()) {
    2065           0 :         __ movl(i.OutputRegister(), i.InputOperand(0));
    2066             :       } else {
    2067             :         __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
    2068             :       }
    2069             :       break;
    2070             :     case kX64BitcastDL:
    2071       50784 :       if (instr->InputAt(0)->IsFPStackSlot()) {
    2072           0 :         __ movq(i.OutputRegister(), i.InputOperand(0));
    2073             :       } else {
    2074             :         __ Movq(i.OutputRegister(), i.InputDoubleRegister(0));
    2075             :       }
    2076             :       break;
    2077             :     case kX64BitcastIF:
    2078         326 :       if (instr->InputAt(0)->IsRegister()) {
    2079             :         __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
    2080             :       } else {
    2081           0 :         __ movss(i.OutputDoubleRegister(), i.InputOperand(0));
    2082             :       }
    2083             :       break;
    2084             :     case kX64BitcastLD:
    2085         214 :       if (instr->InputAt(0)->IsRegister()) {
    2086             :         __ Movq(i.OutputDoubleRegister(), i.InputRegister(0));
    2087             :       } else {
    2088           0 :         __ Movsd(i.OutputDoubleRegister(), i.InputOperand(0));
    2089             :       }
    2090             :       break;
    2091             :     case kX64Lea32: {
    2092             :       AddressingMode mode = AddressingModeField::decode(instr->opcode());
    2093             :       // Shorten "leal" to "addl", "subl" or "shll" if the register allocation
     2094             :       // and addressing mode just happen to work out. The "addl"/"subl" forms
    2095             :       // in these cases are faster based on measurements.
    2096      328545 :       if (i.InputRegister(0) == i.OutputRegister()) {
    2097      153013 :         if (mode == kMode_MRI) {
    2098             :           int32_t constant_summand = i.InputInt32(1);
    2099             :           DCHECK_NE(0, constant_summand);
    2100       76389 :           if (constant_summand > 0) {
    2101       55208 :             __ addl(i.OutputRegister(), Immediate(constant_summand));
    2102             :           } else {
    2103       21181 :             __ subl(i.OutputRegister(),
    2104             :                     Immediate(base::NegateWithWraparound(constant_summand)));
    2105             :           }
    2106       76616 :         } else if (mode == kMode_MR1) {
    2107       16605 :           if (i.InputRegister(1) == i.OutputRegister()) {
    2108        4287 :             __ shll(i.OutputRegister(), Immediate(1));
    2109             :           } else {
    2110       12318 :             __ addl(i.OutputRegister(), i.InputRegister(1));
    2111             :           }
    2112       60011 :         } else if (mode == kMode_M2) {
    2113           0 :           __ shll(i.OutputRegister(), Immediate(1));
    2114       60011 :         } else if (mode == kMode_M4) {
    2115        2932 :           __ shll(i.OutputRegister(), Immediate(2));
    2116       57079 :         } else if (mode == kMode_M8) {
    2117         122 :           __ shll(i.OutputRegister(), Immediate(3));
    2118             :         } else {
    2119       56958 :           __ leal(i.OutputRegister(), i.MemoryOperand());
    2120             :         }
    2121      201042 :       } else if (mode == kMode_MR1 &&
    2122             :                  i.InputRegister(1) == i.OutputRegister()) {
    2123       16925 :         __ addl(i.OutputRegister(), i.InputRegister(0));
    2124             :       } else {
    2125      158603 :         __ leal(i.OutputRegister(), i.MemoryOperand());
    2126             :       }
    2127      328574 :       __ AssertZeroExtended(i.OutputRegister());
    2128      328597 :       break;
    2129             :     }
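
The rewrites above only fire when the output register is also one of the lea inputs, so the address computation collapses into a two-operand ALU instruction. A few of the concrete mappings, assuming eax is both input 0 and the output (the 64-bit kX64Lea case below does the same with addq/subq/shlq):

    // leal eax, [eax + 16]   ->  addl eax, 16     (kMode_MRI, summand > 0)
    // leal eax, [eax - 16]   ->  subl eax, 16     (kMode_MRI, summand < 0)
    // leal eax, [eax + ecx]  ->  addl eax, ecx    (kMode_MR1)
    // leal eax, [eax + eax]  ->  shll eax, 1      (kMode_MR1, both inputs == output)
    // leal eax, [eax*4]      ->  shll eax, 2      (kMode_M4)
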
    2130             :     case kX64Lea: {
    2131             :       AddressingMode mode = AddressingModeField::decode(instr->opcode());
    2132             :       // Shorten "leaq" to "addq", "subq" or "shlq" if the register allocation
     2133             :       // and addressing mode just happen to work out. The "addq"/"subq" forms
    2134             :       // in these cases are faster based on measurements.
    2135     1936110 :       if (i.InputRegister(0) == i.OutputRegister()) {
    2136      536152 :         if (mode == kMode_MRI) {
    2137             :           int32_t constant_summand = i.InputInt32(1);
    2138      434887 :           if (constant_summand > 0) {
    2139      352433 :             __ addq(i.OutputRegister(), Immediate(constant_summand));
    2140       82454 :           } else if (constant_summand < 0) {
    2141      164850 :             __ subq(i.OutputRegister(), Immediate(-constant_summand));
    2142             :           }
    2143      101269 :         } else if (mode == kMode_MR1) {
    2144       50474 :           if (i.InputRegister(1) == i.OutputRegister()) {
    2145        2714 :             __ shlq(i.OutputRegister(), Immediate(1));
    2146             :           } else {
    2147       47760 :             __ addq(i.OutputRegister(), i.InputRegister(1));
    2148             :           }
    2149       50795 :         } else if (mode == kMode_M2) {
    2150           0 :           __ shlq(i.OutputRegister(), Immediate(1));
    2151       50795 :         } else if (mode == kMode_M4) {
    2152         336 :           __ shlq(i.OutputRegister(), Immediate(2));
    2153       50459 :         } else if (mode == kMode_M8) {
    2154       12792 :           __ shlq(i.OutputRegister(), Immediate(3));
    2155             :         } else {
    2156       37667 :           __ leaq(i.OutputRegister(), i.MemoryOperand());
    2157             :         }
    2158     1640110 :       } else if (mode == kMode_MR1 &&
    2159             :                  i.InputRegister(1) == i.OutputRegister()) {
    2160      168912 :         __ addq(i.OutputRegister(), i.InputRegister(0));
    2161             :       } else {
    2162     1231065 :         __ leaq(i.OutputRegister(), i.MemoryOperand());
    2163             :       }
    2164             :       break;
    2165             :     }
    2166             :     case kX64Dec32:
    2167           0 :       __ decl(i.OutputRegister());
    2168             :       break;
    2169             :     case kX64Inc32:
    2170           0 :       __ incl(i.OutputRegister());
    2171             :       break;
    2172             :     case kX64Push:
    2173     3543038 :       if (AddressingModeField::decode(instr->opcode()) != kMode_None) {
    2174       18687 :         size_t index = 0;
    2175       18687 :         Operand operand = i.MemoryOperand(&index);
    2176       18690 :         __ pushq(operand);
    2177             :         frame_access_state()->IncreaseSPDelta(1);
    2178       18689 :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    2179       18689 :                                                          kSystemPointerSize);
    2180     3524351 :       } else if (HasImmediateInput(instr, 0)) {
    2181      524111 :         __ pushq(i.InputImmediate(0));
    2182             :         frame_access_state()->IncreaseSPDelta(1);
    2183      524111 :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    2184      524111 :                                                          kSystemPointerSize);
    2185     3000240 :       } else if (instr->InputAt(0)->IsRegister()) {
    2186     2249153 :         __ pushq(i.InputRegister(0));
    2187             :         frame_access_state()->IncreaseSPDelta(1);
    2188     2249160 :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    2189     2249160 :                                                          kSystemPointerSize);
    2190     1490244 :       } else if (instr->InputAt(0)->IsFloatRegister() ||
    2191             :                  instr->InputAt(0)->IsDoubleRegister()) {
    2192             :         // TODO(titzer): use another machine instruction?
    2193       22822 :         __ subq(rsp, Immediate(kDoubleSize));
    2194             :         frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize);
    2195       22822 :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    2196       22822 :                                                          kDoubleSize);
    2197       45644 :         __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
    2198      728265 :       } else if (instr->InputAt(0)->IsSimd128Register()) {
    2199             :         // TODO(titzer): use another machine instruction?
    2200          96 :         __ subq(rsp, Immediate(kSimd128Size));
    2201             :         frame_access_state()->IncreaseSPDelta(kSimd128Size /
    2202             :                                               kSystemPointerSize);
    2203          96 :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    2204          96 :                                                          kSimd128Size);
    2205         192 :         __ Movups(Operand(rsp, 0), i.InputSimd128Register(0));
    2206      748026 :       } else if (instr->InputAt(0)->IsStackSlot() ||
    2207      741594 :                  instr->InputAt(0)->IsFloatStackSlot() ||
    2208             :                  instr->InputAt(0)->IsDoubleStackSlot()) {
    2209      727881 :         __ pushq(i.InputOperand(0));
    2210             :         frame_access_state()->IncreaseSPDelta(1);
    2211      727885 :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    2212      727885 :                                                          kSystemPointerSize);
    2213             :       } else {
    2214             :         DCHECK(instr->InputAt(0)->IsSimd128StackSlot());
    2215         288 :         __ Movups(kScratchDoubleReg, i.InputOperand(0));
    2216             :         // TODO(titzer): use another machine instruction?
    2217             :         __ subq(rsp, Immediate(kSimd128Size));
    2218             :         frame_access_state()->IncreaseSPDelta(kSimd128Size /
    2219             :                                               kSystemPointerSize);
    2220         288 :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    2221         288 :                                                          kSimd128Size);
    2222         576 :         __ Movups(Operand(rsp, 0), kScratchDoubleReg);
    2223             :       }
    2224             :       break;
    2225             :     case kX64Poke: {
    2226             :       int slot = MiscField::decode(instr->opcode());
    2227        3392 :       if (HasImmediateInput(instr, 0)) {
    2228        2264 :         __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputImmediate(0));
    2229             :       } else {
    2230        4520 :         __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputRegister(0));
    2231             :       }
    2232             :       break;
    2233             :     }
    2234             :     case kX64Peek: {
    2235             :       int reverse_slot = i.InputInt32(0);
    2236             :       int offset =
    2237        5067 :           FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
    2238        5067 :       if (instr->OutputAt(0)->IsFPRegister()) {
    2239             :         LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
    2240        2528 :         if (op->representation() == MachineRepresentation::kFloat64) {
    2241        2528 :           __ Movsd(i.OutputDoubleRegister(), Operand(rbp, offset));
    2242             :         } else {
    2243             :           DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
    2244        2528 :           __ Movss(i.OutputFloatRegister(), Operand(rbp, offset));
    2245             :         }
    2246             :       } else {
    2247        7617 :         __ movq(i.OutputRegister(), Operand(rbp, offset));
    2248             :       }
    2249             :       break;
    2250             :     }
    2251             :     // TODO(gdeepti): Get rid of redundant moves for F32x4Splat/Extract below
    2252             :     case kX64F32x4Splat: {
    2253         140 :       XMMRegister dst = i.OutputSimd128Register();
    2254         140 :       if (instr->InputAt(0)->IsFPRegister()) {
    2255         140 :         __ movss(dst, i.InputDoubleRegister(0));
    2256             :       } else {
    2257           0 :         __ movss(dst, i.InputOperand(0));
    2258             :       }
    2259         140 :       __ shufps(dst, dst, 0x0);
    2260             :       break;
    2261             :     }
    2262             :     case kX64F32x4ExtractLane: {
    2263             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2264          64 :       __ extractps(kScratchRegister, i.InputSimd128Register(0), i.InputInt8(1));
    2265          32 :       __ movd(i.OutputDoubleRegister(), kScratchRegister);
    2266             :       break;
    2267             :     }
    2268             :     case kX64F32x4ReplaceLane: {
    2269             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2270             :       // The insertps instruction uses imm8[5:4] to indicate the lane
    2271             :       // that needs to be replaced.
    2272          32 :       byte select = i.InputInt8(1) << 4 & 0x30;
    2273          32 :       if (instr->InputAt(2)->IsFPRegister()) {
    2274          64 :         __ insertps(i.OutputSimd128Register(), i.InputDoubleRegister(2),
    2275          32 :                     select);
    2276             :       } else {
    2277           0 :         __ insertps(i.OutputSimd128Register(), i.InputOperand(2), select);
    2278             :       }
    2279             :       break;
    2280             :     }
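
For insertps, bits [5:4] of the immediate select the destination lane, while the source-lane bits [7:6] and the zero mask [3:0] are left at zero here, so the shift-and-mask above produces:

    // lane 0: (0 << 4) & 0x30 = 0x00      lane 2: (2 << 4) & 0x30 = 0x20
    // lane 1: (1 << 4) & 0x30 = 0x10      lane 3: (3 << 4) & 0x30 = 0x30
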
    2281             :     case kX64F32x4SConvertI32x4: {
    2282           4 :       __ cvtdq2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2283           4 :       break;
    2284             :     }
    2285             :     case kX64F32x4UConvertI32x4: {
    2286             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2287             :       DCHECK_NE(i.OutputSimd128Register(), kScratchDoubleReg);
    2288             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2289             :       XMMRegister dst = i.OutputSimd128Register();
    2290           4 :       __ pxor(kScratchDoubleReg, kScratchDoubleReg);      // zeros
    2291           4 :       __ pblendw(kScratchDoubleReg, dst, 0x55);           // get lo 16 bits
    2292             :       __ psubd(dst, kScratchDoubleReg);                   // get hi 16 bits
    2293           4 :       __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // convert lo exactly
    2294           4 :       __ psrld(dst, 1);                  // divide by 2 to get in unsigned range
    2295           4 :       __ cvtdq2ps(dst, dst);             // convert hi exactly
    2296           4 :       __ addps(dst, dst);                // double hi, exactly
    2297           4 :       __ addps(dst, kScratchDoubleReg);  // add hi and lo, may round.
    2298             :       break;
    2299             :     }
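
cvtdq2ps only understands signed int32 lanes, so kX64F32x4UConvertI32x4 above converts each unsigned lane in two exact halves and only the final addition can round. A per-lane scalar sketch under that reading, illustration only:

    #include <cstdint>

    float Uint32ToFloat32(uint32_t x) {
      uint32_t lo = x & 0xFFFFu;        // pblendw: low 16 bits, converts exactly
      uint32_t hi = x - lo;             // psubd: high 16 bits, low bits now zero
      float lo_f = static_cast<float>(static_cast<int32_t>(lo));        // cvtdq2ps
      float hi_f = static_cast<float>(static_cast<int32_t>(hi >> 1));   // psrld 1, then cvtdq2ps
      hi_f += hi_f;                     // addps dst,dst: undo the halving, still exact
      return hi_f + lo_f;               // addps: the only step that may round
    }
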
    2300             :     case kX64F32x4Abs: {
    2301             :       XMMRegister dst = i.OutputSimd128Register();
    2302             :       XMMRegister src = i.InputSimd128Register(0);
    2303           4 :       if (dst == src) {
    2304           4 :         __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2305           4 :         __ psrld(kScratchDoubleReg, 1);
    2306           4 :         __ andps(i.OutputSimd128Register(), kScratchDoubleReg);
    2307             :       } else {
    2308           0 :         __ pcmpeqd(dst, dst);
    2309           0 :         __ psrld(dst, 1);
    2310           0 :         __ andps(dst, i.InputSimd128Register(0));
    2311             :       }
    2312             :       break;
    2313             :     }
    2314             :     case kX64F32x4Neg: {
    2315             :       XMMRegister dst = i.OutputSimd128Register();
    2316             :       XMMRegister src = i.InputSimd128Register(0);
    2317           4 :       if (dst == src) {
    2318           4 :         __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2319           4 :         __ pslld(kScratchDoubleReg, 31);
    2320           4 :         __ xorps(i.OutputSimd128Register(), kScratchDoubleReg);
    2321             :       } else {
    2322           0 :         __ pcmpeqd(dst, dst);
    2323           0 :         __ pslld(dst, 31);
    2324           0 :         __ xorps(dst, i.InputSimd128Register(0));
    2325             :       }
    2326             :       break;
    2327             :     }
    2328             :     case kX64F32x4RecipApprox: {
    2329           4 :       __ rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2330           4 :       break;
    2331             :     }
    2332             :     case kX64F32x4RecipSqrtApprox: {
    2333           4 :       __ rsqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2334           4 :       break;
    2335             :     }
    2336             :     case kX64F32x4Add: {
    2337             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2338          12 :       __ addps(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2339          12 :       break;
    2340             :     }
    2341             :     case kX64F32x4AddHoriz: {
    2342             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2343             :       CpuFeatureScope sse_scope(tasm(), SSE3);
    2344           4 :       __ haddps(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2345             :       break;
    2346             :     }
    2347             :     case kX64F32x4Sub: {
    2348             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2349           4 :       __ subps(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2350           4 :       break;
    2351             :     }
    2352             :     case kX64F32x4Mul: {
    2353             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2354           4 :       __ mulps(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2355           4 :       break;
    2356             :     }
    2357             :     case kX64F32x4Min: {
    2358           4 :       XMMRegister src1 = i.InputSimd128Register(1),
    2359           4 :                   dst = i.OutputSimd128Register();
    2360             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    2361             :       // The minps instruction doesn't propagate NaNs and +0's in its first
     2362             :       // operand. Perform minps in both orders, merge the results, and adjust.
    2363           4 :       __ movaps(kScratchDoubleReg, src1);
    2364           4 :       __ minps(kScratchDoubleReg, dst);
    2365           4 :       __ minps(dst, src1);
     2366             :       // Propagate -0's and NaNs, which may be non-canonical.
    2367           4 :       __ orps(kScratchDoubleReg, dst);
    2368             :       // Canonicalize NaNs by quieting and clearing the payload.
    2369           4 :       __ cmpps(dst, kScratchDoubleReg, 3);
    2370           4 :       __ orps(kScratchDoubleReg, dst);
    2371           4 :       __ psrld(dst, 10);
    2372           4 :       __ andnps(dst, kScratchDoubleReg);
    2373             :       break;
    2374             :     }
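
The double-minps dance above works around minps always returning its second operand when either input is NaN or when both inputs are zero, so a single minps would be order-dependent. Worked examples of that asymmetry, written as minps(dst, src):

    // minps(+0.0, -0.0) -> -0.0    but    minps(-0.0, +0.0) -> +0.0
    // minps( NaN, 1.0 ) ->  1.0    but    minps( 1.0, NaN ) ->  NaN
    // Doing the min in both orders and or-ing the results keeps -0.0 and the NaN
    // whichever side they were on; the cmpps/psrld/andnps tail then rewrites every
    // NaN lane to the quiet pattern 0xFFC00000.
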
    2375             :     case kX64F32x4Max: {
    2376           4 :       XMMRegister src1 = i.InputSimd128Register(1),
    2377           4 :                   dst = i.OutputSimd128Register();
    2378             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    2379             :       // The maxps instruction doesn't propagate NaNs and +0's in its first
     2380             :       // operand. Perform maxps in both orders, merge the results, and adjust.
    2381           4 :       __ movaps(kScratchDoubleReg, src1);
    2382           4 :       __ maxps(kScratchDoubleReg, dst);
    2383           4 :       __ maxps(dst, src1);
    2384             :       // Find discrepancies.
    2385           4 :       __ xorps(dst, kScratchDoubleReg);
    2386             :       // Propagate NaNs, which may be non-canonical.
    2387           4 :       __ orps(kScratchDoubleReg, dst);
    2388             :       // Propagate sign discrepancy and (subtle) quiet NaNs.
    2389           4 :       __ subps(kScratchDoubleReg, dst);
    2390             :       // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
    2391           4 :       __ cmpps(dst, kScratchDoubleReg, 3);
    2392           4 :       __ psrld(dst, 10);
    2393           4 :       __ andnps(dst, kScratchDoubleReg);
    2394             :       break;
    2395             :     }
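
kX64F32x4Max has the mirrored problem: maxps also returns its second operand for NaNs and for zero-vs-zero, so the result of max(-0.0, +0.0) would depend on operand order. Worked examples, written as maxps(dst, src):

    // maxps(-0.0, +0.0) -> +0.0    but    maxps(+0.0, -0.0) -> -0.0
    // xorps finds lanes where the two orders disagree; orps/subps resolve the
    // zero case to +0.0, and the cmpps/psrld/andnps tail clears the payload of
    // any NaN lane (its sign stays non-deterministic, as the comment notes).
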
    2396             :     case kX64F32x4Eq: {
    2397             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2398           4 :       __ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x0);
    2399           4 :       break;
    2400             :     }
    2401             :     case kX64F32x4Ne: {
    2402             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2403           4 :       __ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x4);
    2404           4 :       break;
    2405             :     }
    2406             :     case kX64F32x4Lt: {
    2407             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2408           8 :       __ cmpltps(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2409             :       break;
    2410             :     }
    2411             :     case kX64F32x4Le: {
    2412             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2413           8 :       __ cmpleps(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2414             :       break;
    2415             :     }
    2416             :     case kX64I32x4Splat: {
    2417        1088 :       XMMRegister dst = i.OutputSimd128Register();
    2418        1088 :       if (instr->InputAt(0)->IsRegister()) {
    2419        1088 :         __ movd(dst, i.InputRegister(0));
    2420             :       } else {
    2421           0 :         __ movd(dst, i.InputOperand(0));
    2422             :       }
    2423        1088 :       __ pshufd(dst, dst, 0x0);
    2424             :       break;
    2425             :     }
    2426             :     case kX64I32x4ExtractLane: {
    2427             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2428        3814 :       __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
    2429             :       break;
    2430             :     }
    2431             :     case kX64I32x4ReplaceLane: {
    2432             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2433        1784 :       if (instr->InputAt(2)->IsRegister()) {
    2434         296 :         __ Pinsrd(i.OutputSimd128Register(), i.InputRegister(2),
    2435         296 :                   i.InputInt8(1));
    2436             :       } else {
    2437        2976 :         __ Pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
    2438             :       }
    2439             :       break;
    2440             :     }
    2441             :     case kX64I32x4SConvertF32x4: {
    2442             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2443             :       XMMRegister dst = i.OutputSimd128Register();
     2444             :       // NaN -> 0
    2445           4 :       __ movaps(kScratchDoubleReg, dst);
    2446             :       __ cmpeqps(kScratchDoubleReg, kScratchDoubleReg);
    2447             :       __ pand(dst, kScratchDoubleReg);
    2448             :       // Set top bit if >= 0 (but not -0.0!)
    2449             :       __ pxor(kScratchDoubleReg, dst);
    2450             :       // Convert
    2451           4 :       __ cvttps2dq(dst, dst);
     2452             :       // Set top bit if >= 0 is now < 0
    2453             :       __ pand(kScratchDoubleReg, dst);
    2454           4 :       __ psrad(kScratchDoubleReg, 31);
    2455             :       // Set positive overflow lanes to 0x7FFFFFFF
    2456             :       __ pxor(dst, kScratchDoubleReg);
    2457             :       break;
    2458             :     }
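
cvttps2dq returns 0x80000000 for NaN and for anything outside the int32 range, which is the right answer for negative overflow but not for NaN (wanted: 0) or positive overflow (wanted: 0x7FFFFFFF); the pand/pxor/psrad fix-up above patches exactly those lanes. A per-lane scalar model, illustration only:

    #include <cstdint>
    #include <cmath>

    int32_t Float32ToInt32Sat(float x) {
      if (std::isnan(x)) return 0;                 // cmpeqps/pand: NaN -> 0
      if (x >= 2147483648.0f) return INT32_MAX;    // positive overflow -> 0x7FFFFFFF
      if (x < -2147483648.0f) return INT32_MIN;    // negative overflow keeps 0x80000000
      return static_cast<int32_t>(x);              // cvttps2dq truncates toward zero
    }
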
    2459             :     case kX64I32x4SConvertI16x8Low: {
    2460             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2461           4 :       __ pmovsxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2462             :       break;
    2463             :     }
    2464             :     case kX64I32x4SConvertI16x8High: {
    2465             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2466             :       XMMRegister dst = i.OutputSimd128Register();
    2467           4 :       __ palignr(dst, i.InputSimd128Register(0), 8);
    2468             :       __ pmovsxwd(dst, dst);
    2469             :       break;
    2470             :     }
    2471             :     case kX64I32x4Neg: {
    2472             :       CpuFeatureScope sse_scope(tasm(), SSSE3);
    2473             :       XMMRegister dst = i.OutputSimd128Register();
    2474             :       XMMRegister src = i.InputSimd128Register(0);
    2475           4 :       if (dst == src) {
    2476           4 :         __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2477             :         __ psignd(dst, kScratchDoubleReg);
    2478             :       } else {
    2479           0 :         __ pxor(dst, dst);
    2480             :         __ psubd(dst, src);
    2481             :       }
    2482             :       break;
    2483             :     }
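
The dst == src path above is a constant-free negate: pcmpeqd makes every lane -1, and psignd negates each dst lane whose corresponding source lane is negative (zeroing where it is zero, passing through where it is positive), so with an all-ones operand every lane is negated. The same pcmpeqd/psign pairing reappears in kX64I16x8Neg and kX64I8x16Neg below. A worked lane, for illustration:

    // psignd with an all-ones (-1) second operand:
    //   lane  5 -> -5,   lane -7 -> 7,   lane 0 -> 0   (i.e. 0 - lane, per lane)
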
    2484             :     case kX64I32x4Shl: {
    2485         248 :       __ pslld(i.OutputSimd128Register(), i.InputInt8(1));
    2486         124 :       break;
    2487             :     }
    2488             :     case kX64I32x4ShrS: {
    2489         248 :       __ psrad(i.OutputSimd128Register(), i.InputInt8(1));
    2490         124 :       break;
    2491             :     }
    2492             :     case kX64I32x4Add: {
    2493          12 :       __ paddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2494             :       break;
    2495             :     }
    2496             :     case kX64I32x4AddHoriz: {
    2497             :       CpuFeatureScope sse_scope(tasm(), SSSE3);
    2498           4 :       __ phaddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2499             :       break;
    2500             :     }
    2501             :     case kX64I32x4Sub: {
    2502           4 :       __ psubd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2503             :       break;
    2504             :     }
    2505             :     case kX64I32x4Mul: {
    2506             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2507           4 :       __ pmulld(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2508             :       break;
    2509             :     }
    2510             :     case kX64I32x4MinS: {
    2511             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2512           4 :       __ pminsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2513             :       break;
    2514             :     }
    2515             :     case kX64I32x4MaxS: {
    2516             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2517           4 :       __ pmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2518             :       break;
    2519             :     }
    2520             :     case kX64I32x4Eq: {
    2521          12 :       __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2522             :       break;
    2523             :     }
    2524             :     case kX64I32x4Ne: {
    2525          16 :       __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2526             :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2527             :       __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
    2528             :       break;
    2529             :     }
    2530             :     case kX64I32x4GtS: {
    2531           8 :       __ pcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2532             :       break;
    2533             :     }
    2534             :     case kX64I32x4GeS: {
    2535             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2536             :       XMMRegister dst = i.OutputSimd128Register();
    2537             :       XMMRegister src = i.InputSimd128Register(1);
    2538           8 :       __ pminsd(dst, src);
    2539             :       __ pcmpeqd(dst, src);
    2540             :       break;
    2541             :     }
    2542             :     case kX64I32x4UConvertF32x4: {
    2543             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2544             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2545             :       XMMRegister dst = i.OutputSimd128Register();
    2546             :       XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
     2547             :       // NaN -> 0, negative -> 0
    2548           4 :       __ pxor(kScratchDoubleReg, kScratchDoubleReg);
    2549           4 :       __ maxps(dst, kScratchDoubleReg);
    2550             :       // scratch: float representation of max_signed
    2551             :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2552           4 :       __ psrld(kScratchDoubleReg, 1);                     // 0x7fffffff
    2553           4 :       __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // 0x4f000000
    2554             :       // tmp: convert (src-max_signed).
    2555             :       // Positive overflow lanes -> 0x7FFFFFFF
    2556             :       // Negative lanes -> 0
    2557           4 :       __ movaps(tmp, dst);
    2558           4 :       __ subps(tmp, kScratchDoubleReg);
    2559             :       __ cmpleps(kScratchDoubleReg, tmp);
    2560           4 :       __ cvttps2dq(tmp, tmp);
    2561             :       __ pxor(tmp, kScratchDoubleReg);
    2562             :       __ pxor(kScratchDoubleReg, kScratchDoubleReg);
    2563             :       __ pmaxsd(tmp, kScratchDoubleReg);
     2564             :       // Convert. Overflow lanes above max_signed will be 0x80000000
    2565           4 :       __ cvttps2dq(dst, dst);
    2566             :       // Add (src-max_signed) for overflow lanes.
    2567             :       __ paddd(dst, tmp);
    2568             :       break;
    2569             :     }
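
kX64I32x4UConvertF32x4 has the dual problem: cvttps2dq only covers the signed range, so lanes at or above 2^31 are converted as (value - 2^31) and the bias is added back as integer bits, while NaN, negative, and >= 2^32 lanes are saturated by the maxps/cmpleps/pxor/pmaxsd steps. A per-lane scalar model, illustration only:

    #include <cstdint>
    #include <cmath>

    uint32_t Float32ToUint32Sat(float x) {
      if (std::isnan(x) || x <= 0.0f) return 0;        // pxor/maxps: NaN and negatives -> 0
      if (x >= 4294967296.0f) return UINT32_MAX;       // >= 2^32 saturates to 0xFFFFFFFF
      if (x < 2147483648.0f) return static_cast<uint32_t>(x);  // fits the signed path
      // 2^31 <= x < 2^32: convert the rebased value, then add the bias back.
      return static_cast<uint32_t>(x - 2147483648.0f) + 0x80000000u;
    }
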
    2570             :     case kX64I32x4UConvertI16x8Low: {
    2571             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2572           4 :       __ pmovzxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2573             :       break;
    2574             :     }
    2575             :     case kX64I32x4UConvertI16x8High: {
    2576             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2577             :       XMMRegister dst = i.OutputSimd128Register();
    2578           4 :       __ palignr(dst, i.InputSimd128Register(0), 8);
    2579             :       __ pmovzxwd(dst, dst);
    2580             :       break;
    2581             :     }
    2582             :     case kX64I32x4ShrU: {
    2583         248 :       __ psrld(i.OutputSimd128Register(), i.InputInt8(1));
    2584         124 :       break;
    2585             :     }
    2586             :     case kX64I32x4MinU: {
    2587             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2588           4 :       __ pminud(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2589             :       break;
    2590             :     }
    2591             :     case kX64I32x4MaxU: {
    2592             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2593           4 :       __ pmaxud(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2594             :       break;
    2595             :     }
    2596             :     case kX64I32x4GtU: {
    2597             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2598             :       XMMRegister dst = i.OutputSimd128Register();
    2599             :       XMMRegister src = i.InputSimd128Register(1);
    2600           8 :       __ pmaxud(dst, src);
    2601             :       __ pcmpeqd(dst, src);
    2602             :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2603             :       __ pxor(dst, kScratchDoubleReg);
    2604             :       break;
    2605             :     }
    2606             :     case kX64I32x4GeU: {
    2607             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2608             :       XMMRegister dst = i.OutputSimd128Register();
    2609             :       XMMRegister src = i.InputSimd128Register(1);
    2610           8 :       __ pminud(dst, src);
    2611             :       __ pcmpeqd(dst, src);
    2612             :       break;
    2613             :     }
    2614             :     case kX64S128Zero: {
    2615          22 :       XMMRegister dst = i.OutputSimd128Register();
    2616          22 :       __ xorps(dst, dst);
    2617             :       break;
    2618             :     }
    2619             :     case kX64I16x8Splat: {
    2620         436 :       XMMRegister dst = i.OutputSimd128Register();
    2621         436 :       if (instr->InputAt(0)->IsRegister()) {
    2622         436 :         __ movd(dst, i.InputRegister(0));
    2623             :       } else {
    2624           0 :         __ movd(dst, i.InputOperand(0));
    2625             :       }
    2626         436 :       __ pshuflw(dst, dst, 0x0);
    2627         436 :       __ pshufd(dst, dst, 0x0);
    2628             :       break;
    2629             :     }
    2630             :     case kX64I16x8ExtractLane: {
    2631             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2632          48 :       Register dst = i.OutputRegister();
    2633          96 :       __ pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
    2634          48 :       __ movsxwl(dst, dst);
    2635             :       break;
    2636             :     }
    2637             :     case kX64I16x8ReplaceLane: {
    2638             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2639          52 :       if (instr->InputAt(2)->IsRegister()) {
    2640         104 :         __ pinsrw(i.OutputSimd128Register(), i.InputRegister(2),
    2641          52 :                   i.InputInt8(1));
    2642             :       } else {
    2643           0 :         __ pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
    2644             :       }
    2645             :       break;
    2646             :     }
    2647             :     case kX64I16x8SConvertI8x16Low: {
    2648             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2649           4 :       __ pmovsxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2650             :       break;
    2651             :     }
    2652             :     case kX64I16x8SConvertI8x16High: {
    2653             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2654             :       XMMRegister dst = i.OutputSimd128Register();
    2655           4 :       __ palignr(dst, i.InputSimd128Register(0), 8);
    2656             :       __ pmovsxbw(dst, dst);
    2657             :       break;
    2658             :     }
    2659             :     case kX64I16x8Neg: {
    2660             :       CpuFeatureScope sse_scope(tasm(), SSSE3);
    2661             :       XMMRegister dst = i.OutputSimd128Register();
    2662             :       XMMRegister src = i.InputSimd128Register(0);
    2663           4 :       if (dst == src) {
    2664           4 :         __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2665             :         __ psignw(dst, kScratchDoubleReg);
    2666             :       } else {
    2667           0 :         __ pxor(dst, dst);
    2668             :         __ psubw(dst, src);
    2669             :       }
    2670             :       break;
    2671             :     }
    2672             :     case kX64I16x8Shl: {
    2673         120 :       __ psllw(i.OutputSimd128Register(), i.InputInt8(1));
    2674          60 :       break;
    2675             :     }
    2676             :     case kX64I16x8ShrS: {
    2677         120 :       __ psraw(i.OutputSimd128Register(), i.InputInt8(1));
    2678          60 :       break;
    2679             :     }
    2680             :     case kX64I16x8SConvertI32x4: {
    2681             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2682           4 :       __ packssdw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2683             :       break;
    2684             :     }
    2685             :     case kX64I16x8Add: {
    2686           4 :       __ paddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2687             :       break;
    2688             :     }
    2689             :     case kX64I16x8AddSaturateS: {
    2690           4 :       __ paddsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2691             :       break;
    2692             :     }
    2693             :     case kX64I16x8AddHoriz: {
    2694             :       CpuFeatureScope sse_scope(tasm(), SSSE3);
    2695           4 :       __ phaddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2696             :       break;
    2697             :     }
    2698             :     case kX64I16x8Sub: {
    2699           4 :       __ psubw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2700             :       break;
    2701             :     }
    2702             :     case kX64I16x8SubSaturateS: {
    2703           4 :       __ psubsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2704             :       break;
    2705             :     }
    2706             :     case kX64I16x8Mul: {
    2707             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2708           4 :       __ pmullw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2709             :       break;
    2710             :     }
    2711             :     case kX64I16x8MinS: {
    2712             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2713           4 :       __ pminsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2714             :       break;
    2715             :     }
    2716             :     case kX64I16x8MaxS: {
    2717             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2718           4 :       __ pmaxsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2719             :       break;
    2720             :     }
    2721             :     case kX64I16x8Eq: {
    2722          12 :       __ pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2723             :       break;
    2724             :     }
    2725             :     case kX64I16x8Ne: {
    2726          16 :       __ pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2727             :       __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
    2728             :       __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
    2729             :       break;
    2730             :     }
    2731             :     case kX64I16x8GtS: {
    2732           8 :       __ pcmpgtw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2733             :       break;
    2734             :     }
    2735             :     case kX64I16x8GeS: {
    2736             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2737             :       XMMRegister dst = i.OutputSimd128Register();
    2738             :       XMMRegister src = i.InputSimd128Register(1);
    2739           8 :       __ pminsw(dst, src);
    2740             :       __ pcmpeqw(dst, src);
    2741             :       break;
    2742             :     }
    2743             :     case kX64I16x8UConvertI8x16Low: {
    2744             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2745           4 :       __ pmovzxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2746             :       break;
    2747             :     }
    2748             :     case kX64I16x8UConvertI8x16High: {
    2749             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2750             :       XMMRegister dst = i.OutputSimd128Register();
    2751           4 :       __ palignr(dst, i.InputSimd128Register(0), 8);
    2752             :       __ pmovzxbw(dst, dst);
    2753             :       break;
    2754             :     }
    2755             :     case kX64I16x8ShrU: {
    2756         120 :       __ psrlw(i.OutputSimd128Register(), i.InputInt8(1));
    2757          60 :       break;
    2758             :     }
    2759             :     case kX64I16x8UConvertI32x4: {
    2760             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2761             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2762             :       XMMRegister dst = i.OutputSimd128Register();
    2763             :       // Change negative lanes to 0x7FFFFFFF
    2764           4 :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2765           4 :       __ psrld(kScratchDoubleReg, 1);
    2766             :       __ pminud(dst, kScratchDoubleReg);
    2767             :       __ pminud(kScratchDoubleReg, i.InputSimd128Register(1));
    2768             :       __ packusdw(dst, kScratchDoubleReg);
    2769             :       break;
    2770             :     }
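
packusdw narrows each signed dword into an unsigned word with saturation, so a lane whose sign bit is set would otherwise pack to 0; the unsigned pminud against 0x7FFFFFFF turns exactly those lanes into 0x7FFFFFFF, which then saturates to 0xFFFF. A worked lane, as a rough check:

    // lane 0xFFFF0005 (sign bit set):
    //   without the clamp: packusdw(0xFFFF0005)            -> 0x0000  (read as negative)
    //   with the clamp:    pminud -> 0x7FFFFFFF, packusdw  -> 0xFFFF
    // lane 0x00012345 stays below the clamp and still saturates to 0xFFFF;
    // lane 0x00001234 packs to 0x1234 unchanged.
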
    2771             :     case kX64I16x8AddSaturateU: {
    2772           4 :       __ paddusw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2773             :       break;
    2774             :     }
    2775             :     case kX64I16x8SubSaturateU: {
    2776           4 :       __ psubusw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2777             :       break;
    2778             :     }
    2779             :     case kX64I16x8MinU: {
    2780             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2781           4 :       __ pminuw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2782             :       break;
    2783             :     }
    2784             :     case kX64I16x8MaxU: {
    2785             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2786           4 :       __ pmaxuw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2787             :       break;
    2788             :     }
    2789             :     case kX64I16x8GtU: {
    2790             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2791             :       XMMRegister dst = i.OutputSimd128Register();
    2792             :       XMMRegister src = i.InputSimd128Register(1);
    2793          16 :       __ pmaxuw(dst, src);
    2794             :       __ pcmpeqw(dst, src);
    2795             :       __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
    2796             :       __ pxor(dst, kScratchDoubleReg);
    2797             :       break;
    2798             :     }
    2799             :     case kX64I16x8GeU: {
    2800             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2801             :       XMMRegister dst = i.OutputSimd128Register();
    2802             :       XMMRegister src = i.InputSimd128Register(1);
    2803          16 :       __ pminuw(dst, src);
    2804             :       __ pcmpeqw(dst, src);
    2805             :       break;
    2806             :     }
    2807             :     case kX64I8x16Splat: {
    2808             :       CpuFeatureScope sse_scope(tasm(), SSSE3);
    2809             :       XMMRegister dst = i.OutputSimd128Register();
    2810         320 :       if (instr->InputAt(0)->IsRegister()) {
    2811         320 :         __ movd(dst, i.InputRegister(0));
    2812             :       } else {
    2813           0 :         __ movd(dst, i.InputOperand(0));
    2814             :       }
    2815         320 :       __ xorps(kScratchDoubleReg, kScratchDoubleReg);
    2816             :       __ pshufb(dst, kScratchDoubleReg);
    2817             :       break;
    2818             :     }
    2819             :     case kX64I8x16ExtractLane: {
    2820             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2821          48 :       Register dst = i.OutputRegister();
    2822          96 :       __ pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
    2823          48 :       __ movsxbl(dst, dst);
    2824             :       break;
    2825             :     }
    2826             :     case kX64I8x16ReplaceLane: {
    2827             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2828          84 :       if (instr->InputAt(2)->IsRegister()) {
    2829         168 :         __ pinsrb(i.OutputSimd128Register(), i.InputRegister(2),
    2830          84 :                   i.InputInt8(1));
    2831             :       } else {
    2832           0 :         __ pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
    2833             :       }
    2834             :       break;
    2835             :     }
    2836             :     case kX64I8x16SConvertI16x8: {
    2837             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2838           4 :       __ packsswb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2839             :       break;
    2840             :     }
    2841             :     case kX64I8x16Neg: {
    2842             :       CpuFeatureScope sse_scope(tasm(), SSSE3);
    2843             :       XMMRegister dst = i.OutputSimd128Register();
    2844             :       XMMRegister src = i.InputSimd128Register(0);
    2845           4 :       if (dst == src) {
    2846           4 :         __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2847             :         __ psignb(dst, kScratchDoubleReg);
    2848             :       } else {
    2849           0 :         __ pxor(dst, dst);
    2850             :         __ psubb(dst, src);
    2851             :       }
    2852             :       break;
    2853             :     }
    2854             :     case kX64I8x16Shl: {
    2855             :       XMMRegister dst = i.OutputSimd128Register();
    2856             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    2857          28 :       int8_t shift = i.InputInt8(1) & 0x7;
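                      :       // Annotation (not in source): there is no 8-bit shift in SSE. Small
                      :       // shifts are done by repeated doubling; larger ones use a 16-bit shift
                      :       // after masking off the bits that would spill into the neighboring byte.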
    2858          28 :       if (shift < 4) {
    2859             :         // For small shifts, doubling is faster.
    2860          60 :         for (int i = 0; i < shift; ++i) {
    2861          24 :           __ paddb(dst, dst);
    2862             :         }
    2863             :       } else {
    2864             :         // Mask off the unwanted bits before word-shifting.
    2865          16 :         __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
    2866          16 :         __ psrlw(kScratchDoubleReg, 8 + shift);
    2867             :         __ packuswb(kScratchDoubleReg, kScratchDoubleReg);
    2868             :         __ pand(dst, kScratchDoubleReg);
    2869          16 :         __ psllw(dst, shift);
    2870             :       }
    2871             :       break;
    2872             :     }
    2873             :     case kX64I8x16ShrS: {
    2874             :       XMMRegister dst = i.OutputSimd128Register();
    2875             :       XMMRegister src = i.InputSimd128Register(0);
    2876          28 :       int8_t shift = i.InputInt8(1) & 0x7;
    2877             :       // Unpack the bytes into words, do arithmetic shifts, and repack.
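                      :       // Annotation (not in source): punpck{l,h}bw places each source byte in
                      :       // the high byte of a 16-bit word, so an arithmetic shift by 8 + shift
                      :       // leaves the sign-extended result in the low byte while the interleaved
                      :       // filler byte is shifted out entirely.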
    2878          28 :       __ punpckhbw(kScratchDoubleReg, src);
    2879             :       __ punpcklbw(dst, src);
    2880          28 :       __ psraw(kScratchDoubleReg, 8 + shift);
    2881          28 :       __ psraw(dst, 8 + shift);
    2882             :       __ packsswb(dst, kScratchDoubleReg);
    2883             :       break;
    2884             :     }
    2885             :     case kX64I8x16Add: {
    2886           4 :       __ paddb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2887             :       break;
    2888             :     }
    2889             :     case kX64I8x16AddSaturateS: {
    2890           4 :       __ paddsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2891             :       break;
    2892             :     }
    2893             :     case kX64I8x16Sub: {
    2894           4 :       __ psubb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2895             :       break;
    2896             :     }
    2897             :     case kX64I8x16SubSaturateS: {
    2898           4 :       __ psubsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2899             :       break;
    2900             :     }
    2901             :     case kX64I8x16Mul: {
    2902             :       XMMRegister dst = i.OutputSimd128Register();
    2903             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    2904             :       XMMRegister right = i.InputSimd128Register(1);
    2905             :       XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
    2906             :       // I16x8 view of I8x16
    2907             :       // left = AAaa AAaa ... AAaa AAaa
    2908             :       // right= BBbb BBbb ... BBbb BBbb
    2909             :       // t = 00AA 00AA ... 00AA 00AA
    2910             :       // s = 00BB 00BB ... 00BB 00BB
    2911           4 :       __ movaps(tmp, dst);
    2912           4 :       __ movaps(kScratchDoubleReg, right);
    2913           4 :       __ psrlw(tmp, 8);
    2914           4 :       __ psrlw(kScratchDoubleReg, 8);
    2915             :       // dst = left * 256
    2916           4 :       __ psllw(dst, 8);
    2917             :       // t = I16x8Mul(t, s)
    2918             :       //    => __PP __PP ...  __PP  __PP
    2919             :       __ pmullw(tmp, kScratchDoubleReg);
    2920             :       // dst = I16x8Mul(left * 256, right)
    2921             :       //    => pp__ pp__ ...  pp__  pp__
    2922             :       __ pmullw(dst, right);
    2923             :       // t = I16x8Shl(t, 8)
    2924             :       //    => PP00 PP00 ...  PP00  PP00
    2925           4 :       __ psllw(tmp, 8);
    2926             :       // dst = I16x8Shr(dst, 8)
    2927             :       //    => 00pp 00pp ...  00pp  00pp
    2928           4 :       __ psrlw(dst, 8);
    2929             :       // dst = I16x8Or(dst, t)
    2930             :       //    => PPpp PPpp ...  PPpp  PPpp
    2931             :       __ por(dst, tmp);
    2932             :       break;
    2933             :     }
    2934             :     case kX64I8x16MinS: {
    2935             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2936           4 :       __ pminsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2937             :       break;
    2938             :     }
    2939             :     case kX64I8x16MaxS: {
    2940             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2941           4 :       __ pmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2942             :       break;
    2943             :     }
    2944             :     case kX64I8x16Eq: {
    2945          12 :       __ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2946             :       break;
    2947             :     }
    2948             :     case kX64I8x16Ne: {
    2949          16 :       __ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2950             :       __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
    2951             :       __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
    2952             :       break;
    2953             :     }
    2954             :     case kX64I8x16GtS: {
    2955           8 :       __ pcmpgtb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2956             :       break;
    2957             :     }
    2958             :     case kX64I8x16GeS: {
    2959             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2960             :       XMMRegister dst = i.OutputSimd128Register();
    2961             :       XMMRegister src = i.InputSimd128Register(1);
    2962           8 :       __ pminsb(dst, src);
    2963             :       __ pcmpeqb(dst, src);
    2964             :       break;
    2965             :     }
    2966             :     case kX64I8x16UConvertI16x8: {
    2967             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2968             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2969             :       XMMRegister dst = i.OutputSimd128Register();
    2970             :       // Change negative lanes to 0x7FFF
    2971           4 :       __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
    2972           4 :       __ psrlw(kScratchDoubleReg, 1);
    2973             :       __ pminuw(dst, kScratchDoubleReg);
    2974             :       __ pminuw(kScratchDoubleReg, i.InputSimd128Register(1));
    2975             :       __ packuswb(dst, kScratchDoubleReg);
    2976             :       break;
    2977             :     }
    2978             :     case kX64I8x16ShrU: {
    2979             :       XMMRegister dst = i.OutputSimd128Register();
    2980             :       XMMRegister src = i.InputSimd128Register(0);
    2981          28 :       int8_t shift = i.InputInt8(1) & 0x7;
    2982             :       // Unpack the bytes into words, do logical shifts, and repack.
    2983          28 :       __ punpckhbw(kScratchDoubleReg, src);
    2984             :       __ punpcklbw(dst, src);
    2985          28 :       __ psrlw(kScratchDoubleReg, 8 + shift);
    2986          28 :       __ psrlw(dst, 8 + shift);
    2987             :       __ packuswb(dst, kScratchDoubleReg);
    2988             :       break;
    2989             :     }
    2990             :     case kX64I8x16AddSaturateU: {
    2991           4 :       __ paddusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2992             :       break;
    2993             :     }
    2994             :     case kX64I8x16SubSaturateU: {
    2995           4 :       __ psubusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2996             :       break;
    2997             :     }
    2998             :     case kX64I8x16MinU: {
    2999             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    3000           4 :       __ pminub(i.OutputSimd128Register(), i.InputSimd128Register(1));
    3001             :       break;
    3002             :     }
    3003             :     case kX64I8x16MaxU: {
    3004             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    3005           4 :       __ pmaxub(i.OutputSimd128Register(), i.InputSimd128Register(1));
    3006             :       break;
    3007             :     }
    3008             :     case kX64I8x16GtU: {
    3009             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    3010             :       XMMRegister dst = i.OutputSimd128Register();
    3011             :       XMMRegister src = i.InputSimd128Register(1);
    3012          16 :       __ pmaxub(dst, src);
    3013             :       __ pcmpeqb(dst, src);
    3014             :       __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
    3015             :       __ pxor(dst, kScratchDoubleReg);
    3016             :       break;
    3017             :     }
    3018             :     case kX64I8x16GeU: {
    3019             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    3020             :       XMMRegister dst = i.OutputSimd128Register();
    3021             :       XMMRegister src = i.InputSimd128Register(1);
    3022          16 :       __ pminub(dst, src);
    3023             :       __ pcmpeqb(dst, src);
    3024             :       break;
    3025             :     }
    3026             :     case kX64S128And: {
    3027           4 :       __ pand(i.OutputSimd128Register(), i.InputSimd128Register(1));
    3028             :       break;
    3029             :     }
    3030             :     case kX64S128Or: {
    3031           4 :       __ por(i.OutputSimd128Register(), i.InputSimd128Register(1));
    3032             :       break;
    3033             :     }
    3034             :     case kX64S128Xor: {
    3035           4 :       __ pxor(i.OutputSimd128Register(), i.InputSimd128Register(1));
    3036             :       break;
    3037             :     }
    3038             :     case kX64S128Not: {
    3039             :       XMMRegister dst = i.OutputSimd128Register();
    3040             :       XMMRegister src = i.InputSimd128Register(0);
    3041           4 :       if (dst == src) {
    3042           4 :         __ movaps(kScratchDoubleReg, dst);
    3043             :         __ pcmpeqd(dst, dst);
    3044             :         __ pxor(dst, kScratchDoubleReg);
    3045             :       } else {
    3046           0 :         __ pcmpeqd(dst, dst);
    3047             :         __ pxor(dst, src);
    3048             :       }
    3049             : 
    3050             :       break;
    3051             :     }
    3052             :     case kX64S128Select: {
    3053             :       // Mask used here is stored in dst.
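                      :       // Annotation (not in source): bitwise select. (mask & (a ^ b)) ^ b
                      :       // yields a where the mask bit is set and b where it is clear.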
    3054          28 :       XMMRegister dst = i.OutputSimd128Register();
    3055          28 :       __ movaps(kScratchDoubleReg, i.InputSimd128Register(1));
    3056          28 :       __ xorps(kScratchDoubleReg, i.InputSimd128Register(2));
    3057          28 :       __ andps(dst, kScratchDoubleReg);
    3058          28 :       __ xorps(dst, i.InputSimd128Register(2));
    3059             :       break;
    3060             :     }
    3061             :     case kX64S8x16Shuffle: {
    3062             :       XMMRegister dst = i.OutputSimd128Register();
    3063             :       Register tmp = i.TempRegister(0);
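                      :       // Annotation (not in source): pshufb picks bytes from a single register
                      :       // and zeroes any lane whose control byte has the top bit set. The
                      :       // two-operand shuffle below therefore issues one pshufb per source, with
                      :       // out-of-range lane indices mapped to 0x80, and ORs the partial results.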
    3064             :       // Prepare 16 byte aligned buffer for shuffle control mask
    3065        1400 :       __ movq(tmp, rsp);
    3066             :       __ andq(rsp, Immediate(-16));
    3067        1400 :       if (instr->InputCount() == 5) {  // only one input operand
    3068         704 :         uint32_t mask[4] = {};
    3069             :         DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    3070        6336 :         for (int j = 4; j > 0; j--) {
    3071        5632 :           mask[j - 1] = i.InputUint32(j);
    3072             :         }
    3073             : 
    3074         704 :         SetupShuffleMaskOnStack(tasm(), mask);
    3075        1408 :         __ pshufb(dst, Operand(rsp, 0));
    3076             :       } else {  // two input operands
    3077             :         DCHECK_EQ(6, instr->InputCount());
    3078        1392 :         ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 0);
    3079         696 :         uint32_t mask[4] = {};
    3080        6264 :         for (int j = 5; j > 1; j--) {
    3081        2784 :           uint32_t lanes = i.InputUint32(j);
    3082       25056 :           for (int k = 0; k < 32; k += 8) {
    3083       11136 :             uint8_t lane = lanes >> k;
    3084       11136 :             mask[j - 2] |= (lane < kSimd128Size ? lane : 0x80) << k;
    3085             :           }
    3086             :         }
    3087         696 :         SetupShuffleMaskOnStack(tasm(), mask);
    3088        1392 :         __ pshufb(kScratchDoubleReg, Operand(rsp, 0));
    3089         696 :         uint32_t mask1[4] = {};
    3090         696 :         if (instr->InputAt(1)->IsSimd128Register()) {
    3091             :           XMMRegister src1 = i.InputSimd128Register(1);
    3092         696 :           if (src1 != dst) __ movups(dst, src1);
    3093             :         } else {
    3094           0 :           __ movups(dst, i.InputOperand(1));
    3095             :         }
    3096        6264 :         for (int j = 5; j > 1; j--) {
    3097        2784 :           uint32_t lanes = i.InputUint32(j);
    3098       25056 :           for (int k = 0; k < 32; k += 8) {
    3099       11136 :             uint8_t lane = lanes >> k;
    3100       11136 :             mask1[j - 2] |= (lane >= kSimd128Size ? (lane & 0x0F) : 0x80) << k;
    3101             :           }
    3102             :         }
    3103         696 :         SetupShuffleMaskOnStack(tasm(), mask1);
    3104        1392 :         __ pshufb(dst, Operand(rsp, 0));
    3105             :         __ por(dst, kScratchDoubleReg);
    3106             :       }
    3107             :       __ movq(rsp, tmp);
    3108             :       break;
    3109             :     }
    3110             :     case kX64S32x4Swizzle: {
    3111             :       DCHECK_EQ(2, instr->InputCount());
    3112         924 :       ASSEMBLE_SIMD_IMM_INSTR(pshufd, i.OutputSimd128Register(), 0,
    3113             :                               i.InputInt8(1));
    3114             :       break;
    3115             :     }
    3116             :     case kX64S32x4Shuffle: {
    3117             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    3118             :       DCHECK_EQ(4, instr->InputCount());  // Swizzles should be handled above.
    3119             :       int8_t shuffle = i.InputInt8(2);
    3120             :       DCHECK_NE(0xe4, shuffle);  // A simple blend should be handled below.
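                      :       // Annotation (not in source): both sources are permuted with the same
                      :       // dword pattern; pblendw then selects, per 16-bit lane, which source
                      :       // contributes to the result.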
    3121         768 :       ASSEMBLE_SIMD_IMM_INSTR(pshufd, kScratchDoubleReg, 1, shuffle);
    3122         768 :       ASSEMBLE_SIMD_IMM_INSTR(pshufd, i.OutputSimd128Register(), 0, shuffle);
    3123         768 :       __ pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputInt8(3));
    3124             :       break;
    3125             :     }
    3126             :     case kX64S16x8Blend: {
    3127         112 :       ASSEMBLE_SIMD_IMM_SHUFFLE(pblendw, SSE4_1, i.InputInt8(2));
    3128          56 :       break;
    3129             :     }
    3130             :     case kX64S16x8HalfShuffle1: {
    3131         240 :       XMMRegister dst = i.OutputSimd128Register();
    3132         720 :       ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, i.InputInt8(1));
    3133         240 :       __ pshufhw(dst, dst, i.InputInt8(2));
    3134             :       break;
    3135             :     }
    3136             :     case kX64S16x8HalfShuffle2: {
    3137             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    3138         164 :       XMMRegister dst = i.OutputSimd128Register();
    3139         492 :       ASSEMBLE_SIMD_IMM_INSTR(pshuflw, kScratchDoubleReg, 1, i.InputInt8(2));
    3140         164 :       __ pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputInt8(3));
    3141         492 :       ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, i.InputInt8(2));
    3142         164 :       __ pshufhw(dst, dst, i.InputInt8(3));
    3143         164 :       __ pblendw(dst, kScratchDoubleReg, i.InputInt8(4));
    3144             :       break;
    3145             :     }
    3146             :     case kX64S8x16Alignr: {
    3147         480 :       ASSEMBLE_SIMD_IMM_SHUFFLE(palignr, SSSE3, i.InputInt8(2));
    3148         240 :       break;
    3149             :     }
    3150             :     case kX64S16x8Dup: {
    3151         124 :       XMMRegister dst = i.OutputSimd128Register();
    3152         124 :       int8_t lane = i.InputInt8(1) & 0x7;
    3153         124 :       int8_t lane4 = lane & 0x3;
    3154         124 :       int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
    3155         124 :       if (lane < 4) {
    3156         248 :         ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, half_dup);
    3157         124 :         __ pshufd(dst, dst, 0);
    3158             :       } else {
    3159           0 :         ASSEMBLE_SIMD_IMM_INSTR(pshufhw, dst, 0, half_dup);
    3160           0 :         __ pshufd(dst, dst, 0xaa);
    3161             :       }
    3162             :       break;
    3163             :     }
    3164             :     case kX64S8x16Dup: {
    3165             :       XMMRegister dst = i.OutputSimd128Register();
    3166         264 :       int8_t lane = i.InputInt8(1) & 0xf;
    3167             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
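                      :       // Annotation (not in source): punpck doubles each byte of the selected
                      :       // half into a 16-bit word; the word holding the wanted lane is then
                      :       // broadcast with pshuflw/pshufhw followed by pshufd.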
    3168         264 :       if (lane < 8) {
    3169         264 :         __ punpcklbw(dst, dst);
    3170             :       } else {
    3171           0 :         __ punpckhbw(dst, dst);
    3172             :       }
    3173         264 :       lane &= 0x7;
    3174         264 :       int8_t lane4 = lane & 0x3;
    3175         264 :       int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
    3176         264 :       if (lane < 4) {
    3177         232 :         __ pshuflw(dst, dst, half_dup);
    3178         232 :         __ pshufd(dst, dst, 0);
    3179             :       } else {
    3180          32 :         __ pshufhw(dst, dst, half_dup);
    3181          32 :         __ pshufd(dst, dst, 0xaa);
    3182             :       }
    3183             :       break;
    3184             :     }
    3185             :     case kX64S64x2UnpackHigh:
    3186           0 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhqdq);
    3187             :       break;
    3188             :     case kX64S32x4UnpackHigh:
    3189         276 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhdq);
    3190             :       break;
    3191             :     case kX64S16x8UnpackHigh:
    3192         288 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhwd);
    3193             :       break;
    3194             :     case kX64S8x16UnpackHigh:
    3195         216 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhbw);
    3196             :       break;
    3197             :     case kX64S64x2UnpackLow:
    3198           0 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklqdq);
    3199             :       break;
    3200             :     case kX64S32x4UnpackLow:
    3201         264 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckldq);
    3202             :       break;
    3203             :     case kX64S16x8UnpackLow:
    3204         348 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklwd);
    3205             :       break;
    3206             :     case kX64S8x16UnpackLow:
    3207         300 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklbw);
    3208             :       break;
    3209             :     case kX64S16x8UnzipHigh: {
    3210             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    3211             :       XMMRegister dst = i.OutputSimd128Register();
    3212             :       XMMRegister src2 = dst;
    3213             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
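                      :       // Annotation (not in source): shifting each dword right by 16 moves the
                      :       // odd (high) 16-bit lane into the low position; packusdw then
                      :       // concatenates the odd lanes of both inputs.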
    3214          76 :       if (instr->InputCount() == 2) {
    3215         136 :         ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
    3216          68 :         __ psrld(kScratchDoubleReg, 16);
    3217             :         src2 = kScratchDoubleReg;
    3218             :       }
    3219          76 :       __ psrld(dst, 16);
    3220             :       __ packusdw(dst, src2);
    3221             :       break;
    3222             :     }
    3223             :     case kX64S16x8UnzipLow: {
    3224             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    3225             :       XMMRegister dst = i.OutputSimd128Register();
    3226             :       XMMRegister src2 = dst;
    3227             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3228          48 :       __ pxor(kScratchDoubleReg, kScratchDoubleReg);
    3229          48 :       if (instr->InputCount() == 2) {
    3230          80 :         ASSEMBLE_SIMD_IMM_INSTR(pblendw, kScratchDoubleReg, 1, 0x55);
    3231             :         src2 = kScratchDoubleReg;
    3232             :       }
    3233          48 :       __ pblendw(dst, kScratchDoubleReg, 0xaa);
    3234             :       __ packusdw(dst, src2);
    3235             :       break;
    3236             :     }
    3237             :     case kX64S8x16UnzipHigh: {
    3238             :       XMMRegister dst = i.OutputSimd128Register();
    3239             :       XMMRegister src2 = dst;
    3240             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3241         112 :       if (instr->InputCount() == 2) {
    3242         208 :         ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
    3243         104 :         __ psrlw(kScratchDoubleReg, 8);
    3244             :         src2 = kScratchDoubleReg;
    3245             :       }
    3246         112 :       __ psrlw(dst, 8);
    3247             :       __ packuswb(dst, src2);
    3248             :       break;
    3249             :     }
    3250             :     case kX64S8x16UnzipLow: {
    3251             :       XMMRegister dst = i.OutputSimd128Register();
    3252             :       XMMRegister src2 = dst;
    3253             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3254          72 :       if (instr->InputCount() == 2) {
    3255         128 :         ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
    3256          64 :         __ psllw(kScratchDoubleReg, 8);
    3257          64 :         __ psrlw(kScratchDoubleReg, 8);
    3258             :         src2 = kScratchDoubleReg;
    3259             :       }
    3260          72 :       __ psllw(dst, 8);
    3261          72 :       __ psrlw(dst, 8);
    3262             :       __ packuswb(dst, src2);
    3263             :       break;
    3264             :     }
    3265             :     case kX64S8x16TransposeLow: {
    3266             :       XMMRegister dst = i.OutputSimd128Register();
    3267             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3268          80 :       __ psllw(dst, 8);
    3269          80 :       if (instr->InputCount() == 1) {
    3270           8 :         __ movups(kScratchDoubleReg, dst);
    3271             :       } else {
    3272             :         DCHECK_EQ(2, instr->InputCount());
    3273         144 :         ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
    3274          72 :         __ psllw(kScratchDoubleReg, 8);
    3275             :       }
    3276          80 :       __ psrlw(dst, 8);
    3277             :       __ por(dst, kScratchDoubleReg);
    3278             :       break;
    3279             :     }
    3280             :     case kX64S8x16TransposeHigh: {
    3281             :       XMMRegister dst = i.OutputSimd128Register();
    3282             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3283          96 :       __ psrlw(dst, 8);
    3284          96 :       if (instr->InputCount() == 1) {
    3285           8 :         __ movups(kScratchDoubleReg, dst);
    3286             :       } else {
    3287             :         DCHECK_EQ(2, instr->InputCount());
    3288         176 :         ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
    3289          88 :         __ psrlw(kScratchDoubleReg, 8);
    3290             :       }
    3291          96 :       __ psllw(kScratchDoubleReg, 8);
    3292             :       __ por(dst, kScratchDoubleReg);
    3293             :       break;
    3294             :     }
    3295             :     case kX64S8x8Reverse:
    3296             :     case kX64S8x4Reverse:
    3297             :     case kX64S8x2Reverse: {
    3298             :       DCHECK_EQ(1, instr->InputCount());
    3299             :       XMMRegister dst = i.OutputSimd128Register();
    3300             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3301         336 :       if (arch_opcode != kX64S8x2Reverse) {
    3302             :         // First shuffle words into position.
    3303         228 :         int8_t shuffle_mask = arch_opcode == kX64S8x4Reverse ? 0xB1 : 0x1B;
    3304         228 :         __ pshuflw(dst, dst, shuffle_mask);
    3305         228 :         __ pshufhw(dst, dst, shuffle_mask);
    3306             :       }
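                      :       // Annotation (not in source): for the 4- and 8-byte reversals the word
                      :       // shuffle above has already reversed the 16-bit words; the shift/or
                      :       // sequence below swaps the two bytes inside every word, which alone
                      :       // suffices for kX64S8x2Reverse.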
    3307         336 :       __ movaps(kScratchDoubleReg, dst);
    3308         336 :       __ psrlw(kScratchDoubleReg, 8);
    3309         336 :       __ psllw(dst, 8);
    3310             :       __ por(dst, kScratchDoubleReg);
    3311             :       break;
    3312             :     }
    3313             :     case kX64S1x4AnyTrue:
    3314             :     case kX64S1x8AnyTrue:
    3315             :     case kX64S1x16AnyTrue: {
    3316             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    3317             :       Register dst = i.OutputRegister();
    3318             :       XMMRegister src = i.InputSimd128Register(0);
    3319             :       Register tmp = i.TempRegister(0);
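                      :       // Annotation (not in source): ptest sets ZF when the register is all
                      :       // zeroes; dst is preset to 1 and conditionally replaced with 0 in that
                      :       // case.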
    3320          60 :       __ xorq(tmp, tmp);
    3321             :       __ movq(dst, Immediate(1));
    3322             :       __ ptest(src, src);
    3323          60 :       __ cmovq(zero, dst, tmp);
    3324             :       break;
    3325             :     }
    3326             :     case kX64S1x4AllTrue:
    3327             :     case kX64S1x8AllTrue:
    3328             :     case kX64S1x16AllTrue: {
    3329             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    3330             :       Register dst = i.OutputRegister();
    3331             :       XMMRegister src = i.InputSimd128Register(0);
    3332             :       Register tmp = i.TempRegister(0);
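                      :       // Annotation (not in source): this relies on the lanes being comparison
                      :       // masks (all ones or all zeroes), so all lanes are true iff NOT(src)
                      :       // tests as all zeroes.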
    3333          60 :       __ movq(tmp, Immediate(1));
    3334             :       __ xorq(dst, dst);
    3335             :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    3336             :       __ pxor(kScratchDoubleReg, src);
    3337             :       __ ptest(kScratchDoubleReg, kScratchDoubleReg);
    3338          60 :       __ cmovq(zero, dst, tmp);
    3339             :       break;
    3340             :     }
    3341             :     case kX64StackCheck:
    3342      562627 :       __ CompareRoot(rsp, RootIndex::kStackLimit);
    3343      562635 :       break;
    3344             :     case kWord32AtomicExchangeInt8: {
    3345         817 :       __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
    3346         817 :       __ movsxbl(i.InputRegister(0), i.InputRegister(0));
    3347         817 :       break;
    3348             :     }
    3349             :     case kWord32AtomicExchangeUint8: {
    3350         743 :       __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
    3351             :       __ movzxbl(i.InputRegister(0), i.InputRegister(0));
    3352             :       break;
    3353             :     }
    3354             :     case kWord32AtomicExchangeInt16: {
    3355         875 :       __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
    3356         877 :       __ movsxwl(i.InputRegister(0), i.InputRegister(0));
    3357         877 :       break;
    3358             :     }
    3359             :     case kWord32AtomicExchangeUint16: {
    3360         708 :       __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
    3361             :       __ movzxwl(i.InputRegister(0), i.InputRegister(0));
    3362             :       break;
    3363             :     }
    3364             :     case kWord32AtomicExchangeWord32: {
    3365        1178 :       __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
    3366             :       break;
    3367             :     }
    3368             :     case kWord32AtomicCompareExchangeInt8: {
    3369         112 :       __ lock();
    3370         112 :       __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
    3371         112 :       __ movsxbl(rax, rax);
    3372         112 :       break;
    3373             :     }
    3374             :     case kWord32AtomicCompareExchangeUint8: {
    3375         132 :       __ lock();
    3376         132 :       __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
    3377             :       __ movzxbl(rax, rax);
    3378             :       break;
    3379             :     }
    3380             :     case kWord32AtomicCompareExchangeInt16: {
    3381         112 :       __ lock();
    3382         112 :       __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
    3383         112 :       __ movsxwl(rax, rax);
    3384         112 :       break;
    3385             :     }
    3386             :     case kWord32AtomicCompareExchangeUint16: {
    3387         133 :       __ lock();
    3388         133 :       __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
    3389             :       __ movzxwl(rax, rax);
    3390             :       break;
    3391             :     }
    3392             :     case kWord32AtomicCompareExchangeWord32: {
    3393         263 :       __ lock();
    3394             :       __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
    3395             :       break;
    3396             :     }
    3397             : #define ATOMIC_BINOP_CASE(op, inst)              \
    3398             :   case kWord32Atomic##op##Int8:                  \
    3399             :     ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
    3400             :     __ movsxbl(rax, rax);                        \
    3401             :     break;                                       \
    3402             :   case kWord32Atomic##op##Uint8:                 \
    3403             :     ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
    3404             :     __ movzxbl(rax, rax);                        \
    3405             :     break;                                       \
    3406             :   case kWord32Atomic##op##Int16:                 \
    3407             :     ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
    3408             :     __ movsxwl(rax, rax);                        \
    3409             :     break;                                       \
    3410             :   case kWord32Atomic##op##Uint16:                \
    3411             :     ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
    3412             :     __ movzxwl(rax, rax);                        \
    3413             :     break;                                       \
    3414             :   case kWord32Atomic##op##Word32:                \
    3415             :     ASSEMBLE_ATOMIC_BINOP(inst, movl, cmpxchgl); \
    3416             :     break;
    3417        8556 :       ATOMIC_BINOP_CASE(Add, addl)
    3418        8531 :       ATOMIC_BINOP_CASE(Sub, subl)
    3419        7283 :       ATOMIC_BINOP_CASE(And, andl)
    3420        8879 :       ATOMIC_BINOP_CASE(Or, orl)
    3421        8748 :       ATOMIC_BINOP_CASE(Xor, xorl)
    3422             : #undef ATOMIC_BINOP_CASE
    3423             :     case kX64Word64AtomicExchangeUint8: {
    3424         565 :       __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
    3425             :       __ movzxbq(i.InputRegister(0), i.InputRegister(0));
    3426             :       break;
    3427             :     }
    3428             :     case kX64Word64AtomicExchangeUint16: {
    3429         599 :       __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
    3430             :       __ movzxwq(i.InputRegister(0), i.InputRegister(0));
    3431             :       break;
    3432             :     }
    3433             :     case kX64Word64AtomicExchangeUint32: {
    3434         490 :       __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
    3435             :       break;
    3436             :     }
    3437             :     case kX64Word64AtomicExchangeUint64: {
    3438         676 :       __ xchgq(i.InputRegister(0), i.MemoryOperand(1));
    3439             :       break;
    3440             :     }
    3441             :     case kX64Word64AtomicCompareExchangeUint8: {
    3442          18 :       __ lock();
    3443          18 :       __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
    3444             :       __ movzxbq(rax, rax);
    3445             :       break;
    3446             :     }
    3447             :     case kX64Word64AtomicCompareExchangeUint16: {
    3448          27 :       __ lock();
    3449          27 :       __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
    3450             :       __ movzxwq(rax, rax);
    3451             :       break;
    3452             :     }
    3453             :     case kX64Word64AtomicCompareExchangeUint32: {
    3454          29 :       __ lock();
    3455             :       __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
    3456             :       break;
    3457             :     }
    3458             :     case kX64Word64AtomicCompareExchangeUint64: {
    3459         276 :       __ lock();
    3460             :       __ cmpxchgq(i.MemoryOperand(2), i.InputRegister(1));
    3461             :       break;
    3462             :     }
    3463             : #define ATOMIC64_BINOP_CASE(op, inst)              \
    3464             :   case kX64Word64Atomic##op##Uint8:                \
    3465             :     ASSEMBLE_ATOMIC64_BINOP(inst, movb, cmpxchgb); \
    3466             :     __ movzxbq(rax, rax);                          \
    3467             :     break;                                         \
    3468             :   case kX64Word64Atomic##op##Uint16:               \
    3469             :     ASSEMBLE_ATOMIC64_BINOP(inst, movw, cmpxchgw); \
    3470             :     __ movzxwq(rax, rax);                          \
    3471             :     break;                                         \
    3472             :   case kX64Word64Atomic##op##Uint32:               \
    3473             :     ASSEMBLE_ATOMIC64_BINOP(inst, movl, cmpxchgl); \
    3474             :     break;                                         \
    3475             :   case kX64Word64Atomic##op##Uint64:               \
    3476             :     ASSEMBLE_ATOMIC64_BINOP(inst, movq, cmpxchgq); \
    3477             :     break;
    3478        3892 :       ATOMIC64_BINOP_CASE(Add, addq)
    3479        2997 :       ATOMIC64_BINOP_CASE(Sub, subq)
    3480        4054 :       ATOMIC64_BINOP_CASE(And, andq)
    3481        4164 :       ATOMIC64_BINOP_CASE(Or, orq)
    3482        3070 :       ATOMIC64_BINOP_CASE(Xor, xorq)
    3483             : #undef ATOMIC64_BINOP_CASE
    3484             :     case kWord32AtomicLoadInt8:
    3485             :     case kWord32AtomicLoadUint8:
    3486             :     case kWord32AtomicLoadInt16:
    3487             :     case kWord32AtomicLoadUint16:
    3488             :     case kWord32AtomicLoadWord32:
    3489             :     case kWord32AtomicStoreWord8:
    3490             :     case kWord32AtomicStoreWord16:
    3491             :     case kWord32AtomicStoreWord32:
    3492             :     case kX64Word64AtomicLoadUint8:
    3493             :     case kX64Word64AtomicLoadUint16:
    3494             :     case kX64Word64AtomicLoadUint32:
    3495             :     case kX64Word64AtomicLoadUint64:
    3496             :     case kX64Word64AtomicStoreWord8:
    3497             :     case kX64Word64AtomicStoreWord16:
    3498             :     case kX64Word64AtomicStoreWord32:
    3499             :     case kX64Word64AtomicStoreWord64:
    3500           0 :       UNREACHABLE();  // Won't be generated by instruction selector.
    3501             :       break;
    3502             :   }
    3503             :   return kSuccess;
     3504             : }  // NOLINT(readability/fn_size)
    3505             : 
    3506             : #undef ASSEMBLE_UNOP
    3507             : #undef ASSEMBLE_BINOP
    3508             : #undef ASSEMBLE_COMPARE
    3509             : #undef ASSEMBLE_MULT
    3510             : #undef ASSEMBLE_SHIFT
    3511             : #undef ASSEMBLE_MOVX
    3512             : #undef ASSEMBLE_SSE_BINOP
    3513             : #undef ASSEMBLE_SSE_UNOP
    3514             : #undef ASSEMBLE_AVX_BINOP
    3515             : #undef ASSEMBLE_IEEE754_BINOP
    3516             : #undef ASSEMBLE_IEEE754_UNOP
    3517             : #undef ASSEMBLE_ATOMIC_BINOP
    3518             : #undef ASSEMBLE_ATOMIC64_BINOP
    3519             : #undef ASSEMBLE_SIMD_INSTR
    3520             : #undef ASSEMBLE_SIMD_IMM_INSTR
    3521             : #undef ASSEMBLE_SIMD_PUNPCK_SHUFFLE
    3522             : #undef ASSEMBLE_SIMD_IMM_SHUFFLE
    3523             : 
    3524             : namespace {
    3525             : 
    3526     6195604 : Condition FlagsConditionToCondition(FlagsCondition condition) {
    3527     6195604 :   switch (condition) {
    3528             :     case kUnorderedEqual:
    3529             :     case kEqual:
    3530             :       return equal;
    3531             :     case kUnorderedNotEqual:
    3532             :     case kNotEqual:
    3533     1450383 :       return not_equal;
    3534             :     case kSignedLessThan:
    3535      176519 :       return less;
    3536             :     case kSignedGreaterThanOrEqual:
    3537       59288 :       return greater_equal;
    3538             :     case kSignedLessThanOrEqual:
    3539       66213 :       return less_equal;
    3540             :     case kSignedGreaterThan:
    3541       67172 :       return greater;
    3542             :     case kUnsignedLessThan:
    3543      164001 :       return below;
    3544             :     case kUnsignedGreaterThanOrEqual:
    3545      378916 :       return above_equal;
    3546             :     case kUnsignedLessThanOrEqual:
    3547      945163 :       return below_equal;
    3548             :     case kUnsignedGreaterThan:
    3549      120214 :       return above;
    3550             :     case kOverflow:
    3551      179939 :       return overflow;
    3552             :     case kNotOverflow:
    3553        1008 :       return no_overflow;
    3554             :     default:
    3555             :       break;
    3556             :   }
    3557           0 :   UNREACHABLE();
    3558             : }
    3559             : 
    3560             : }  // namespace
    3561             : 
    3562             : // Assembles branches after this instruction.
    3563     5349189 : void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
    3564             :   Label::Distance flabel_distance =
    3565     5349189 :       branch->fallthru ? Label::kNear : Label::kFar;
    3566     5349189 :   Label* tlabel = branch->true_label;
    3567     5349189 :   Label* flabel = branch->false_label;
    3568     5349189 :   if (branch->condition == kUnorderedEqual) {
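                      :   // Annotation (not in source): floating-point compares (ucomiss/ucomisd)
                      :   // set the parity flag when an operand is NaN, so the unordered conditions
                      :   // dispatch on parity_even before testing the condition itself.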
    3569       57915 :     __ j(parity_even, flabel, flabel_distance);
    3570     5291274 :   } else if (branch->condition == kUnorderedNotEqual) {
    3571      104680 :     __ j(parity_even, tlabel);
    3572             :   }
    3573     5349189 :   __ j(FlagsConditionToCondition(branch->condition), tlabel);
    3574             : 
    3575     5349222 :   if (!branch->fallthru) __ jmp(flabel, flabel_distance);
    3576     5349222 : }
    3577             : 
    3578           0 : void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
    3579             :                                             Instruction* instr) {
    3580             :   // TODO(jarin) Handle float comparisons (kUnordered[Not]Equal).
    3581           0 :   if (condition == kUnorderedEqual || condition == kUnorderedNotEqual) {
    3582             :     return;
    3583             :   }
    3584             : 
    3585             :   condition = NegateFlagsCondition(condition);
    3586           0 :   __ movl(kScratchRegister, Immediate(0));
    3587           0 :   __ cmovq(FlagsConditionToCondition(condition), kSpeculationPoisonRegister,
    3588           0 :            kScratchRegister);
    3589             : }
    3590             : 
    3591      328057 : void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
    3592             :                                             BranchInfo* branch) {
    3593             :   Label::Distance flabel_distance =
    3594      328057 :       branch->fallthru ? Label::kNear : Label::kFar;
    3595      328057 :   Label* tlabel = branch->true_label;
    3596      328057 :   Label* flabel = branch->false_label;
    3597      328057 :   Label nodeopt;
    3598      328057 :   if (branch->condition == kUnorderedEqual) {
    3599           0 :     __ j(parity_even, flabel, flabel_distance);
    3600      328057 :   } else if (branch->condition == kUnorderedNotEqual) {
    3601        4140 :     __ j(parity_even, tlabel);
    3602             :   }
    3603      328057 :   __ j(FlagsConditionToCondition(branch->condition), tlabel);
    3604             : 
    3605      328058 :   if (FLAG_deopt_every_n_times > 0) {
    3606             :     ExternalReference counter =
    3607         288 :         ExternalReference::stress_deopt_count(isolate());
    3608             : 
    3609         288 :     __ pushfq();
    3610         288 :     __ pushq(rax);
    3611         288 :     __ load_rax(counter);
    3612             :     __ decl(rax);
    3613         288 :     __ j(not_zero, &nodeopt);
    3614             : 
    3615         576 :     __ Set(rax, FLAG_deopt_every_n_times);
    3616         288 :     __ store_rax(counter);
    3617         288 :     __ popq(rax);
    3618         288 :     __ popfq();
    3619         288 :     __ jmp(tlabel);
    3620             : 
    3621         288 :     __ bind(&nodeopt);
    3622         288 :     __ store_rax(counter);
    3623         288 :     __ popq(rax);
    3624         288 :     __ popfq();
    3625             :   }
    3626             : 
    3627      328058 :   if (!branch->fallthru) {
    3628           0 :     __ jmp(flabel, flabel_distance);
    3629             :   }
    3630      328058 : }
    3631             : 
    3632     5035280 : void CodeGenerator::AssembleArchJump(RpoNumber target) {
    3633     8252469 :   if (!IsNextInAssemblyOrder(target)) __ jmp(GetLabel(target));
    3634     5035331 : }
    3635             : 
    3636      142022 : void CodeGenerator::AssembleArchTrap(Instruction* instr,
    3637             :                                      FlagsCondition condition) {
    3638             :   auto ool = new (zone()) WasmOutOfLineTrap(this, instr);
    3639             :   Label* tlabel = ool->entry();
    3640      141934 :   Label end;
    3641      141934 :   if (condition == kUnorderedEqual) {
    3642           0 :     __ j(parity_even, &end);
    3643      141934 :   } else if (condition == kUnorderedNotEqual) {
    3644         307 :     __ j(parity_even, tlabel);
    3645             :   }
    3646      141934 :   __ j(FlagsConditionToCondition(condition), tlabel);
    3647      142080 :   __ bind(&end);
    3648      142164 : }
    3649             : 
    3650             : // Assembles boolean materializations after this instruction.
    3651      376429 : void CodeGenerator::AssembleArchBoolean(Instruction* instr,
    3652             :                                         FlagsCondition condition) {
    3653             :   X64OperandConverter i(this, instr);
    3654      376429 :   Label done;
    3655             : 
    3656             :   // Materialize a full 64-bit 1 or 0 value. The result register is always the
    3657             :   // last output of the instruction.
    3658      376429 :   Label check;
    3659             :   DCHECK_NE(0u, instr->OutputCount());
    3660      376429 :   Register reg = i.OutputRegister(instr->OutputCount() - 1);
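                      :   // Annotation (not in source): for the unordered conditions, parity_odd
                      :   // (no NaN) skips ahead to the normal setcc path; the NaN outcome is
                      :   // materialized directly as 0 for equal and 1 for not-equal.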
    3661      376429 :   if (condition == kUnorderedEqual) {
    3662        2920 :     __ j(parity_odd, &check, Label::kNear);
    3663             :     __ movl(reg, Immediate(0));
    3664        2920 :     __ jmp(&done, Label::kNear);
    3665      373509 :   } else if (condition == kUnorderedNotEqual) {
    3666        2492 :     __ j(parity_odd, &check, Label::kNear);
    3667             :     __ movl(reg, Immediate(1));
    3668        2492 :     __ jmp(&done, Label::kNear);
    3669             :   }
    3670      376429 :   __ bind(&check);
    3671      376433 :   __ setcc(FlagsConditionToCondition(condition), reg);
    3672             :   __ movzxbl(reg, reg);
    3673      376430 :   __ bind(&done);
    3674      376434 : }
    3675             : 
    3676       34090 : void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
    3677             :   X64OperandConverter i(this, instr);
    3678       34090 :   Register input = i.InputRegister(0);
    3679             :   std::vector<std::pair<int32_t, Label*>> cases;
    3680      419084 :   for (size_t index = 2; index < instr->InputCount(); index += 2) {
    3681      384992 :     cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
    3682             :   }
    3683             :   AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
    3684       34092 :                                       cases.data() + cases.size());
    3685       34091 : }
    3686             : 
    3687           0 : void CodeGenerator::AssembleArchLookupSwitch(Instruction* instr) {
    3688             :   X64OperandConverter i(this, instr);
    3689             :   Register input = i.InputRegister(0);
    3690           0 :   for (size_t index = 2; index < instr->InputCount(); index += 2) {
    3691           0 :     __ cmpl(input, Immediate(i.InputInt32(index + 0)));
    3692           0 :     __ j(equal, GetLabel(i.InputRpo(index + 1)));
    3693             :   }
    3694           0 :   AssembleArchJump(i.InputRpo(1));
    3695           0 : }
    3696             : 
    3697         315 : void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
    3698             :   X64OperandConverter i(this, instr);
    3699             :   Register input = i.InputRegister(0);
    3700         315 :   int32_t const case_count = static_cast<int32_t>(instr->InputCount() - 2);
    3701         315 :   Label** cases = zone()->NewArray<Label*>(case_count);
    3702      399611 :   for (int32_t index = 0; index < case_count; ++index) {
    3703      399296 :     cases[index] = GetLabel(i.InputRpo(index + 2));
    3704             :   }
    3705         315 :   Label* const table = AddJumpTable(cases, case_count);
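                      :   // Annotation (not in source): the input is bounds-checked against the
                      :   // table size, branching to the target at input 1 when out of range, and
                      :   // otherwise jumps indirectly through the 8-byte-per-entry jump table.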
    3706         315 :   __ cmpl(input, Immediate(case_count));
    3707         315 :   __ j(above_equal, GetLabel(i.InputRpo(1)));
    3708         630 :   __ leaq(kScratchRegister, Operand(table));
    3709         315 :   __ jmp(Operand(kScratchRegister, input, times_8, 0));
    3710         315 : }
    3711             : 
    3712             : namespace {
    3713             : 
    3714             : static const int kQuadWordSize = 16;
    3715             : 
    3716             : }  // namespace
    3717             : 
    3718     2629955 : void CodeGenerator::FinishFrame(Frame* frame) {
    3719             :   auto call_descriptor = linkage()->GetIncomingDescriptor();
    3720             : 
    3721             :   const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
    3722     2629955 :   if (saves_fp != 0) {
    3723             :     frame->AlignSavedCalleeRegisterSlots();
    3724           0 :     if (saves_fp != 0) {  // Save callee-saved XMM registers.
    3725             :       const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
    3726           0 :       frame->AllocateSavedCalleeRegisterSlots(
    3727           0 :           saves_fp_count * (kQuadWordSize / kSystemPointerSize));
    3728             :     }
    3729             :   }
    3730             :   const RegList saves = call_descriptor->CalleeSavedRegisters();
    3731     2629955 :   if (saves != 0) {  // Save callee-saved registers.
    3732             :     int count = 0;
    3733    29846157 :     for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
    3734    14470864 :       if (((1 << i) & saves)) {
    3735     4522145 :         ++count;
    3736             :       }
    3737             :     }
    3738             :     frame->AllocateSavedCalleeRegisterSlots(count);
    3739             :   }
    3740     2629955 : }
    3741             : 
    3742     2651810 : void CodeGenerator::AssembleConstructFrame() {
    3743             :   auto call_descriptor = linkage()->GetIncomingDescriptor();
    3744     2651810 :   if (frame_access_state()->has_frame()) {
    3745             :     int pc_base = __ pc_offset();
    3746             : 
    3747     2652514 :     if (call_descriptor->IsCFunctionCall()) {
    3748      904429 :       __ pushq(rbp);
    3749             :       __ movq(rbp, rsp);
    3750     1748085 :     } else if (call_descriptor->IsJSFunctionCall()) {
    3751      645108 :       __ Prologue();
    3752      645116 :       if (call_descriptor->PushArgumentCount()) {
    3753       39056 :         __ pushq(kJavaScriptCallArgCountRegister);
    3754             :       }
    3755             :     } else {
    3756     2206078 :       __ StubPrologue(info()->GetOutputStackFrameType());
    3757     1102824 :       if (call_descriptor->IsWasmFunctionCall()) {
    3758      977424 :         __ pushq(kWasmInstanceRegister);
    3759      125400 :       } else if (call_descriptor->IsWasmImportWrapper()) {
    3760             :         // WASM import wrappers are passed a tuple in the place of the instance.
    3761             :         // Unpack the tuple into the instance and the target callable.
    3762             :         // This must be done here in the codegen because it cannot be expressed
    3763             :         // properly in the graph.
    3764             :         __ LoadTaggedPointerField(
    3765             :             kJSFunctionRegister,
    3766        6858 :             FieldOperand(kWasmInstanceRegister, Tuple2::kValue2Offset));
    3767             :         __ LoadTaggedPointerField(
    3768             :             kWasmInstanceRegister,
    3769        6858 :             FieldOperand(kWasmInstanceRegister, Tuple2::kValue1Offset));
    3770        6858 :         __ pushq(kWasmInstanceRegister);
    3771             :       }
    3772             :     }
    3773             : 
    3774     2652635 :     unwinding_info_writer_.MarkFrameConstructed(pc_base);
    3775             :   }
    3776             :   int required_slots = frame()->GetTotalFrameSlotCount() -
    3777     2651176 :                        call_descriptor->CalculateFixedFrameSize();
    3778             : 
    3779     2652142 :   if (info()->is_osr()) {
    3780             :     // TurboFan OSR-compiled functions cannot be entered directly.
    3781        4629 :     __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
    3782             : 
    3783             :     // Unoptimized code jumps directly to this entrypoint while the unoptimized
    3784             :     // frame is still on the stack. Optimized code uses OSR values directly from
    3785             :     // the unoptimized frame. Thus, all that needs to be done is to allocate the
    3786             :     // remaining stack slots.
    3787        4629 :     if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
    3788        4629 :     osr_pc_offset_ = __ pc_offset();
    3789        4629 :     required_slots -= static_cast<int>(osr_helper()->UnoptimizedFrameSlots());
    3790        4629 :     ResetSpeculationPoison();
    3791             :   }
    3792             : 
    3793             :   const RegList saves = call_descriptor->CalleeSavedRegisters();
    3794             :   const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
    3795             : 
    3796     2651906 :   if (required_slots > 0) {
    3797             :     DCHECK(frame_access_state()->has_frame());
    3798     2048694 :     if (info()->IsWasm() && required_slots > 128) {
    3799             :       // For WebAssembly functions with big frames we have to do the stack
    3800             :       // overflow check before we construct the frame. Otherwise we may not
    3801             :       // have enough space on the stack to call the runtime for the stack
    3802             :       // overflow.
    3803           8 :       Label done;
    3804             : 
    3805             :       // If the frame is bigger than the stack, we throw the stack overflow
    3806             :       // exception unconditionally. Thereby we can avoid the integer overflow
    3807             :       // check in the condition code.
    3808           8 :       if (required_slots * kSystemPointerSize < FLAG_stack_size * 1024) {
    3809           8 :         __ movq(kScratchRegister,
    3810             :                 FieldOperand(kWasmInstanceRegister,
    3811             :                              WasmInstanceObject::kRealStackLimitAddressOffset));
    3812          16 :         __ movq(kScratchRegister, Operand(kScratchRegister, 0));
    3813             :         __ addq(kScratchRegister,
    3814             :                 Immediate(required_slots * kSystemPointerSize));
    3815             :         __ cmpq(rsp, kScratchRegister);
    3816           8 :         __ j(above_equal, &done);
    3817             :       }
    3818             : 
    3819           8 :       __ near_call(wasm::WasmCode::kWasmStackOverflow,
    3820           8 :                    RelocInfo::WASM_STUB_CALL);
    3821             :       ReferenceMap* reference_map = new (zone()) ReferenceMap(zone());
    3822             :       RecordSafepoint(reference_map, Safepoint::kSimple,
    3823           8 :                       Safepoint::kNoLazyDeopt);
    3824           8 :       __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
    3825           8 :       __ bind(&done);
    3826             :     }
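                     :     // Worked example (illustrative numbers only): FLAG_stack_size is given in
                     :     // KiB, so the guard above compares the frame size in bytes
                     :     // (required_slots * kSystemPointerSize) against the whole stack in bytes
                     :     // (FLAG_stack_size * 1024). A 4000-slot frame is 4000 * 8 = 32000 bytes,
                     :     // far below a stack of roughly a megabyte, so the cmpq/j(above_equal) path
                     :     // is taken; only a frame larger than the entire stack throws unconditionally.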
    3827             : 
    3828             :     // Skip callee-saved and return slots, created below (worked example follows).
    3829     2048694 :     required_slots -= base::bits::CountPopulation(saves);
    3830             :     required_slots -= base::bits::CountPopulation(saves_fp) *
    3831     2048694 :                       (kQuadWordSize / kSystemPointerSize);
    3832     2048694 :     required_slots -= frame()->GetReturnSlotCount();
    3833     2048694 :     if (required_slots > 0) {
    3834     1872198 :       __ subq(rsp, Immediate(required_slots * kSystemPointerSize));
    3835             :     }
    3836             :   }
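                     :   // Worked example (hypothetical numbers): with a 20-slot frame, 5 callee-saved
                     :   // GP registers, no callee-saved XMM registers, and 2 return slots, the subq
                     :   // above reserves 20 - 5 - 0 - 2 = 13 slots; the remaining space is provided
                     :   // by the pushq loop and the return-slot subq further down.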
    3837             : 
    3838     2651924 :   if (saves_fp != 0) {  // Save callee-saved XMM registers.
    3839             :     const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
    3840           0 :     const int stack_size = saves_fp_count * kQuadWordSize;
    3841             :     // Adjust the stack pointer.
    3842           0 :     __ subq(rsp, Immediate(stack_size));
    3843             :     // Store the registers on the stack.
    3844             :     int slot_idx = 0;
    3845           0 :     for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
    3846           0 :       if (!((1 << i) & saves_fp)) continue;
    3847           0 :       __ movdqu(Operand(rsp, kQuadWordSize * slot_idx),
    3848           0 :                 XMMRegister::from_code(i));
    3849           0 :       slot_idx++;
    3850             :     }
    3851             :   }
    3852             : 
    3853     2651924 :   if (saves != 0) {  // Save callee-saved registers.
    3854    29846157 :     for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
    3855    14470864 :       if (!((1 << i) & saves)) continue;
    3856     4522145 :       __ pushq(Register::from_code(i));
    3857             :     }
    3858             :   }
    3859             : 
    3860             :   // Allocate return slots (located after callee-saved).
    3861     2651924 :   if (frame()->GetReturnSlotCount() > 0) {
    3862         666 :     __ subq(rsp, Immediate(frame()->GetReturnSlotCount() * kSystemPointerSize));
    3863             :   }
    3864     2651925 : }
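                     : 
                     : // A minimal, self-contained sketch of the RegList bitmask walk used in the
                     : // prologue above and mirrored in AssembleReturn below. Plain unsigned ints
                     : // stand in for V8's RegList/Register types, 16 is assumed for the number of
                     : // x64 general-purpose registers, and the function name is illustrative only.
                     : // Pushes scan from the highest register code downwards and the matching pops
                     : // scan upwards, so registers are restored in the exact reverse of the order
                     : // in which they were saved.
                     : namespace {
                     : inline int SketchCollectCalleeSavedCodes(unsigned saves, int out_codes[16]) {
                     :   constexpr int kSketchNumRegisters = 16;
                     :   int count = 0;
                     :   // Prologue order: highest code first (this mirrors the pushq loop above).
                     :   for (int i = kSketchNumRegisters - 1; i >= 0; i--) {
                     :     if (!((1u << i) & saves)) continue;
                     :     out_codes[count++] = i;
                     :   }
                     :   // The epilogue walks the same mask from code 0 upwards, popping in ascending
                     :   // order, which undoes the pushes exactly; the returned count equals the
                     :   // population count of the mask.
                     :   return count;
                     : }
                     : }  // namespace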
    3865             : 
    3866     2988870 : void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
    3867             :   auto call_descriptor = linkage()->GetIncomingDescriptor();
    3868             : 
    3869             :   // Restore registers.
    3870             :   const RegList saves = call_descriptor->CalleeSavedRegisters();
    3871     2988870 :   if (saves != 0) {
    3872             :     const int returns = frame()->GetReturnSlotCount();
    3873      913797 :     if (returns != 0) {
    3874         656 :       __ addq(rsp, Immediate(returns * kSystemPointerSize));
    3875             :     }
    3876    30155301 :     for (int i = 0; i < Register::kNumRegisters; i++) {
    3877    14620752 :       if (!((1 << i) & saves)) continue;
    3878     4568985 :       __ popq(Register::from_code(i));
    3879             :     }
    3880             :   }
    3881             :   const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
    3882     2988870 :   if (saves_fp != 0) {
    3883             :     const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
    3884           0 :     const int stack_size = saves_fp_count * kQuadWordSize;
    3885             :     // Load the registers from the stack.
    3886             :     int slot_idx = 0;
    3887           0 :     for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
    3888           0 :       if (!((1 << i) & saves_fp)) continue;
    3889           0 :       __ movdqu(XMMRegister::from_code(i),
    3890           0 :                 Operand(rsp, kQuadWordSize * slot_idx));
    3891           0 :       slot_idx++;
    3892             :     }
    3893             :     // Adjust the stack pointer.
    3894           0 :     __ addq(rsp, Immediate(stack_size));
    3895             :   }
    3896             : 
    3897             :   unwinding_info_writer_.MarkBlockWillExit();
    3898             : 
    3899             :   // Might need rcx for scratch if pop_size is too big or if there is a variable
    3900             :   // pop count.
    3901             :   DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rcx.bit());
    3902             :   DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rdx.bit());
    3903     2988663 :   size_t pop_size = call_descriptor->StackParameterCount() * kSystemPointerSize;
    3904             :   X64OperandConverter g(this, nullptr);
    3905     2988663 :   if (call_descriptor->IsCFunctionCall()) {
    3906      913797 :     AssembleDeconstructFrame();
    3907     2074866 :   } else if (frame_access_state()->has_frame()) {
    3908     4021987 :     if (pop->IsImmediate() && g.ToConstant(pop).ToInt32() == 0) {
    3909             :       // Canonicalize JSFunction return sites for now.
    3910     1991125 :       if (return_label_.is_bound()) {
    3911      369232 :         __ jmp(&return_label_);
    3912             :         return;
    3913             :       } else {
    3914     1621893 :         __ bind(&return_label_);
    3915     1621969 :         AssembleDeconstructFrame();
    3916             :       }
    3917             :     } else {
    3918       39900 :       AssembleDeconstructFrame();
    3919             :     }
    3920             :   }
    3921             : 
    3922     2620843 :   if (pop->IsImmediate()) {
    3923     5162206 :     pop_size += g.ToConstant(pop).ToInt32() * kSystemPointerSize;
    3924     2581239 :     CHECK_LT(pop_size, static_cast<size_t>(std::numeric_limits<int>::max()));
    3925     5162478 :     __ Ret(static_cast<int>(pop_size), rcx);
    3926             :   } else {
    3927             :     Register pop_reg = g.ToRegister(pop);
    3928       39876 :     Register scratch_reg = pop_reg == rcx ? rdx : rcx;
    3929       39876 :     __ popq(scratch_reg);
    3930       79752 :     __ leaq(rsp, Operand(rsp, pop_reg, times_8, static_cast<int>(pop_size)));
    3931       39876 :     __ jmp(scratch_reg);
    3932             :   }
    3933             : }
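                     : 
                     : // Worked example for the fixed-count return path above (hypothetical counts):
                     : // a call descriptor with 3 stack parameters returned with an immediate pop
                     : // count of 2 drops (3 + 2) * kSystemPointerSize = 40 bytes of arguments in
                     : // addition to the return address, i.e. Ret(40, rcx). The variable-count path
                     : // instead pops the return address into a scratch register, bumps rsp by
                     : // pop_reg * 8 plus the fixed part, and jumps back through the scratch register.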
    3934             : 
    3935     2629810 : void CodeGenerator::FinishCode() { tasm()->PatchConstPool(); }
    3936             : 
    3937    37780899 : void CodeGenerator::AssembleMove(InstructionOperand* source,
    3938             :                                  InstructionOperand* destination) {
    3939             :   X64OperandConverter g(this, nullptr);
    3940             :   // Helper function to write the given constant to the dst register.
    3941    18408755 :   auto MoveConstantToRegister = [&](Register dst, Constant src) {
    3942    18408755 :     switch (src.type()) {
    3943             :       case Constant::kInt32: {
    3944     3992970 :         if (RelocInfo::IsWasmReference(src.rmode())) {
    3945    26272008 :           __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));
    3946             :         } else {
    3947             :           int32_t value = src.ToInt32();
    3948     3992970 :           if (value == 0) {
    3949     1108922 :             __ xorl(dst, dst);
    3950             :           } else {
    3951     2884048 :             __ movl(dst, Immediate(value));
    3952             :           }
    3953             :         }
    3954             :         break;
    3955             :       }
    3956             :       case Constant::kInt64:
    3957     2318975 :         if (RelocInfo::IsWasmReference(src.rmode())) {
    3958           0 :           __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));
    3959             :         } else {
    3960     2318975 :           __ Set(dst, src.ToInt64());
    3961             :         }
    3962             :         break;
    3963             :       case Constant::kFloat32:
    3964         824 :         __ MoveNumber(dst, src.ToFloat32());
    3965         412 :         break;
    3966             :       case Constant::kFloat64:
    3967     1522619 :         __ MoveNumber(dst, src.ToFloat64().value());
    3968     1522622 :         break;
    3969             :       case Constant::kExternalReference:
    3970     2708418 :         __ Move(dst, src.ToExternalReference());
    3971     2708437 :         break;
    3972             :       case Constant::kHeapObject: {
    3973     7863244 :         Handle<HeapObject> src_object = src.ToHeapObject();
    3974             :         RootIndex index;
    3975     7863246 :         if (IsMaterializableFromRoot(src_object, &index)) {
    3976     1901747 :           __ LoadRoot(dst, index);
    3977             :         } else {
    3978     5961501 :           __ Move(dst, src_object);
    3979             :         }
    3980             :         break;
    3981             :       }
    3982             :       case Constant::kDelayedStringConstant: {
    3983        2120 :         const StringConstantBase* src_constant = src.ToDelayedStringConstant();
    3984        2120 :         __ MoveStringConstant(dst, src_constant);
    3985        2120 :         break;
    3986             :       }
    3987             :       case Constant::kRpoNumber:
    3988           0 :         UNREACHABLE();  // TODO(dcarney): load of labels on x64.
    3989             :         break;
    3990             :     }
    3991    56189736 :   };
    3992             :   // Helper function to write the given constant to the stack.
    3993       38645 :   auto MoveConstantToSlot = [&](Operand dst, Constant src) {
    3994       38645 :     if (!RelocInfo::IsWasmReference(src.rmode())) {
    3995       38645 :       switch (src.type()) {
    3996             :         case Constant::kInt32:
    3997       38645 :           __ movq(dst, Immediate(src.ToInt32()));
    3998       19627 :           return;
    3999             :         case Constant::kInt64:
    4000       12984 :           __ Set(dst, src.ToInt64());
    4001       12984 :           return;
    4002             :         default:
    4003             :           break;
    4004             :       }
    4005             :     }
    4006        6034 :     MoveConstantToRegister(kScratchRegister, src);
    4007        6034 :     __ movq(dst, kScratchRegister);
    4008    37780899 :   };
    4009             :   // Dispatch on the source and destination operand kinds.
    4010    37780899 :   switch (MoveType::InferMove(source, destination)) {
    4011             :     case MoveType::kRegisterToRegister:
    4012     4177874 :       if (source->IsRegister()) {
    4013     4035564 :         __ movq(g.ToRegister(destination), g.ToRegister(source));
    4014             :       } else {
    4015             :         DCHECK(source->IsFPRegister());
    4016             :         __ Movapd(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
    4017             :       }
    4018             :       return;
    4019             :     case MoveType::kRegisterToStack: {
    4020     5686759 :       Operand dst = g.ToOperand(destination);
    4021     5686759 :       if (source->IsRegister()) {
    4022     5374177 :         __ movq(dst, g.ToRegister(source));
    4023             :       } else {
    4024             :         DCHECK(source->IsFPRegister());
    4025             :         XMMRegister src = g.ToDoubleRegister(source);
    4026             :         MachineRepresentation rep =
    4027             :             LocationOperand::cast(source)->representation();
    4028      312582 :         if (rep != MachineRepresentation::kSimd128) {
    4029             :           __ Movsd(dst, src);
    4030             :         } else {
    4031             :           __ Movups(dst, src);
    4032             :         }
    4033             :       }
    4034             :       return;
    4035             :     }
    4036             :     case MoveType::kStackToRegister: {
    4037     8879031 :       Operand src = g.ToOperand(source);
    4038     8879031 :       if (source->IsStackSlot()) {
    4039     8392929 :         __ movq(g.ToRegister(destination), src);
    4040             :       } else {
    4041             :         DCHECK(source->IsFPStackSlot());
    4042             :         XMMRegister dst = g.ToDoubleRegister(destination);
    4043             :         MachineRepresentation rep =
    4044             :             LocationOperand::cast(source)->representation();
    4045      486102 :         if (rep != MachineRepresentation::kSimd128) {
    4046             :           __ Movsd(dst, src);
    4047             :         } else {
    4048             :           __ Movups(dst, src);
    4049             :         }
    4050             :       }
    4051             :       return;
    4052             :     }
    4053             :     case MoveType::kStackToStack: {
    4054       50173 :       Operand src = g.ToOperand(source);
    4055       50173 :       Operand dst = g.ToOperand(destination);
    4056       50173 :       if (source->IsStackSlot()) {
    4057             :         // Spill on demand to use a temporary register for memory-to-memory
    4058             :         // moves.
    4059       25015 :         __ movq(kScratchRegister, src);
    4060             :         __ movq(dst, kScratchRegister);
    4061             :       } else {
    4062             :         MachineRepresentation rep =
    4063             :             LocationOperand::cast(source)->representation();
    4064       25158 :         if (rep != MachineRepresentation::kSimd128) {
    4065             :           __ Movsd(kScratchDoubleReg, src);
    4066             :           __ Movsd(dst, kScratchDoubleReg);
    4067             :         } else {
    4068             :           DCHECK(source->IsSimd128StackSlot());
    4069             :           __ Movups(kScratchDoubleReg, src);
    4070             :           __ Movups(dst, kScratchDoubleReg);
    4071             :         }
    4072             :       }
    4073             :       return;
    4074             :     }
    4075             :     case MoveType::kConstantToRegister: {
    4076    18942774 :       Constant src = g.ToConstant(source);
    4077    18942972 :       if (destination->IsRegister()) {
    4078    18402770 :         MoveConstantToRegister(g.ToRegister(destination), src);
    4079             :       } else {
    4080             :         DCHECK(destination->IsFPRegister());
    4081      540202 :         XMMRegister dst = g.ToDoubleRegister(destination);
    4082      540202 :         if (src.type() == Constant::kFloat32) {
    4083             :           // TODO(turbofan): Can we do better here?
    4084      125229 :           __ Move(dst, bit_cast<uint32_t>(src.ToFloat32()));
    4085             :         } else {
    4086             :           DCHECK_EQ(src.type(), Constant::kFloat64);
    4087      414973 :           __ Move(dst, src.ToFloat64().AsUint64());
    4088             :         }
    4089             :       }
    4090             :       return;
    4091             :     }
    4092             :     case MoveType::kConstantToStack: {
    4093       45200 :       Constant src = g.ToConstant(source);
    4094       45200 :       Operand dst = g.ToOperand(destination);
    4095       45200 :       if (destination->IsStackSlot()) {
    4096       38645 :         MoveConstantToSlot(dst, src);
    4097             :       } else {
    4098             :         DCHECK(destination->IsFPStackSlot());
    4099        6555 :         if (src.type() == Constant::kFloat32) {
    4100        2980 :           __ movl(dst, Immediate(bit_cast<uint32_t>(src.ToFloat32())));
    4101             :         } else {
    4102             :           DCHECK_EQ(src.type(), Constant::kFloat64);
    4103        3575 :           __ movq(kScratchRegister, src.ToFloat64().AsUint64());
    4104             :           __ movq(dst, kScratchRegister);
    4105             :         }
    4106             :       }
    4107             :       return;
    4108             :     }
    4109             :   }
    4110           0 :   UNREACHABLE();
    4111             : }
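                     : 
                     : // Two details of the fast paths above, stated for clarity: a zero 32-bit
                     : // constant is materialized as `xorl dst, dst`, which has a shorter encoding
                     : // than `movl dst, imm32` and still clears the upper 32 bits of the
                     : // destination; and stack-to-stack moves always bounce through kScratchRegister
                     : // or kScratchDoubleReg, since x64 has no memory-to-memory mov encoding.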
    4112             : 
    4113       77532 : void CodeGenerator::AssembleSwap(InstructionOperand* source,
    4114             :                                  InstructionOperand* destination) {
    4115             :   X64OperandConverter g(this, nullptr);
    4116             :   // Dispatch on the source and destination operand kinds.  Not all
    4117             :   // combinations are possible.
    4118       77532 :   switch (MoveType::InferSwap(source, destination)) {
    4119             :     case MoveType::kRegisterToRegister: {
    4120       66932 :       if (source->IsRegister()) {
    4121             :         Register src = g.ToRegister(source);
    4122             :         Register dst = g.ToRegister(destination);
    4123       64281 :         __ movq(kScratchRegister, src);
    4124             :         __ movq(src, dst);
    4125             :         __ movq(dst, kScratchRegister);
    4126             :       } else {
    4127             :         DCHECK(source->IsFPRegister());
    4128             :         XMMRegister src = g.ToDoubleRegister(source);
    4129             :         XMMRegister dst = g.ToDoubleRegister(destination);
    4130             :         __ Movapd(kScratchDoubleReg, src);
    4131             :         __ Movapd(src, dst);
    4132             :         __ Movapd(dst, kScratchDoubleReg);
    4133             :       }
    4134             :       return;
    4135             :     }
    4136             :     case MoveType::kRegisterToStack: {
    4137        6603 :       if (source->IsRegister()) {
    4138             :         Register src = g.ToRegister(source);
    4139        1599 :         __ pushq(src);
    4140             :         frame_access_state()->IncreaseSPDelta(1);
    4141        1599 :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4142        1599 :                                                          kSystemPointerSize);
    4143             :         __ movq(src, g.ToOperand(destination));
    4144             :         frame_access_state()->IncreaseSPDelta(-1);
    4145        1599 :         __ popq(g.ToOperand(destination));
    4146             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4147        1599 :                                                          -kSystemPointerSize);
    4148             :       } else {
    4149             :         DCHECK(source->IsFPRegister());
    4150             :         XMMRegister src = g.ToDoubleRegister(source);
    4151        5004 :         Operand dst = g.ToOperand(destination);
    4152             :         MachineRepresentation rep =
    4153             :             LocationOperand::cast(source)->representation();
    4154        5004 :         if (rep != MachineRepresentation::kSimd128) {
    4155             :           __ Movsd(kScratchDoubleReg, src);
    4156             :           __ Movsd(src, dst);
    4157             :           __ Movsd(dst, kScratchDoubleReg);
    4158             :         } else {
    4159             :           __ Movups(kScratchDoubleReg, src);
    4160             :           __ Movups(src, dst);
    4161             :           __ Movups(dst, kScratchDoubleReg);
    4162             :         }
    4163             :       }
    4164             :       return;
    4165             :     }
    4166             :     case MoveType::kStackToStack: {
    4167        3997 :       Operand src = g.ToOperand(source);
    4168        3997 :       Operand dst = g.ToOperand(destination);
    4169             :       MachineRepresentation rep =
    4170             :           LocationOperand::cast(source)->representation();
    4171        3997 :       if (rep != MachineRepresentation::kSimd128) {
    4172             :         Register tmp = kScratchRegister;
    4173        3201 :         __ movq(tmp, dst);
    4174        3201 :         __ pushq(src);  // Then use stack to copy src to destination.
    4175        3201 :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4176        3201 :                                                          kSystemPointerSize);
    4177        3201 :         __ popq(dst);
    4178             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4179        3201 :                                                          -kSystemPointerSize);
    4180             :         __ movq(src, tmp);
    4181             :       } else {
    4182             :         // Without AVX, misaligned reads and writes will trap. Move using the
    4183             :         // stack, in two parts.
    4184         796 :         __ movups(kScratchDoubleReg, dst);  // Save dst in scratch register.
    4185         796 :         __ pushq(src);  // Then use stack to copy src to destination.
    4186         796 :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4187         796 :                                                          kSystemPointerSize);
    4188         796 :         __ popq(dst);
    4189             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4190         796 :                                                          -kSystemPointerSize);
    4191         796 :         __ pushq(g.ToOperand(source, kSystemPointerSize));
    4192             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4193         796 :                                                          kSystemPointerSize);
    4194         796 :         __ popq(g.ToOperand(destination, kSystemPointerSize));
    4195             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4196         796 :                                                          -kSystemPointerSize);
    4197         796 :         __ movups(src, kScratchDoubleReg);
    4198             :       }
    4199             :       return;
    4200             :     }
    4201             :     default:
    4202           0 :       UNREACHABLE();
    4203             :       break;
    4204             :   }
    4205             : }
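                     : 
                     : // In the Simd128 stack-to-stack case above, the destination slot is first
                     : // saved into kScratchDoubleReg, the 16-byte source is then copied over it in
                     : // two 8-byte pushq/popq pairs (the stack stands in for a second scratch
                     : // register), and finally the saved value is written back into the source
                     : // slot, completing the swap.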
    4206             : 
    4207         315 : void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
    4208      399611 :   for (size_t index = 0; index < target_count; ++index) {
    4209      199648 :     __ dq(targets[index]);
    4210             :   }
    4211         315 : }
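                     : 
                     : // Each dq(targets[index]) above emits an 8-byte entry containing the address
                     : // of the corresponding target label, so a table with N cases occupies
                     : // N * 8 bytes of the code object and can be consumed by an indexed indirect
                     : // jump in the switch lowering.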
    4212             : 
    4213             : #undef __
    4214             : 
    4215             : }  // namespace compiler
    4216             : }  // namespace internal
    4217      122036 : }  // namespace v8

Generated by: LCOV version 1.10