LCOV - code coverage report
Current view: top level - src/compiler/backend/x64 - code-generator-x64.cc (source / functions) Hit Total Coverage
Test: app.info Lines: 1423 1610 88.4 %
Date: 2019-02-19 Functions: 42 59 71.2 %

          Line data    Source code
       1             : // Copyright 2013 the V8 project authors. All rights reserved.
       2             : // Use of this source code is governed by a BSD-style license that can be
       3             : // found in the LICENSE file.
       4             : 
       5             : #include "src/compiler/backend/code-generator.h"
       6             : 
       7             : #include <limits>
       8             : 
       9             : #include "src/base/overflowing-math.h"
      10             : #include "src/compiler/backend/code-generator-impl.h"
      11             : #include "src/compiler/backend/gap-resolver.h"
      12             : #include "src/compiler/node-matchers.h"
      13             : #include "src/compiler/osr.h"
      14             : #include "src/heap/heap-inl.h"  // crbug.com/v8/8499
      15             : #include "src/macro-assembler.h"
      16             : #include "src/objects/smi.h"
      17             : #include "src/optimized-compilation-info.h"
      18             : #include "src/wasm/wasm-code-manager.h"
      19             : #include "src/wasm/wasm-objects.h"
      20             : #include "src/x64/assembler-x64.h"
      21             : 
      22             : namespace v8 {
      23             : namespace internal {
      24             : namespace compiler {
      25             : 
      26             : #define __ tasm()->
      27             : 
      28             : // Adds X64 specific methods for decoding operands.
      29             : class X64OperandConverter : public InstructionOperandConverter {
      30             :  public:
      31             :   X64OperandConverter(CodeGenerator* gen, Instruction* instr)
      32             :       : InstructionOperandConverter(gen, instr) {}
      33             : 
      34             :   Immediate InputImmediate(size_t index) {
      35     4961201 :     return ToImmediate(instr_->InputAt(index));
      36             :   }
      37             : 
      38     1067332 :   Operand InputOperand(size_t index, int extra = 0) {
      39     3201998 :     return ToOperand(instr_->InputAt(index), extra);
      40             :   }
      41             : 
      42           0 :   Operand OutputOperand() { return ToOperand(instr_->Output()); }
      43             : 
      44     3938198 :   Immediate ToImmediate(InstructionOperand* operand) {
      45     3938198 :     Constant constant = ToConstant(operand);
      46     3938218 :     if (constant.type() == Constant::kFloat64) {
      47             :       DCHECK_EQ(0, constant.ToFloat64().AsUint64());
      48        2183 :       return Immediate(0);
      49             :     }
      50     3936035 :     if (RelocInfo::IsWasmReference(constant.rmode())) {
      51           0 :       return Immediate(constant.ToInt32(), constant.rmode());
      52             :     }
      53     3936035 :     return Immediate(constant.ToInt32());
      54             :   }
      55             : 
      56             :   Operand ToOperand(InstructionOperand* op, int extra = 0) {
      57             :     DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
      58    14353980 :     return SlotToOperand(AllocatedOperand::cast(op)->index(), extra);
      59             :   }
      60             : 
      61    14366060 :   Operand SlotToOperand(int slot_index, int extra = 0) {
      62    28732120 :     FrameOffset offset = frame_access_state()->GetFrameOffset(slot_index);
      63             :     return Operand(offset.from_stack_pointer() ? rsp : rbp,
      64    28732120 :                    offset.offset() + extra);
      65             :   }
      66             : 
      67             :   static size_t NextOffset(size_t* offset) {
      68    15226274 :     size_t i = *offset;
      69    27695465 :     (*offset)++;
      70             :     return i;
      71             :   }
      72             : 
      73             :   static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) {
      74             :     STATIC_ASSERT(0 == static_cast<int>(times_1));
      75             :     STATIC_ASSERT(1 == static_cast<int>(times_2));
      76             :     STATIC_ASSERT(2 == static_cast<int>(times_4));
      77             :     STATIC_ASSERT(3 == static_cast<int>(times_8));
      78     1423067 :     int scale = static_cast<int>(mode - one);
      79             :     DCHECK(scale >= 0 && scale < 4);
      80     1423067 :     return static_cast<ScaleFactor>(scale);
      81             :   }
      82             : 
      83    15226274 :   Operand MemoryOperand(size_t* offset) {
      84    15226274 :     AddressingMode mode = AddressingModeField::decode(instr_->opcode());
      85    15226274 :     switch (mode) {
      86             :       case kMode_MR: {
      87    16799006 :         Register base = InputRegister(NextOffset(offset));
      88             :         int32_t disp = 0;
      89     1795612 :         return Operand(base, disp);
      90             :       }
      91             :       case kMode_MRI: {
      92    10531249 :         Register base = InputRegister(NextOffset(offset));
      93    10531249 :         int32_t disp = InputInt32(NextOffset(offset));
      94    10531275 :         return Operand(base, disp);
      95             :       }
      96             :       case kMode_MR1:
      97             :       case kMode_MR2:
      98             :       case kMode_MR4:
      99             :       case kMode_MR8: {
     100      715168 :         Register base = InputRegister(NextOffset(offset));
     101      715168 :         Register index = InputRegister(NextOffset(offset));
     102             :         ScaleFactor scale = ScaleFor(kMode_MR1, mode);
     103             :         int32_t disp = 0;
     104      715168 :         return Operand(base, index, scale, disp);
     105             :       }
     106             :       case kMode_MR1I:
     107             :       case kMode_MR2I:
     108             :       case kMode_MR4I:
     109             :       case kMode_MR8I: {
     110      538298 :         Register base = InputRegister(NextOffset(offset));
     111      538298 :         Register index = InputRegister(NextOffset(offset));
     112             :         ScaleFactor scale = ScaleFor(kMode_MR1I, mode);
     113      538298 :         int32_t disp = InputInt32(NextOffset(offset));
     114      538302 :         return Operand(base, index, scale, disp);
     115             :       }
     116             :       case kMode_M1: {
     117           0 :         Register base = InputRegister(NextOffset(offset));
     118             :         int32_t disp = 0;
     119           0 :         return Operand(base, disp);
     120             :       }
     121             :       case kMode_M2:
     122           0 :         UNREACHABLE();  // Should use kModeMR with more compact encoding instead
     123             :         return Operand(no_reg, 0);
     124             :       case kMode_M4:
     125             :       case kMode_M8: {
     126       23423 :         Register index = InputRegister(NextOffset(offset));
     127             :         ScaleFactor scale = ScaleFor(kMode_M1, mode);
     128             :         int32_t disp = 0;
     129       23423 :         return Operand(index, scale, disp);
     130             :       }
     131             :       case kMode_M1I:
     132             :       case kMode_M2I:
     133             :       case kMode_M4I:
     134             :       case kMode_M8I: {
     135      146178 :         Register index = InputRegister(NextOffset(offset));
     136             :         ScaleFactor scale = ScaleFor(kMode_M1I, mode);
     137      146178 :         int32_t disp = InputInt32(NextOffset(offset));
     138      146178 :         return Operand(index, scale, disp);
     139             :       }
     140             :       case kMode_Root: {
     141     1476346 :         Register base = kRootRegister;
     142     1476346 :         int32_t disp = InputInt32(NextOffset(offset));
     143     1476347 :         return Operand(base, disp);
     144             :       }
     145             :       case kMode_None:
     146           0 :         UNREACHABLE();
     147             :     }
     148           0 :     UNREACHABLE();
     149             :   }
     150             : 
     151             :   Operand MemoryOperand(size_t first_input = 0) {
     152     8169070 :     return MemoryOperand(&first_input);
     153             :   }
     154             : };
     155             : 
     156             : namespace {
     157             : 
     158             : bool HasImmediateInput(Instruction* instr, size_t index) {
     159    20369263 :   return instr->InputAt(index)->IsImmediate();
     160             : }
     161             : 
     162           0 : class OutOfLineLoadFloat32NaN final : public OutOfLineCode {
     163             :  public:
     164             :   OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result)
     165         132 :       : OutOfLineCode(gen), result_(result) {}
     166             : 
     167         132 :   void Generate() final {
     168         264 :     __ Xorps(result_, result_);
     169             :     __ Divss(result_, result_);
     170         132 :   }
     171             : 
     172             :  private:
     173             :   XMMRegister const result_;
     174             : };
     175             : 
     176           0 : class OutOfLineLoadFloat64NaN final : public OutOfLineCode {
     177             :  public:
     178             :   OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result)
     179         585 :       : OutOfLineCode(gen), result_(result) {}
     180             : 
     181         585 :   void Generate() final {
     182        1170 :     __ Xorpd(result_, result_);
     183             :     __ Divsd(result_, result_);
     184         585 :   }
     185             : 
     186             :  private:
     187             :   XMMRegister const result_;
     188             : };
     189             : 
     190           0 : class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
     191             :  public:
     192      104672 :   OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
     193             :                              XMMRegister input, StubCallMode stub_mode,
     194             :                              UnwindingInfoWriter* unwinding_info_writer)
     195             :       : OutOfLineCode(gen),
     196             :         result_(result),
     197             :         input_(input),
     198             :         stub_mode_(stub_mode),
     199             :         unwinding_info_writer_(unwinding_info_writer),
     200             :         isolate_(gen->isolate()),
     201      157012 :         zone_(gen->zone()) {}
     202             : 
     203       52330 :   void Generate() final {
     204      313997 :     __ subq(rsp, Immediate(kDoubleSize));
     205             :     unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
     206      104662 :                                                       kDoubleSize);
     207      104663 :     __ Movsd(MemOperand(rsp, 0), input_);
     208       52331 :     if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
     209             :       // A direct call to a wasm runtime stub defined in this module.
     210             :       // Just encode the stub index. This will be patched when the code
     211             :       // is added to the native module and copied into wasm code space.
     212        1445 :       __ near_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
     213             :     } else {
     214      101774 :       __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET);
     215             :     }
     216      157001 :     __ movl(result_, MemOperand(rsp, 0));
     217       52333 :     __ addq(rsp, Immediate(kDoubleSize));
     218             :     unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
     219      104666 :                                                       -kDoubleSize);
     220       52331 :   }
     221             : 
     222             :  private:
     223             :   Register const result_;
     224             :   XMMRegister const input_;
     225             :   StubCallMode stub_mode_;
     226             :   UnwindingInfoWriter* const unwinding_info_writer_;
     227             :   Isolate* isolate_;
     228             :   Zone* zone_;
     229             : };
     230             : 
     231           0 : class OutOfLineRecordWrite final : public OutOfLineCode {
     232             :  public:
     233      325633 :   OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand,
     234             :                        Register value, Register scratch0, Register scratch1,
     235             :                        RecordWriteMode mode, StubCallMode stub_mode)
     236             :       : OutOfLineCode(gen),
     237             :         object_(object),
     238             :         operand_(operand),
     239             :         value_(value),
     240             :         scratch0_(scratch0),
     241             :         scratch1_(scratch1),
     242             :         mode_(mode),
     243             :         stub_mode_(stub_mode),
     244      651266 :         zone_(gen->zone()) {}
     245             : 
     246      325633 :   void Generate() final {
     247      325633 :     if (mode_ > RecordWriteMode::kValueIsPointer) {
     248     1559316 :       __ JumpIfSmi(value_, exit());
     249             :     }
     250             :     __ CheckPageFlag(value_, scratch0_,
     251             :                      MemoryChunk::kPointersToHereAreInterestingMask, zero,
     252      651266 :                      exit());
     253      325633 :     __ leaq(scratch1_, operand_);
     254             : 
     255             :     RememberedSetAction const remembered_set_action =
     256             :         mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
     257      325633 :                                              : OMIT_REMEMBERED_SET;
     258             :     SaveFPRegsMode const save_fp_mode =
     259      651266 :         frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
     260             : 
     261      325633 :     if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
     262             :       // A direct call to a wasm runtime stub defined in this module.
     263             :       // Just encode the stub index. This will be patched when the code
     264             :       // is added to the native module and copied into wasm code space.
     265             :       __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
     266         152 :                              save_fp_mode, wasm::WasmCode::kWasmRecordWrite);
     267             :     } else {
     268             :       __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
     269      325481 :                              save_fp_mode);
     270             :     }
     271      325633 :   }
     272             : 
     273             :  private:
     274             :   Register const object_;
     275             :   Operand const operand_;
     276             :   Register const value_;
     277             :   Register const scratch0_;
     278             :   Register const scratch1_;
     279             :   RecordWriteMode const mode_;
     280             :   StubCallMode const stub_mode_;
     281             :   Zone* zone_;
     282             : };
     283             : 
     284           0 : class WasmOutOfLineTrap : public OutOfLineCode {
     285             :  public:
     286             :   WasmOutOfLineTrap(CodeGenerator* gen, Instruction* instr)
     287      271662 :       : OutOfLineCode(gen), gen_(gen), instr_(instr) {}
     288             : 
     289       33443 :   void Generate() override {
     290       66886 :     X64OperandConverter i(gen_, instr_);
     291             :     TrapId trap_id =
     292       33443 :         static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
     293             :     GenerateWithTrapId(trap_id);
     294       33451 :   }
     295             : 
     296             :  protected:
     297             :   CodeGenerator* gen_;
     298             : 
     299      271599 :   void GenerateWithTrapId(TrapId trap_id) { GenerateCallToTrap(trap_id); }
     300             : 
     301             :  private:
     302      271592 :   void GenerateCallToTrap(TrapId trap_id) {
     303      543253 :     if (!gen_->wasm_runtime_exception_support()) {
     304             :       // We cannot test calls to the runtime in cctest/test-run-wasm.
     305             :       // Therefore we emit a call to C here instead of a call to the runtime.
     306      849874 :       __ PrepareCallCFunction(0);
     307             :       __ CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(),
     308      306384 :                        0);
     309      153192 :       __ LeaveFrame(StackFrame::WASM_COMPILED);
     310      306384 :       auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
     311             :       size_t pop_size =
     312      153192 :           call_descriptor->StackParameterCount() * kSystemPointerSize;
     313             :       // Use rcx as a scratch register, we return anyways immediately.
     314      306384 :       __ Ret(static_cast<int>(pop_size), rcx);
     315             :     } else {
     316      118424 :       gen_->AssembleSourcePosition(instr_);
     317             :       // A direct call to a wasm runtime stub defined in this module.
     318             :       // Just encode the stub index. This will be patched when the code
     319             :       // is added to the native module and copied into wasm code space.
     320      236998 :       __ near_call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
     321             :       ReferenceMap* reference_map =
     322      118469 :           new (gen_->zone()) ReferenceMap(gen_->zone());
     323             :       gen_->RecordSafepoint(reference_map, Safepoint::kSimple,
     324      118516 :                             Safepoint::kNoLazyDeopt);
     325      118607 :       __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
     326             :     }
     327      271712 :   }
     328             : 
     329             :   Instruction* instr_;
     330             : };
     331             : 
     332           0 : class WasmProtectedInstructionTrap final : public WasmOutOfLineTrap {
     333             :  public:
     334             :   WasmProtectedInstructionTrap(CodeGenerator* gen, int pc, Instruction* instr)
     335      238102 :       : WasmOutOfLineTrap(gen, instr), pc_(pc) {}
     336             : 
     337      238146 :   void Generate() final {
     338      476292 :     gen_->AddProtectedInstructionLanding(pc_, __ pc_offset());
     339      238150 :     GenerateWithTrapId(TrapId::kTrapMemOutOfBounds);
     340      238261 :   }
     341             : 
     342             :  private:
     343             :   int pc_;
     344             : };
     345             : 
     346    11355741 : void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen,
     347             :                          InstructionCode opcode, Instruction* instr,
     348             :                          X64OperandConverter& i, int pc) {
     349             :   const MemoryAccessMode access_mode =
     350    11355741 :       static_cast<MemoryAccessMode>(MiscField::decode(opcode));
     351    11355741 :   if (access_mode == kMemoryAccessProtected) {
     352             :     new (zone) WasmProtectedInstructionTrap(codegen, pc, instr);
     353             :   }
     354    11355630 : }
     355             : 
     356    10697424 : void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
     357             :                                    InstructionCode opcode, Instruction* instr,
     358             :                                    X64OperandConverter& i) {
     359             :   const MemoryAccessMode access_mode =
     360    10697424 :       static_cast<MemoryAccessMode>(MiscField::decode(opcode));
     361    10697424 :   if (access_mode == kMemoryAccessPoisoned) {
     362           0 :     Register value = i.OutputRegister();
     363           0 :     codegen->tasm()->andq(value, kSpeculationPoisonRegister);
     364             :   }
     365    10697424 : }
     366             : 
     367             : }  // namespace
     368             : 
     369             : #define ASSEMBLE_UNOP(asm_instr)         \
     370             :   do {                                   \
     371             :     if (instr->Output()->IsRegister()) { \
     372             :       __ asm_instr(i.OutputRegister());  \
     373             :     } else {                             \
     374             :       __ asm_instr(i.OutputOperand());   \
     375             :     }                                    \
     376             :   } while (false)
     377             : 
     378             : #define ASSEMBLE_BINOP(asm_instr)                                     \
     379             :   do {                                                                \
     380             :     if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
     381             :       size_t index = 1;                                               \
     382             :       Operand right = i.MemoryOperand(&index);                        \
     383             :       __ asm_instr(i.InputRegister(0), right);                        \
     384             :     } else {                                                          \
     385             :       if (HasImmediateInput(instr, 1)) {                              \
     386             :         if (instr->InputAt(0)->IsRegister()) {                        \
     387             :           __ asm_instr(i.InputRegister(0), i.InputImmediate(1));      \
     388             :         } else {                                                      \
     389             :           __ asm_instr(i.InputOperand(0), i.InputImmediate(1));       \
     390             :         }                                                             \
     391             :       } else {                                                        \
     392             :         if (instr->InputAt(1)->IsRegister()) {                        \
     393             :           __ asm_instr(i.InputRegister(0), i.InputRegister(1));       \
     394             :         } else {                                                      \
     395             :           __ asm_instr(i.InputRegister(0), i.InputOperand(1));        \
     396             :         }                                                             \
     397             :       }                                                               \
     398             :     }                                                                 \
     399             :   } while (false)
     400             : 
     401             : #define ASSEMBLE_COMPARE(asm_instr)                                   \
     402             :   do {                                                                \
     403             :     if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
     404             :       size_t index = 0;                                               \
     405             :       Operand left = i.MemoryOperand(&index);                         \
     406             :       if (HasImmediateInput(instr, index)) {                          \
     407             :         __ asm_instr(left, i.InputImmediate(index));                  \
     408             :       } else {                                                        \
     409             :         __ asm_instr(left, i.InputRegister(index));                   \
     410             :       }                                                               \
     411             :     } else {                                                          \
     412             :       if (HasImmediateInput(instr, 1)) {                              \
     413             :         if (instr->InputAt(0)->IsRegister()) {                        \
     414             :           __ asm_instr(i.InputRegister(0), i.InputImmediate(1));      \
     415             :         } else {                                                      \
     416             :           __ asm_instr(i.InputOperand(0), i.InputImmediate(1));       \
     417             :         }                                                             \
     418             :       } else {                                                        \
     419             :         if (instr->InputAt(1)->IsRegister()) {                        \
     420             :           __ asm_instr(i.InputRegister(0), i.InputRegister(1));       \
     421             :         } else {                                                      \
     422             :           __ asm_instr(i.InputRegister(0), i.InputOperand(1));        \
     423             :         }                                                             \
     424             :       }                                                               \
     425             :     }                                                                 \
     426             :   } while (false)
     427             : 
     428             : #define ASSEMBLE_MULT(asm_instr)                              \
     429             :   do {                                                        \
     430             :     if (HasImmediateInput(instr, 1)) {                        \
     431             :       if (instr->InputAt(0)->IsRegister()) {                  \
     432             :         __ asm_instr(i.OutputRegister(), i.InputRegister(0),  \
     433             :                      i.InputImmediate(1));                    \
     434             :       } else {                                                \
     435             :         __ asm_instr(i.OutputRegister(), i.InputOperand(0),   \
     436             :                      i.InputImmediate(1));                    \
     437             :       }                                                       \
     438             :     } else {                                                  \
     439             :       if (instr->InputAt(1)->IsRegister()) {                  \
     440             :         __ asm_instr(i.OutputRegister(), i.InputRegister(1)); \
     441             :       } else {                                                \
     442             :         __ asm_instr(i.OutputRegister(), i.InputOperand(1));  \
     443             :       }                                                       \
     444             :     }                                                         \
     445             :   } while (false)
     446             : 
     447             : #define ASSEMBLE_SHIFT(asm_instr, width)                                   \
     448             :   do {                                                                     \
     449             :     if (HasImmediateInput(instr, 1)) {                                     \
     450             :       if (instr->Output()->IsRegister()) {                                 \
     451             :         __ asm_instr(i.OutputRegister(), Immediate(i.InputInt##width(1))); \
     452             :       } else {                                                             \
     453             :         __ asm_instr(i.OutputOperand(), Immediate(i.InputInt##width(1)));  \
     454             :       }                                                                    \
     455             :     } else {                                                               \
     456             :       if (instr->Output()->IsRegister()) {                                 \
     457             :         __ asm_instr##_cl(i.OutputRegister());                             \
     458             :       } else {                                                             \
     459             :         __ asm_instr##_cl(i.OutputOperand());                              \
     460             :       }                                                                    \
     461             :     }                                                                      \
     462             :   } while (false)
     463             : 
     464             : #define ASSEMBLE_MOVX(asm_instr)                            \
     465             :   do {                                                      \
     466             :     if (instr->addressing_mode() != kMode_None) {           \
     467             :       __ asm_instr(i.OutputRegister(), i.MemoryOperand());  \
     468             :     } else if (instr->InputAt(0)->IsRegister()) {           \
     469             :       __ asm_instr(i.OutputRegister(), i.InputRegister(0)); \
     470             :     } else {                                                \
     471             :       __ asm_instr(i.OutputRegister(), i.InputOperand(0));  \
     472             :     }                                                       \
     473             :   } while (false)
     474             : 
     475             : #define ASSEMBLE_SSE_BINOP(asm_instr)                                   \
     476             :   do {                                                                  \
     477             :     if (instr->InputAt(1)->IsFPRegister()) {                            \
     478             :       __ asm_instr(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); \
     479             :     } else {                                                            \
     480             :       __ asm_instr(i.InputDoubleRegister(0), i.InputOperand(1));        \
     481             :     }                                                                   \
     482             :   } while (false)
     483             : 
     484             : #define ASSEMBLE_SSE_UNOP(asm_instr)                                    \
     485             :   do {                                                                  \
     486             :     if (instr->InputAt(0)->IsFPRegister()) {                            \
     487             :       __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); \
     488             :     } else {                                                            \
     489             :       __ asm_instr(i.OutputDoubleRegister(), i.InputOperand(0));        \
     490             :     }                                                                   \
     491             :   } while (false)
     492             : // Emits three-operand `asm_instr` inside a CpuFeatureScope for AVX: (output double register, input 0 double register, input 1 as a double register or, if not an FP register, via InputOperand(1)).
     493             : #define ASSEMBLE_AVX_BINOP(asm_instr)                                  \
     494             :   do {                                                                 \
     495             :     CpuFeatureScope avx_scope(tasm(), AVX);                            \
     496             :     if (instr->InputAt(1)->IsFPRegister()) {                           \
     497             :       __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
     498             :                    i.InputDoubleRegister(1));                          \
     499             :     } else {                                                           \
     500             :       __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
     501             :                    i.InputOperand(1));                                 \
     502             :     }                                                                  \
     503             :   } while (false)
     504             : // Calls PrepareCallCFunction(2) and then the C function referenced by ExternalReference::ieee754_<name>_function() with a parameter count of 2.
     505             : #define ASSEMBLE_IEEE754_BINOP(name)                                     \
     506             :   do {                                                                   \
     507             :     __ PrepareCallCFunction(2);                                          \
     508             :     __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \
     509             :   } while (false)
     510             : // Calls PrepareCallCFunction(1) and then the C function referenced by ExternalReference::ieee754_<name>_function() with a parameter count of 1.
     511             : #define ASSEMBLE_IEEE754_UNOP(name)                                      \
     512             :   do {                                                                   \
     513             :     __ PrepareCallCFunction(1);                                          \
     514             :     __ CallCFunction(ExternalReference::ieee754_##name##_function(), 1); \
     515             :   } while (false)
     516             : // Atomic read-modify-write loop: loads i.MemoryOperand(1) into rax with `mov_inst`, copies it to temp 0 (movl), applies `bin_inst` with input register 0, then issues lock + `cmpxchg_inst` on the memory operand and jumps back to the load while the result is not_equal.
     517             : #define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
     518             :   do {                                                          \
     519             :     Label binop;                                                \
     520             :     __ bind(&binop);                                            \
     521             :     __ mov_inst(rax, i.MemoryOperand(1));                       \
     522             :     __ movl(i.TempRegister(0), rax);                            \
     523             :     __ bin_inst(i.TempRegister(0), i.InputRegister(0));         \
     524             :     __ lock();                                                  \
     525             :     __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));     \
     526             :     __ j(not_equal, &binop);                                    \
     527             :   } while (false)
     528             : // Same read-modify-write loop as ASSEMBLE_ATOMIC_BINOP, except the loaded value is copied from rax to temp 0 with movq instead of movl.
     529             : #define ASSEMBLE_ATOMIC64_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
     530             :   do {                                                            \
     531             :     Label binop;                                                  \
     532             :     __ bind(&binop);                                              \
     533             :     __ mov_inst(rax, i.MemoryOperand(1));                         \
     534             :     __ movq(i.TempRegister(0), rax);                              \
     535             :     __ bin_inst(i.TempRegister(0), i.InputRegister(0));           \
     536             :     __ lock();                                                    \
     537             :     __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));       \
     538             :     __ j(not_equal, &binop);                                      \
     539             :   } while (false)
     540             : // Emits `opcode(dst_operand, src)` where src is input `index` as a Simd128 register if it is allocated to one, otherwise i.InputOperand(index).
     541             : #define ASSEMBLE_SIMD_INSTR(opcode, dst_operand, index)      \
     542             :   do {                                                       \
     543             :     if (instr->InputAt(index)->IsSimd128Register()) {        \
     544             :       __ opcode(dst_operand, i.InputSimd128Register(index)); \
     545             :     } else {                                                 \
     546             :       __ opcode(dst_operand, i.InputOperand(index));         \
     547             :     }                                                        \
     548             :   } while (false)
     549             : // Like ASSEMBLE_SIMD_INSTR, but passes `imm` as an additional trailing argument to `opcode`.
     550             : #define ASSEMBLE_SIMD_IMM_INSTR(opcode, dst_operand, index, imm)  \
     551             :   do {                                                            \
     552             :     if (instr->InputAt(index)->IsSimd128Register()) {             \
     553             :       __ opcode(dst_operand, i.InputSimd128Register(index), imm); \
     554             :     } else {                                                      \
     555             :       __ opcode(dst_operand, i.InputOperand(index), imm);         \
     556             :     }                                                             \
     557             :   } while (false)
     558             : // Emits `opcode` via ASSEMBLE_SIMD_INSTR with the output Simd128 register (DCHECKed equal to input 0) as destination; the source is input 1 when the instruction has exactly two inputs, otherwise input 0.
     559             : #define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode)             \
     560             :   do {                                                   \
     561             :     XMMRegister dst = i.OutputSimd128Register();         \
     562             :     DCHECK_EQ(dst, i.InputSimd128Register(0));           \
     563             :     byte input_index = instr->InputCount() == 2 ? 1 : 0; \
     564             :     ASSEMBLE_SIMD_INSTR(opcode, dst, input_index);       \
     565             :   } while (false)
     566             : // Inside a CpuFeatureScope for `SSELevel`, emits `opcode(output, input 1, imm)` on Simd128 registers; DCHECKs that the output register equals input 0.
     567             : #define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, SSELevel, imm)                  \
     568             :   do {                                                                    \
     569             :     CpuFeatureScope sse_scope(tasm(), SSELevel);                          \
     570             :     DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));      \
     571             :     __ opcode(i.OutputSimd128Register(), i.InputSimd128Register(1), imm); \
     572             :   } while (false)
     573             : // Emits the frame teardown: records the deconstruction point (current pc offset) with the unwinding info writer, then copies rbp into rsp and pops rbp.
     574     2150780 : void CodeGenerator::AssembleDeconstructFrame() {
     575     4301560 :   unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
     576     2150850 :   __ movq(rsp, rbp);
     577     2150936 :   __ popq(rbp);
     578     2150955 : }
     579             : // Prepares for a tail call: when a frame exists, reloads rbp from the slot at [rbp + 0]; in all cases switches frame access to be SP-relative.
     580      238592 : void CodeGenerator::AssemblePrepareTailCall() {
     581      119296 :   if (frame_access_state()->has_frame()) {
     582      141056 :     __ movq(rbp, MemOperand(rbp, 0));
     583             :   }
     584             :   frame_access_state()->SetFrameAccessToSP();
     585      119296 : }
     586             : // If the current frame is an arguments adaptor frame, emits PrepareForTailCall using the callee count in args_reg and the caller count loaded from the frame; otherwise emits only the check. The four registers must not alias.
     587        1344 : void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
     588             :                                                      Register scratch1,
     589             :                                                      Register scratch2,
     590             :                                                      Register scratch3) {
     591             :   DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
     592        1344 :   Label done;
     593             : 
     594             :   // Check if current frame is an arguments adaptor frame.
     595             :   __ cmpq(Operand(rbp, CommonFrameConstants::kContextOrFrameTypeOffset),
     596        1344 :           Immediate(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
     597        1344 :   __ j(not_equal, &done, Label::kNear);  // Not an adaptor frame: skip.
     598             : 
     599             :   // Load arguments count from current arguments adaptor frame (note, it
     600             :   // does not include receiver).
     601        1344 :   Register caller_args_count_reg = scratch1;
     602             :   __ SmiUntag(caller_args_count_reg,
     603        1344 :               Operand(rbp, ArgumentsAdaptorFrameConstants::kLengthOffset));
     604             : 
     605             :   ParameterCount callee_args_count(args_reg);
     606             :   __ PrepareForTailCall(callee_args_count, caller_args_count_reg, scratch2,
     607        1344 :                         scratch3);
     608        1344 :   __ bind(&done);
     609        1344 : }
     610             : 
     611             : namespace {
     612             : // Moves rsp so that the slot count above SP matches new_slot_above_sp, updating the tracked SP delta; a shrinking adjustment (addq) is only emitted when allow_shrinkage is true.
     613      267608 : void AdjustStackPointerForTailCall(Assembler* assembler,
     614             :                                    FrameAccessState* state,
     615             :                                    int new_slot_above_sp,
     616             :                                    bool allow_shrinkage = true) {
     617             :   int current_sp_offset = state->GetSPToFPSlotCount() +
     618      267608 :                           StandardFrameConstants::kFixedSlotCountAboveFp;
     619      267608 :   int stack_slot_delta = new_slot_above_sp - current_sp_offset;
     620      267608 :   if (stack_slot_delta > 0) {  // More slots needed: lower rsp.
     621        1584 :     assembler->subq(rsp, Immediate(stack_slot_delta * kSystemPointerSize));
     622             :     state->IncreaseSPDelta(stack_slot_delta);
     623      266816 :   } else if (allow_shrinkage && stack_slot_delta < 0) {  // Fewer slots: raise rsp.
     624      140336 :     assembler->addq(rsp, Immediate(-stack_slot_delta * kSystemPointerSize));
     625             :     state->IncreaseSPDelta(stack_slot_delta);  // Negative delta here.
     626             :   }
     627      267608 : }
     628             : // Pushes the four 32-bit words of `mask` as two 64-bit values via kScratchRegister: first (mask[3] << 32 | mask[2]), then (mask[1] << 32 | mask[0]).
     629        2336 : void SetupShuffleMaskOnStack(TurboAssembler* assembler, uint32_t* mask) {
     630        2336 :   int64_t shuffle_mask = (mask[2]) | (static_cast<uint64_t>(mask[3]) << 32);
     631        2336 :   assembler->movq(kScratchRegister, shuffle_mask);
     632        2336 :   assembler->Push(kScratchRegister);
     633        2336 :   shuffle_mask = (mask[0]) | (static_cast<uint64_t>(mask[1]) << 32);
     634        2336 :   assembler->movq(kScratchRegister, shuffle_mask);
     635        2336 :   assembler->Push(kScratchRegister);
     636        2336 : }
     637             : 
     638             : }  // namespace
     639             : // Before the gap moves of a tail call: if the push-compatible moves (immediate and scalar) end right below first_unused_stack_slot, emits them as pushes and eliminates them; then grows (never shrinks) the stack up to first_unused_stack_slot.
     640      119308 : void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
     641      296600 :                                               int first_unused_stack_slot) {
     642      119308 :   CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
     643             :   ZoneVector<MoveOperands*> pushes(zone());
     644      119308 :   GetPushCompatibleMoves(instr, flags, &pushes);
     645             : 
     646      131408 :   if (!pushes.empty() &&
     647       24200 :       (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
     648             :        first_unused_stack_slot)) {
     649             :     X64OperandConverter g(this, instr);
     650       53192 :     for (auto move : pushes) {
     651             :       LocationOperand destination_location(
     652             :           LocationOperand::cast(move->destination()));
     653       28992 :       InstructionOperand source(move->source());
     654       28992 :       AdjustStackPointerForTailCall(tasm(), frame_access_state(),
     655       28992 :                                     destination_location.index());
     656       28992 :       if (source.IsStackSlot()) {
     657             :         LocationOperand source_location(LocationOperand::cast(source));
     658       12112 :         __ Push(g.SlotToOperand(source_location.index()));
     659       16880 :       } else if (source.IsRegister()) {
     660             :         LocationOperand source_location(LocationOperand::cast(source));
     661       16880 :         __ Push(source_location.GetRegister());
     662           0 :       } else if (source.IsImmediate()) {
     663           0 :         __ Push(Immediate(ImmediateOperand::cast(source).inline_value()));
     664             :       } else {
     665             :         // Pushes of non-scalar data types are not supported.
     666           0 :         UNIMPLEMENTED();
     667             :       }
     668             :       frame_access_state()->IncreaseSPDelta(1);  // Account for the slot just pushed.
     669             :       move->Eliminate();  // The push replaces this gap move.
     670             :     }
     671             :   }
     672      119308 :   AdjustStackPointerForTailCall(tasm(), frame_access_state(),
     673      119308 :                                 first_unused_stack_slot, false);  // allow_shrinkage = false.
     674      119308 : }
     675             : // After the gap moves of a tail call: adjusts rsp to first_unused_stack_slot, with shrinking allowed (AdjustStackPointerForTailCall's default).
     676      119308 : void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
     677      119308 :                                              int first_unused_stack_slot) {
     678      119308 :   AdjustStackPointerForTailCall(tasm(), frame_access_state(),
     679      119308 :                                 first_unused_stack_slot);
     680      119308 : }
     681             : 
     682             : // Check that {kJavaScriptCallCodeStartRegister} equals the code start address computed into rbx; asserts with kWrongFunctionCodeStart otherwise.
     683         110 : void CodeGenerator::AssembleCodeStartRegisterCheck() {
     684         110 :   __ ComputeCodeStartAddress(rbx);
     685         110 :   __ cmpq(rbx, kJavaScriptCallCodeStartRegister);
     686         110 :   __ Assert(equal, AbortReason::kWrongFunctionCodeStart);
     687         110 : }
     688             : 
     689             : // Check if the code object is marked for deoptimization. If it is, then it
     690             : // jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need
     691             : // to:
     692             : //    1. read from memory the word that contains that bit, which can be found in
     693             : //       the flags in the referenced {CodeDataContainer} object;
     694             : //    2. test kMarkedForDeoptimizationBit in those flags; and
     695             : //    3. if it is not zero then it jumps to the builtin.
     696      913364 : void CodeGenerator::BailoutIfDeoptimized() {
     697             :   int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;  // NOTE(review): presumably relative to the code start held in the register - confirm.
     698             :   __ LoadTaggedPointerField(rbx,
     699      456683 :                             Operand(kJavaScriptCallCodeStartRegister, offset));  // rbx is used as scratch.
     700             :   __ testl(FieldOperand(rbx, CodeDataContainer::kKindSpecificFlagsOffset),
     701      456685 :            Immediate(1 << Code::kMarkedForDeoptimizationBit));
     702             :   __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
     703      456681 :           RelocInfo::CODE_TARGET, not_zero);  // Taken only when the bit is set.
     704      456683 : }
     705             : // Computes kSpeculationPoisonRegister from a comparison of the computed code start with kJavaScriptCallCodeStartRegister; rbx is used as scratch.
     706           0 : void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
     707             :   // Set a mask which has all bits set in the normal case, but has all
     708             :   // bits cleared if we are speculatively executing the wrong PC.
     709           0 :   __ ComputeCodeStartAddress(rbx);
     710           0 :   __ xorq(kSpeculationPoisonRegister, kSpeculationPoisonRegister);  // Start with a cleared mask.
     711           0 :   __ cmpq(kJavaScriptCallCodeStartRegister, rbx);
     712             :   __ movq(rbx, Immediate(-1));
     713           0 :   __ cmovq(equal, kSpeculationPoisonRegister, rbx);  // Mask takes rbx's value only if the compare was equal.
     714           0 : }
     715             : // ANDs kJSFunctionRegister, kContextRegister and rsp with kSpeculationPoisonRegister.
     716           0 : void CodeGenerator::AssembleRegisterArgumentPoisoning() {
     717           0 :   __ andq(kJSFunctionRegister, kSpeculationPoisonRegister);
     718           0 :   __ andq(kContextRegister, kSpeculationPoisonRegister);
     719           0 :   __ andq(rsp, kSpeculationPoisonRegister);
     720           0 : }
     721             : 
     722             : // Assembles an instruction after register allocation, producing machine code.
     723    63172867 : CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     724    84106799 :     Instruction* instr) {
     725             :   X64OperandConverter i(this, instr);
     726             :   InstructionCode opcode = instr->opcode();
     727    63172867 :   ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
     728    63172867 :   switch (arch_opcode) {
     729             :     case kArchCallCodeObject: {
     730     4711970 :       if (HasImmediateInput(instr, 0)) {
     731     4702936 :         Handle<Code> code = i.InputCode(0);
     732     4702936 :         __ Call(code, RelocInfo::CODE_TARGET);
     733             :       } else {
     734        9048 :         Register reg = i.InputRegister(0);
     735             :         DCHECK_IMPLIES(
     736             :             HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
     737             :             reg == kJavaScriptCallCodeStartRegister);
     738        9048 :         __ LoadCodeObjectEntry(reg, reg);
     739        9048 :         if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
     740           0 :           __ RetpolineCall(reg);
     741             :         } else {
     742        9048 :           __ call(reg);
     743             :         }
     744             :       }
     745     4711986 :       RecordCallPosition(instr);
     746             :       frame_access_state()->ClearSPDelta();
     747             :       break;
     748             :     }
     749             :     case kArchCallBuiltinPointer: {
     750             :       DCHECK(!HasImmediateInput(instr, 0));
     751        2488 :       Register builtin_pointer = i.InputRegister(0);
     752        2488 :       __ CallBuiltinPointer(builtin_pointer);
     753        2488 :       RecordCallPosition(instr);
     754             :       frame_access_state()->ClearSPDelta();
     755             :       break;
     756             :     }
     757             :     case kArchCallWasmFunction: {
     758      956186 :       if (HasImmediateInput(instr, 0)) {
     759       80790 :         Constant constant = i.ToConstant(instr->InputAt(0));
     760       80806 :         Address wasm_code = static_cast<Address>(constant.ToInt64());
     761       80806 :         if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
     762       80796 :           __ near_call(wasm_code, constant.rmode());
     763             :         } else {
     764           0 :           if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
     765           0 :             __ RetpolineCall(wasm_code, constant.rmode());
     766             :           } else {
     767           0 :             __ Call(wasm_code, constant.rmode());
     768             :           }
     769             :         }
     770             :       } else {
     771      875396 :         Register reg = i.InputRegister(0);
     772      875396 :         if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
     773           0 :           __ RetpolineCall(reg);
     774             :         } else {
     775      875396 :           __ call(reg);
     776             :         }
     777             :       }
     778      956199 :       RecordCallPosition(instr);
     779             :       frame_access_state()->ClearSPDelta();
     780             :       break;
     781             :     }
     782             :     case kArchTailCallCodeObjectFromJSFunction:
     783             :     case kArchTailCallCodeObject: {
     784       34568 :       if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) {
     785             :         AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
     786             :                                          i.TempRegister(0), i.TempRegister(1),
     787        1344 :                                          i.TempRegister(2));
     788             :       }
     789       34568 :       if (HasImmediateInput(instr, 0)) {
     790       29728 :         Handle<Code> code = i.InputCode(0);
     791       29728 :         __ Jump(code, RelocInfo::CODE_TARGET);
     792             :       } else {
     793        9680 :         Register reg = i.InputRegister(0);
     794             :         DCHECK_IMPLIES(
     795             :             HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
     796             :             reg == kJavaScriptCallCodeStartRegister);
     797        4840 :         __ LoadCodeObjectEntry(reg, reg);
     798        4840 :         if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
     799           0 :           __ RetpolineJump(reg);
     800             :         } else {
     801        4840 :           __ jmp(reg);
     802             :         }
     803             :       }
     804             :       unwinding_info_writer_.MarkBlockWillExit();
     805             :       frame_access_state()->ClearSPDelta();
     806       34568 :       frame_access_state()->SetFrameAccessToDefault();
     807       34568 :       break;
     808             :     }
     809             :     case kArchTailCallWasm: {
     810           0 :       if (HasImmediateInput(instr, 0)) {
     811           0 :         Constant constant = i.ToConstant(instr->InputAt(0));
     812           0 :         Address wasm_code = static_cast<Address>(constant.ToInt64());
     813           0 :         if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
     814           0 :           __ near_jmp(wasm_code, constant.rmode());
     815             :         } else {
     816             :           __ Move(kScratchRegister, wasm_code, constant.rmode());
     817           0 :           __ jmp(kScratchRegister);
     818             :         }
     819             :       } else {
     820           0 :         Register reg = i.InputRegister(0);
     821           0 :         if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
     822           0 :           __ RetpolineJump(reg);
     823             :         } else {
     824           0 :           __ jmp(reg);
     825             :         }
     826             :       }
     827             :       unwinding_info_writer_.MarkBlockWillExit();
     828             :       frame_access_state()->ClearSPDelta();
     829           0 :       frame_access_state()->SetFrameAccessToDefault();
     830           0 :       break;
     831             :     }
     832             :     case kArchTailCallAddress: {
     833       84728 :       CHECK(!HasImmediateInput(instr, 0));
     834       84728 :       Register reg = i.InputRegister(0);
     835             :       DCHECK_IMPLIES(
     836             :           HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
     837             :           reg == kJavaScriptCallCodeStartRegister);
     838       84728 :       if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
     839           0 :         __ RetpolineJump(reg);
     840             :       } else {
     841       84728 :         __ jmp(reg);
     842             :       }
     843             :       unwinding_info_writer_.MarkBlockWillExit();
     844             :       frame_access_state()->ClearSPDelta();
     845       84728 :       frame_access_state()->SetFrameAccessToDefault();
     846             :       break;
     847             :     }
     848             :     case kArchCallJSFunction: {
     849             :       Register func = i.InputRegister(0);
     850       23793 :       if (FLAG_debug_code) {
     851             :         // Check the function's context matches the context argument.
     852           8 :         __ cmp_tagged(rsi, FieldOperand(func, JSFunction::kContextOffset));
     853           8 :         __ Assert(equal, AbortReason::kWrongFunctionContext);
     854             :       }
     855             :       static_assert(kJavaScriptCallCodeStartRegister == rcx, "ABI mismatch");
     856             :       __ LoadTaggedPointerField(rcx,
     857       23793 :                                 FieldOperand(func, JSFunction::kCodeOffset));
     858       23793 :       __ CallCodeObject(rcx);
     859             :       frame_access_state()->ClearSPDelta();
     860       23793 :       RecordCallPosition(instr);
     861             :       break;
     862             :     }
     863             :     case kArchPrepareCallCFunction: {
     864             :       // Frame alignment requires using FP-relative frame addressing.
     865             :       frame_access_state()->SetFrameAccessToFP();
     866       26201 :       int const num_parameters = MiscField::decode(instr->opcode());
     867       26201 :       __ PrepareCallCFunction(num_parameters);
     868       26201 :       break;
     869             :     }
     870             :     case kArchSaveCallerRegisters: {
     871             :       fp_mode_ =
     872         676 :           static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
     873             :       DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
     874             :       // kReturnRegister0 should have been saved before entering the stub.
     875         676 :       int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
     876             :       DCHECK(IsAligned(bytes, kSystemPointerSize));
     877             :       DCHECK_EQ(0, frame_access_state()->sp_delta());
     878         676 :       frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
     879             :       DCHECK(!caller_registers_saved_);
     880         676 :       caller_registers_saved_ = true;
     881         676 :       break;
     882             :     }
     883             :     case kArchRestoreCallerRegisters: {
     884             :       DCHECK(fp_mode_ ==
     885             :              static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
     886             :       DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
     887             :       // Don't overwrite the returned value.
     888         676 :       int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
     889         676 :       frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
     890             :       DCHECK_EQ(0, frame_access_state()->sp_delta());
     891             :       DCHECK(caller_registers_saved_);
     892         676 :       caller_registers_saved_ = false;
     893         676 :       break;
     894             :     }
     895             :     case kArchPrepareTailCall:
     896      119296 :       AssemblePrepareTailCall();
     897      119296 :       break;
     898             :     case kArchCallCFunction: {
     899             :       int const num_parameters = MiscField::decode(instr->opcode());
     900       26201 :       if (HasImmediateInput(instr, 0)) {
     901       25061 :         ExternalReference ref = i.InputExternalReference(0);
     902       25061 :         __ CallCFunction(ref, num_parameters);
     903             :       } else {
     904        1140 :         Register func = i.InputRegister(0);
     905        1140 :         __ CallCFunction(func, num_parameters);
     906             :       }
     907       26201 :       frame_access_state()->SetFrameAccessToDefault();
     908             :       // Ideally, we should decrement SP delta to match the change of stack
     909             :       // pointer in CallCFunction. However, for certain architectures (e.g.
     910             :       // ARM), there may be more strict alignment requirement, causing old SP
     911             :       // to be saved on the stack. In those cases, we can not calculate the SP
     912             :       // delta statically.
     913             :       frame_access_state()->ClearSPDelta();
     914       26201 :       if (caller_registers_saved_) {
     915             :         // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
     916             :         // Here, we assume the sequence to be:
     917             :         //   kArchSaveCallerRegisters;
     918             :         //   kArchCallCFunction;
     919             :         //   kArchRestoreCallerRegisters;
     920             :         int bytes =
     921         676 :             __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
     922         676 :         frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
     923             :       }
     924             :       // TODO(tebbi): Do we need an lfence here?
     925             :       break;
     926             :     }
     927             :     case kArchJmp:
     928     4930087 :       AssembleArchJump(i.InputRpo(0));
     929     4930082 :       break;
     930             :     case kArchBinarySearchSwitch:
     931       34722 :       AssembleArchBinarySearchSwitch(instr);
     932       34722 :       break;
     933             :     case kArchLookupSwitch:
     934           0 :       AssembleArchLookupSwitch(instr);
     935           0 :       break;
     936             :     case kArchTableSwitch:
     937         314 :       AssembleArchTableSwitch(instr);
     938         314 :       break;
     939             :     case kArchComment:
     940           4 :       __ RecordComment(reinterpret_cast<const char*>(i.InputInt64(0)));
     941           4 :       break;
     942             :     case kArchDebugAbort:
     943             :       DCHECK(i.InputRegister(0) == rdx);
     944         152 :       if (!frame_access_state()->has_frame()) {
     945             :         // We don't actually want to generate a pile of code for this, so just
     946             :         // claim there is a stack frame, without generating one.
     947           8 :         FrameScope scope(tasm(), StackFrame::NONE);
     948             :         __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
     949           8 :                 RelocInfo::CODE_TARGET);
     950             :       } else {
     951             :         __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
     952         144 :                 RelocInfo::CODE_TARGET);
     953             :       }
     954         152 :       __ int3();
     955             :       unwinding_info_writer_.MarkBlockWillExit();
     956             :       break;
     957             :     case kArchDebugBreak:
     958      358834 :       __ int3();
     959      358834 :       break;
     960             :     case kArchThrowTerminator:
     961             :       unwinding_info_writer_.MarkBlockWillExit();
     962             :       break;
     963             :     case kArchNop:
     964             :       // don't emit code for nops.
     965             :       break;
     966             :     case kArchDeoptimize: {
     967             :       int deopt_state_id =
     968       45611 :           BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
     969             :       CodeGenResult result =
     970       45611 :           AssembleDeoptimizerCall(deopt_state_id, current_source_position_);
     971       45611 :       if (result != kSuccess) return result;
     972             :       unwinding_info_writer_.MarkBlockWillExit();
     973             :       break;
     974             :     }
     975             :     case kArchRet:
     976     2426466 :       AssembleReturn(instr->InputAt(0));
     977     2426457 :       break;
     978             :     case kArchStackPointer:
     979           0 :       __ movq(i.OutputRegister(), rsp);
     980             :       break;
     981             :     case kArchFramePointer:
     982       31038 :       __ movq(i.OutputRegister(), rbp);
     983             :       break;
     984             :     case kArchParentFramePointer:
     985       50404 :       if (frame_access_state()->has_frame()) {
     986       80652 :         __ movq(i.OutputRegister(), Operand(rbp, 0));
     987             :       } else {
     988       23520 :         __ movq(i.OutputRegister(), rbp);
     989             :       }
     990             :       break;
     991             :     case kArchTruncateDoubleToI: {
     992             :       auto result = i.OutputRegister();
     993             :       auto input = i.InputDoubleRegister(0);
     994             :       auto ool = new (zone()) OutOfLineTruncateDoubleToI(
     995             :           this, result, input, DetermineStubCallMode(),
     996      104678 :           &unwinding_info_writer_);
     997             :       // We use Cvttsd2siq instead of Cvttsd2si due to performance reasons. The
     998             :       // use of Cvttsd2siq requires the movl below to avoid sign extension.
     999       52336 :       __ Cvttsd2siq(result, input);
    1000       52340 :       __ cmpq(result, Immediate(1));
    1001       52340 :       __ j(overflow, ool->entry());
    1002       52341 :       __ bind(ool->exit());
    1003             :       __ movl(result, result);
    1004             :       break;
    1005             :     }
    1006             :     case kArchStoreWithWriteBarrier: {
    1007             :       RecordWriteMode mode =
    1008             :           static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
    1009             :       Register object = i.InputRegister(0);
    1010      325633 :       size_t index = 0;
    1011      325633 :       Operand operand = i.MemoryOperand(&index);
    1012      325633 :       Register value = i.InputRegister(index);
    1013             :       Register scratch0 = i.TempRegister(0);
    1014             :       Register scratch1 = i.TempRegister(1);
    1015             :       auto ool = new (zone())
    1016             :           OutOfLineRecordWrite(this, object, operand, value, scratch0, scratch1,
    1017      651266 :                                mode, DetermineStubCallMode());
    1018      325633 :       __ StoreTaggedField(operand, value);
    1019             :       __ CheckPageFlag(object, scratch0,
    1020             :                        MemoryChunk::kPointersFromHereAreInterestingMask,
    1021      325633 :                        not_zero, ool->entry());
    1022      325633 :       __ bind(ool->exit());
    1023             :       break;
    1024             :     }
    1025             :     case kArchWordPoisonOnSpeculation:
    1026             :       DCHECK_EQ(i.OutputRegister(), i.InputRegister(0));
    1027           0 :       __ andq(i.InputRegister(0), kSpeculationPoisonRegister);
    1028           0 :       break;
    1029             :     case kLFence:
    1030           0 :       __ lfence();
    1031           0 :       break;
    1032             :     case kArchStackSlot: {
    1033             :       FrameOffset offset =
    1034        4918 :           frame_access_state()->GetFrameOffset(i.InputInt32(0));
    1035        2459 :       Register base = offset.from_stack_pointer() ? rsp : rbp;
    1036        7377 :       __ leaq(i.OutputRegister(), Operand(base, offset.offset()));
    1037             :       break;
    1038             :     }
    1039             :     case kIeee754Float64Acos:
    1040         116 :       ASSEMBLE_IEEE754_UNOP(acos);
    1041         116 :       break;
    1042             :     case kIeee754Float64Acosh:
    1043         116 :       ASSEMBLE_IEEE754_UNOP(acosh);
    1044         116 :       break;
    1045             :     case kIeee754Float64Asin:
    1046         116 :       ASSEMBLE_IEEE754_UNOP(asin);
    1047         116 :       break;
    1048             :     case kIeee754Float64Asinh:
    1049         116 :       ASSEMBLE_IEEE754_UNOP(asinh);
    1050         116 :       break;
    1051             :     case kIeee754Float64Atan:
    1052         133 :       ASSEMBLE_IEEE754_UNOP(atan);
    1053         133 :       break;
    1054             :     case kIeee754Float64Atanh:
    1055         116 :       ASSEMBLE_IEEE754_UNOP(atanh);
    1056         116 :       break;
    1057             :     case kIeee754Float64Atan2:
    1058         129 :       ASSEMBLE_IEEE754_BINOP(atan2);
    1059         129 :       break;
    1060             :     case kIeee754Float64Cbrt:
    1061         116 :       ASSEMBLE_IEEE754_UNOP(cbrt);
    1062         116 :       break;
    1063             :     case kIeee754Float64Cos:
    1064         265 :       ASSEMBLE_IEEE754_UNOP(cos);
    1065         265 :       break;
    1066             :     case kIeee754Float64Cosh:
    1067         123 :       ASSEMBLE_IEEE754_UNOP(cosh);
    1068         123 :       break;
    1069             :     case kIeee754Float64Exp:
    1070         148 :       ASSEMBLE_IEEE754_UNOP(exp);
    1071         148 :       break;
    1072             :     case kIeee754Float64Expm1:
    1073         123 :       ASSEMBLE_IEEE754_UNOP(expm1);
    1074         123 :       break;
    1075             :     case kIeee754Float64Log:
    1076         284 :       ASSEMBLE_IEEE754_UNOP(log);
    1077         284 :       break;
    1078             :     case kIeee754Float64Log1p:
    1079         116 :       ASSEMBLE_IEEE754_UNOP(log1p);
    1080         116 :       break;
    1081             :     case kIeee754Float64Log2:
    1082         116 :       ASSEMBLE_IEEE754_UNOP(log2);
    1083         116 :       break;
    1084             :     case kIeee754Float64Log10:
    1085         116 :       ASSEMBLE_IEEE754_UNOP(log10);
    1086         116 :       break;
    1087             :     case kIeee754Float64Pow:
    1088         336 :       ASSEMBLE_IEEE754_BINOP(pow);
    1089         336 :       break;
    1090             :     case kIeee754Float64Sin:
    1091         268 :       ASSEMBLE_IEEE754_UNOP(sin);
    1092         268 :       break;
    1093             :     case kIeee754Float64Sinh:
    1094         123 :       ASSEMBLE_IEEE754_UNOP(sinh);
    1095         123 :       break;
    1096             :     case kIeee754Float64Tan:
    1097         168 :       ASSEMBLE_IEEE754_UNOP(tan);
    1098         168 :       break;
    1099             :     case kIeee754Float64Tanh:
    1100         123 :       ASSEMBLE_IEEE754_UNOP(tanh);
    1101         123 :       break;
    1102             :     case kX64Add32:
    1103      357994 :       ASSEMBLE_BINOP(addl);
    1104             :       break;
    1105             :     case kX64Add:
    1106      348496 :       ASSEMBLE_BINOP(addq);
    1107             :       break;
    1108             :     case kX64Sub32:
    1109      200812 :       ASSEMBLE_BINOP(subl);
    1110             :       break;
    1111             :     case kX64Sub:
    1112      227661 :       ASSEMBLE_BINOP(subq);
    1113             :       break;
    1114             :     case kX64And32:
    1115      851971 :       ASSEMBLE_BINOP(andl);
    1116             :       break;
    1117             :     case kX64And:
    1118     1238677 :       ASSEMBLE_BINOP(andq);
    1119             :       break;
    1120             :     case kX64Cmp8:
    1121       39955 :       ASSEMBLE_COMPARE(cmpb);
    1122             :       break;
    1123             :     case kX64Cmp16:
    1124     1603028 :       ASSEMBLE_COMPARE(cmpw);
    1125             :       break;
    1126             :     case kX64Cmp32:
    1127     4796358 :       ASSEMBLE_COMPARE(cmpl);
    1128             :       break;
    1129             :     case kX64Cmp:
    1130     9237396 :       ASSEMBLE_COMPARE(cmpq);
    1131             :       break;
    1132             :     case kX64Test8:
    1133      331384 :       ASSEMBLE_COMPARE(testb);
    1134             :       break;
    1135             :     case kX64Test16:
    1136      122375 :       ASSEMBLE_COMPARE(testw);
    1137             :       break;
    1138             :     case kX64Test32:
    1139      551256 :       ASSEMBLE_COMPARE(testl);
    1140             :       break;
    1141             :     case kX64Test:
    1142     3230226 :       ASSEMBLE_COMPARE(testq);
    1143             :       break;
    1144             :     case kX64Imul32:
    1145      203677 :       ASSEMBLE_MULT(imull);
    1146             :       break;
    1147             :     case kX64Imul:
    1148       83176 :       ASSEMBLE_MULT(imulq);
    1149             :       break;
    1150             :     case kX64ImulHigh32:
    1151        7522 :       if (instr->InputAt(1)->IsRegister()) {
    1152        3761 :         __ imull(i.InputRegister(1));
    1153             :       } else {
    1154           0 :         __ imull(i.InputOperand(1));
    1155             :       }
    1156             :       break;
    1157             :     case kX64UmulHigh32:
    1158        2874 :       if (instr->InputAt(1)->IsRegister()) {
    1159        1437 :         __ mull(i.InputRegister(1));
    1160             :       } else {
    1161           0 :         __ mull(i.InputOperand(1));
    1162             :       }
    1163             :       break;
    1164             :     case kX64Idiv32:
    1165       31991 :       __ cdq();
    1166       31991 :       __ idivl(i.InputRegister(1));
    1167             :       break;
    1168             :     case kX64Idiv:
    1169        2732 :       __ cqo();
    1170        2732 :       __ idivq(i.InputRegister(1));
    1171             :       break;
    1172             :     case kX64Udiv32:
    1173       29099 :       __ xorl(rdx, rdx);
    1174       29099 :       __ divl(i.InputRegister(1));
    1175             :       break;
    1176             :     case kX64Udiv:
    1177        1768 :       __ xorq(rdx, rdx);
    1178        1768 :       __ divq(i.InputRegister(1));
    1179             :       break;
    1180             :     case kX64Not:
    1181          88 :       ASSEMBLE_UNOP(notq);
    1182             :       break;
    1183             :     case kX64Not32:
    1184        5562 :       ASSEMBLE_UNOP(notl);
    1185             :       break;
    1186             :     case kX64Neg:
    1187       19448 :       ASSEMBLE_UNOP(negq);
    1188             :       break;
    1189             :     case kX64Neg32:
    1190       11860 :       ASSEMBLE_UNOP(negl);
    1191             :       break;
    1192             :     case kX64Or32:
    1193      344353 :       ASSEMBLE_BINOP(orl);
    1194             :       break;
    1195             :     case kX64Or:
    1196      374048 :       ASSEMBLE_BINOP(orq);
    1197             :       break;
    1198             :     case kX64Xor32:
    1199       92295 :       ASSEMBLE_BINOP(xorl);
    1200             :       break;
    1201             :     case kX64Xor:
    1202        1116 :       ASSEMBLE_BINOP(xorq);
    1203             :       break;
    1204             :     case kX64Shl32:
    1205      118111 :       ASSEMBLE_SHIFT(shll, 5);
    1206             :       break;
    1207             :     case kX64Shl:
    1208     1456216 :       ASSEMBLE_SHIFT(shlq, 6);
    1209             :       break;
    1210             :     case kX64Shr32:
    1211      430037 :       ASSEMBLE_SHIFT(shrl, 5);
    1212             :       break;
    1213             :     case kX64Shr:
    1214     1421058 :       ASSEMBLE_SHIFT(shrq, 6);
    1215             :       break;
    1216             :     case kX64Sar32:
    1217       96660 :       ASSEMBLE_SHIFT(sarl, 5);
    1218             :       break;
    1219             :     case kX64Sar:
    1220      749872 :       ASSEMBLE_SHIFT(sarq, 6);
    1221             :       break;
    1222             :     case kX64Ror32:
    1223      110735 :       ASSEMBLE_SHIFT(rorl, 5);
    1224             :       break;
    1225             :     case kX64Ror:
    1226         368 :       ASSEMBLE_SHIFT(rorq, 6);
    1227             :       break;
    1228             :     case kX64Lzcnt:
    1229          72 :       if (instr->InputAt(0)->IsRegister()) {
    1230          36 :         __ Lzcntq(i.OutputRegister(), i.InputRegister(0));
    1231             :       } else {
    1232           0 :         __ Lzcntq(i.OutputRegister(), i.InputOperand(0));
    1233             :       }
    1234             :       break;
    1235             :     case kX64Lzcnt32:
    1236         892 :       if (instr->InputAt(0)->IsRegister()) {
    1237         446 :         __ Lzcntl(i.OutputRegister(), i.InputRegister(0));
    1238             :       } else {
    1239           0 :         __ Lzcntl(i.OutputRegister(), i.InputOperand(0));
    1240             :       }
    1241             :       break;
    1242             :     case kX64Tzcnt:
    1243          88 :       if (instr->InputAt(0)->IsRegister()) {
    1244          44 :         __ Tzcntq(i.OutputRegister(), i.InputRegister(0));
    1245             :       } else {
    1246           0 :         __ Tzcntq(i.OutputRegister(), i.InputOperand(0));
    1247             :       }
    1248             :       break;
    1249             :     case kX64Tzcnt32:
    1250         664 :       if (instr->InputAt(0)->IsRegister()) {
    1251         332 :         __ Tzcntl(i.OutputRegister(), i.InputRegister(0));
    1252             :       } else {
    1253           0 :         __ Tzcntl(i.OutputRegister(), i.InputOperand(0));
    1254             :       }
    1255             :       break;
    1256             :     case kX64Popcnt:
    1257          88 :       if (instr->InputAt(0)->IsRegister()) {
    1258          44 :         __ Popcntq(i.OutputRegister(), i.InputRegister(0));
    1259             :       } else {
    1260           0 :         __ Popcntq(i.OutputRegister(), i.InputOperand(0));
    1261             :       }
    1262             :       break;
    1263             :     case kX64Popcnt32:
    1264         128 :       if (instr->InputAt(0)->IsRegister()) {
    1265          64 :         __ Popcntl(i.OutputRegister(), i.InputRegister(0));
    1266             :       } else {
    1267           0 :         __ Popcntl(i.OutputRegister(), i.InputOperand(0));
    1268             :       }
    1269             :       break;
    1270             :     case kX64Bswap:
    1271          12 :       __ bswapq(i.OutputRegister());
    1272          12 :       break;
    1273             :     case kX64Bswap32:
    1274          44 :       __ bswapl(i.OutputRegister());
    1275          44 :       break;
    1276             :     case kSSEFloat32Cmp:
    1277           0 :       ASSEMBLE_SSE_BINOP(Ucomiss);
    1278             :       break;
    1279             :     case kSSEFloat32Add:
    1280           0 :       ASSEMBLE_SSE_BINOP(addss);
    1281             :       break;
    1282             :     case kSSEFloat32Sub:
    1283           0 :       ASSEMBLE_SSE_BINOP(subss);
    1284             :       break;
    1285             :     case kSSEFloat32Mul:
    1286           0 :       ASSEMBLE_SSE_BINOP(mulss);
    1287             :       break;
    1288             :     case kSSEFloat32Div:
    1289           0 :       ASSEMBLE_SSE_BINOP(divss);
    1290             :       // Don't delete this mov. It may improve performance on some CPUs,
    1291             :       // when there is a (v)mulss depending on the result.
    1292           0 :       __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
    1293           0 :       break;
    1294             :     case kSSEFloat32Abs: {
    1295             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1296           0 :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    1297           0 :       __ psrlq(kScratchDoubleReg, 33);
    1298           0 :       __ andps(i.OutputDoubleRegister(), kScratchDoubleReg);
    1299           0 :       break;
    1300             :     }
    1301             :     case kSSEFloat32Neg: {
    1302             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1303           0 :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    1304           0 :       __ psllq(kScratchDoubleReg, 31);
    1305           0 :       __ xorps(i.OutputDoubleRegister(), kScratchDoubleReg);
    1306           0 :       break;
    1307             :     }
    1308             :     case kSSEFloat32Sqrt:
    1309         522 :       ASSEMBLE_SSE_UNOP(sqrtss);
    1310             :       break;
    1311             :     case kSSEFloat32ToFloat64:
    1312       63846 :       ASSEMBLE_SSE_UNOP(Cvtss2sd);
    1313             :       break;
    1314             :     case kSSEFloat32Round: {
    1315             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    1316             :       RoundingMode const mode =
    1317             :           static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
    1318             :       __ Roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
    1319             :       break;
    1320             :     }
    1321             :     case kSSEFloat32ToInt32:
    1322         696 :       if (instr->InputAt(0)->IsFPRegister()) {
    1323         348 :         __ Cvttss2si(i.OutputRegister(), i.InputDoubleRegister(0));
    1324             :       } else {
    1325           0 :         __ Cvttss2si(i.OutputRegister(), i.InputOperand(0));
    1326             :       }
    1327             :       break;
    1328             :     case kSSEFloat32ToUint32: {
    1329         112 :       if (instr->InputAt(0)->IsFPRegister()) {
    1330          56 :         __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
    1331             :       } else {
    1332           0 :         __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
    1333             :       }
    1334             :       break;
    1335             :     }
    1336             :     case kSSEFloat64Cmp:
    1337        3572 :       ASSEMBLE_SSE_BINOP(Ucomisd);
    1338             :       break;
    1339             :     case kSSEFloat64Add:
    1340         966 :       ASSEMBLE_SSE_BINOP(addsd);
    1341             :       break;
    1342             :     case kSSEFloat64Sub:
    1343         666 :       ASSEMBLE_SSE_BINOP(subsd);
    1344             :       break;
    1345             :     case kSSEFloat64Mul:
    1346         120 :       ASSEMBLE_SSE_BINOP(mulsd);
    1347             :       break;
    1348             :     case kSSEFloat64Div:
    1349          78 :       ASSEMBLE_SSE_BINOP(divsd);
    1350             :       // Don't delete this mov. It may improve performance on some CPUs,
    1351             :       // when there is a (v)mulsd depending on the result.
    1352          26 :       __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
    1353             :       break;
    1354             :     case kSSEFloat64Mod: {
    1355        1614 :       __ subq(rsp, Immediate(kDoubleSize));
    1356             :       unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    1357        3228 :                                                        kDoubleSize);
    1358             :       // Move values to st(0) and st(1).
    1359        4842 :       __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(1));
    1360        1614 :       __ fld_d(Operand(rsp, 0));
    1361        4842 :       __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
    1362        1614 :       __ fld_d(Operand(rsp, 0));
    1363             :       // Loop while fprem isn't done.
    1364        1614 :       Label mod_loop;
    1365        1614 :       __ bind(&mod_loop);
    1366             :       // This instruction traps on all kinds of inputs, but we are assuming the
    1367             :       // floating point control word is set to ignore them all.
    1368        1614 :       __ fprem();
    1369             :       // The following 2 instructions implicitly use rax.
    1370        1614 :       __ fnstsw_ax();
    1371        1614 :       if (CpuFeatures::IsSupported(SAHF)) {
    1372             :         CpuFeatureScope sahf_scope(tasm(), SAHF);
    1373        1582 :         __ sahf();
    1374             :       } else {
    1375             :         __ shrl(rax, Immediate(8));
    1376          32 :         __ andl(rax, Immediate(0xFF));
    1377          32 :         __ pushq(rax);
    1378             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    1379          64 :                                                          kSystemPointerSize);
    1380          32 :         __ popfq();
    1381             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    1382          64 :                                                          -kSystemPointerSize);
    1383             :       }
    1384        1614 :       __ j(parity_even, &mod_loop);
    1385             :       // Move output to stack and clean up.
    1386        1614 :       __ fstp(1);
    1387        1614 :       __ fstp_d(Operand(rsp, 0));
    1388        3228 :       __ Movsd(i.OutputDoubleRegister(), Operand(rsp, 0));
    1389        1614 :       __ addq(rsp, Immediate(kDoubleSize));
    1390             :       unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    1391        3228 :                                                        -kDoubleSize);
    1392             :       break;
    1393             :     }
    1394             :     case kSSEFloat32Max: {
    1395          66 :       Label compare_swap, done_compare;
    1396         132 :       if (instr->InputAt(1)->IsFPRegister()) {
    1397             :         __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1398             :       } else {
    1399           0 :         __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
    1400             :       }
    1401             :       auto ool =
    1402          66 :           new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
    1403          66 :       __ j(parity_even, ool->entry());
    1404          66 :       __ j(above, &done_compare, Label::kNear);
    1405          66 :       __ j(below, &compare_swap, Label::kNear);
    1406          66 :       __ Movmskps(kScratchRegister, i.InputDoubleRegister(0));
    1407             :       __ testl(kScratchRegister, Immediate(1));
    1408          66 :       __ j(zero, &done_compare, Label::kNear);
    1409          66 :       __ bind(&compare_swap);
    1410         132 :       if (instr->InputAt(1)->IsFPRegister()) {
    1411          66 :         __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1412             :       } else {
    1413           0 :         __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
    1414             :       }
    1415          66 :       __ bind(&done_compare);
    1416          66 :       __ bind(ool->exit());
    1417             :       break;
    1418             :     }
    1419             :     case kSSEFloat32Min: {
    1420          66 :       Label compare_swap, done_compare;
    1421         132 :       if (instr->InputAt(1)->IsFPRegister()) {
    1422             :         __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1423             :       } else {
    1424           0 :         __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
    1425             :       }
    1426             :       auto ool =
    1427          66 :           new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
    1428          66 :       __ j(parity_even, ool->entry());
    1429          66 :       __ j(below, &done_compare, Label::kNear);
    1430          66 :       __ j(above, &compare_swap, Label::kNear);
    1431         132 :       if (instr->InputAt(1)->IsFPRegister()) {
    1432          66 :         __ Movmskps(kScratchRegister, i.InputDoubleRegister(1));
    1433             :       } else {
    1434           0 :         __ Movss(kScratchDoubleReg, i.InputOperand(1));
    1435             :         __ Movmskps(kScratchRegister, kScratchDoubleReg);
    1436             :       }
    1437             :       __ testl(kScratchRegister, Immediate(1));
    1438          66 :       __ j(zero, &done_compare, Label::kNear);
    1439          66 :       __ bind(&compare_swap);
    1440         132 :       if (instr->InputAt(1)->IsFPRegister()) {
    1441          66 :         __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1442             :       } else {
    1443           0 :         __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
    1444             :       }
    1445          66 :       __ bind(&done_compare);
    1446          66 :       __ bind(ool->exit());
    1447             :       break;
    1448             :     }
    1449             :     case kSSEFloat64Max: {
    1450         250 :       Label compare_swap, done_compare;
    1451         500 :       if (instr->InputAt(1)->IsFPRegister()) {
    1452             :         __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1453             :       } else {
    1454           0 :         __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
    1455             :       }
    1456             :       auto ool =
    1457         250 :           new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
    1458         250 :       __ j(parity_even, ool->entry());
    1459         250 :       __ j(above, &done_compare, Label::kNear);
    1460         250 :       __ j(below, &compare_swap, Label::kNear);
    1461         250 :       __ Movmskpd(kScratchRegister, i.InputDoubleRegister(0));
    1462             :       __ testl(kScratchRegister, Immediate(1));
    1463         250 :       __ j(zero, &done_compare, Label::kNear);
    1464         250 :       __ bind(&compare_swap);
    1465         500 :       if (instr->InputAt(1)->IsFPRegister()) {
    1466         250 :         __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1467             :       } else {
    1468           0 :         __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
    1469             :       }
    1470         250 :       __ bind(&done_compare);
    1471         250 :       __ bind(ool->exit());
    1472             :       break;
    1473             :     }
    1474             :     case kSSEFloat64Min: {
    1475         335 :       Label compare_swap, done_compare;
    1476         670 :       if (instr->InputAt(1)->IsFPRegister()) {
    1477             :         __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1478             :       } else {
    1479           0 :         __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
    1480             :       }
    1481             :       auto ool =
    1482         335 :           new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
    1483         335 :       __ j(parity_even, ool->entry());
    1484         335 :       __ j(below, &done_compare, Label::kNear);
    1485         335 :       __ j(above, &compare_swap, Label::kNear);
    1486         670 :       if (instr->InputAt(1)->IsFPRegister()) {
    1487         335 :         __ Movmskpd(kScratchRegister, i.InputDoubleRegister(1));
    1488             :       } else {
    1489           0 :         __ Movsd(kScratchDoubleReg, i.InputOperand(1));
    1490             :         __ Movmskpd(kScratchRegister, kScratchDoubleReg);
    1491             :       }
    1492             :       __ testl(kScratchRegister, Immediate(1));
    1493         335 :       __ j(zero, &done_compare, Label::kNear);
    1494         335 :       __ bind(&compare_swap);
    1495         670 :       if (instr->InputAt(1)->IsFPRegister()) {
    1496         335 :         __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1497             :       } else {
    1498           0 :         __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
    1499             :       }
    1500         335 :       __ bind(&done_compare);
    1501         335 :       __ bind(ool->exit());
    1502             :       break;
    1503             :     }
    1504             :     case kSSEFloat64Abs: {
    1505             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1506           6 :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    1507           6 :       __ psrlq(kScratchDoubleReg, 1);
    1508          12 :       __ andpd(i.OutputDoubleRegister(), kScratchDoubleReg);
    1509           6 :       break;
    1510             :     }
    1511             :     case kSSEFloat64Neg: {
    1512             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1513          76 :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    1514          76 :       __ psllq(kScratchDoubleReg, 63);
    1515         152 :       __ xorpd(i.OutputDoubleRegister(), kScratchDoubleReg);
    1516          76 :       break;
    1517             :     }
    1518             :     case kSSEFloat64Sqrt:
    1519         830 :       ASSEMBLE_SSE_UNOP(Sqrtsd);
    1520             :       break;
    1521             :     case kSSEFloat64Round: {
    1522             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    1523             :       RoundingMode const mode =
    1524             :           static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
    1525             :       __ Roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
    1526             :       break;
    1527             :     }
    1528             :     case kSSEFloat64ToFloat32:
    1529       53814 :       ASSEMBLE_SSE_UNOP(Cvtsd2ss);
    1530             :       break;
    1531             :     case kSSEFloat64ToInt32:
    1532      251060 :       if (instr->InputAt(0)->IsFPRegister()) {
    1533      105217 :         __ Cvttsd2si(i.OutputRegister(), i.InputDoubleRegister(0));
    1534             :       } else {
    1535       40626 :         __ Cvttsd2si(i.OutputRegister(), i.InputOperand(0));
    1536             :       }
    1537             :       break;
    1538             :     case kSSEFloat64ToUint32: {
    1539        1520 :       if (instr->InputAt(0)->IsFPRegister()) {
    1540         760 :         __ Cvttsd2siq(i.OutputRegister(), i.InputDoubleRegister(0));
    1541             :       } else {
    1542           0 :         __ Cvttsd2siq(i.OutputRegister(), i.InputOperand(0));
    1543             :       }
    1544        1520 :       if (MiscField::decode(instr->opcode())) {
    1545        1400 :         __ AssertZeroExtended(i.OutputRegister());
    1546             :       }
    1547             :       break;
    1548             :     }
    1549             :     case kSSEFloat32ToInt64:
    1550         104 :       if (instr->InputAt(0)->IsFPRegister()) {
    1551          52 :         __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
    1552             :       } else {
    1553           0 :         __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
    1554             :       }
    1555          52 :       if (instr->OutputCount() > 1) {
    1556          96 :         __ Set(i.OutputRegister(1), 1);
    1557          48 :         Label done;
    1558          48 :         Label fail;
    1559             :         __ Move(kScratchDoubleReg, static_cast<float>(INT64_MIN));
    1560          96 :         if (instr->InputAt(0)->IsFPRegister()) {
    1561          48 :           __ Ucomiss(kScratchDoubleReg, i.InputDoubleRegister(0));
    1562             :         } else {
    1563           0 :           __ Ucomiss(kScratchDoubleReg, i.InputOperand(0));
    1564             :         }
    1565             :         // If the input is NaN, then the conversion fails.
    1566          48 :         __ j(parity_even, &fail);
    1567             :         // If the input is INT64_MIN, then the conversion succeeds.
    1568          48 :         __ j(equal, &done);
    1569          96 :         __ cmpq(i.OutputRegister(0), Immediate(1));
    1570             :         // If the conversion results in INT64_MIN, but the input was not
    1571             :         // INT64_MIN, then the conversion fails.
    1572          48 :         __ j(no_overflow, &done);
    1573          48 :         __ bind(&fail);
    1574          96 :         __ Set(i.OutputRegister(1), 0);
    1575          48 :         __ bind(&done);
    1576             :       }
    1577             :       break;
    1578             :     case kSSEFloat64ToInt64:
    1579        1440 :       if (instr->InputAt(0)->IsFPRegister()) {
    1580         719 :         __ Cvttsd2siq(i.OutputRegister(0), i.InputDoubleRegister(0));
    1581             :       } else {
    1582           2 :         __ Cvttsd2siq(i.OutputRegister(0), i.InputOperand(0));
    1583             :       }
    1584         722 :       if (instr->OutputCount() > 1) {
    1585        1214 :         __ Set(i.OutputRegister(1), 1);
    1586         608 :         Label done;
    1587         608 :         Label fail;
    1588             :         __ Move(kScratchDoubleReg, static_cast<double>(INT64_MIN));
    1589        1216 :         if (instr->InputAt(0)->IsFPRegister()) {
    1590         608 :           __ Ucomisd(kScratchDoubleReg, i.InputDoubleRegister(0));
    1591             :         } else {
    1592           0 :           __ Ucomisd(kScratchDoubleReg, i.InputOperand(0));
    1593             :         }
    1594             :         // If the input is NaN, then the conversion fails.
    1595         608 :         __ j(parity_even, &fail);
    1596             :         // If the input is INT64_MIN, then the conversion succeeds.
    1597         608 :         __ j(equal, &done);
    1598        1216 :         __ cmpq(i.OutputRegister(0), Immediate(1));
    1599             :         // If the conversion results in INT64_MIN, but the input was not
    1600             :         // INT64_MIN, then the conversion fails.
    1601         606 :         __ j(no_overflow, &done);
    1602         605 :         __ bind(&fail);
    1603        1212 :         __ Set(i.OutputRegister(1), 0);
    1604         607 :         __ bind(&done);
    1605             :       }
    1606             :       break;
    1607             :     case kSSEFloat32ToUint64: {
    1608          52 :       Label fail;
    1609         100 :       if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
    1610         104 :       if (instr->InputAt(0)->IsFPRegister()) {
    1611         104 :         __ Cvttss2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
    1612             :       } else {
    1613           0 :         __ Cvttss2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
    1614             :       }
    1615         100 :       if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
    1616          52 :       __ bind(&fail);
    1617             :       break;
    1618             :     }
    1619             :     case kSSEFloat64ToUint64: {
    1620        3756 :       Label fail;
    1621        3812 :       if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
    1622        7512 :       if (instr->InputAt(0)->IsFPRegister()) {
    1623        7512 :         __ Cvttsd2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
    1624             :       } else {
    1625           0 :         __ Cvttsd2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
    1626             :       }
    1627        3812 :       if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
    1628        3756 :       __ bind(&fail);
    1629             :       break;
    1630             :     }
    1631             :     case kSSEInt32ToFloat64:
    1632      745152 :       if (instr->InputAt(0)->IsRegister()) {
    1633      368662 :         __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
    1634             :       } else {
    1635        7828 :         __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
    1636             :       }
    1637             :       break;
    1638             :     case kSSEInt32ToFloat32:
    1639        1920 :       if (instr->InputAt(0)->IsRegister()) {
    1640         952 :         __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
    1641             :       } else {
    1642          16 :         __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
    1643             :       }
    1644             :       break;
    1645             :     case kSSEInt64ToFloat32:
    1646          64 :       if (instr->InputAt(0)->IsRegister()) {
    1647          32 :         __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
    1648             :       } else {
    1649           0 :         __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
    1650             :       }
    1651             :       break;
    1652             :     case kSSEInt64ToFloat64:
    1653        6394 :       if (instr->InputAt(0)->IsRegister()) {
    1654        1079 :         __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
    1655             :       } else {
    1656        4236 :         __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
    1657             :       }
    1658             :       break;
    1659             :     case kSSEUint64ToFloat32:
    1660          64 :       if (instr->InputAt(0)->IsRegister()) {
    1661          32 :         __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
    1662             :       } else {
    1663           0 :         __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
    1664             :       }
    1665             :       break;
    1666             :     case kSSEUint64ToFloat64:
    1667        6992 :       if (instr->InputAt(0)->IsRegister()) {
    1668        2376 :         __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
    1669             :       } else {
    1670        2240 :         __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
    1671             :       }
    1672             :       break;
    1673             :     case kSSEUint32ToFloat64:
    1674       21492 :       if (instr->InputAt(0)->IsRegister()) {
    1675         402 :         __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
    1676             :       } else {
    1677       20688 :         __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
    1678             :       }
    1679             :       break;
    1680             :     case kSSEUint32ToFloat32:
    1681         176 :       if (instr->InputAt(0)->IsRegister()) {
    1682          88 :         __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
    1683             :       } else {
    1684           0 :         __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
    1685             :       }
    1686             :       break;
    1687             :     case kSSEFloat64ExtractLowWord32:
    1688         232 :       if (instr->InputAt(0)->IsFPStackSlot()) {
    1689           0 :         __ movl(i.OutputRegister(), i.InputOperand(0));
    1690             :       } else {
    1691             :         __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
    1692             :       }
    1693             :       break;
    1694             :     case kSSEFloat64ExtractHighWord32:
    1695      186458 :       if (instr->InputAt(0)->IsFPStackSlot()) {
    1696      117898 :         __ movl(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2));
    1697             :       } else {
    1698       34280 :         __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1);
    1699             :       }
    1700             :       break;
    1701             :     case kSSEFloat64InsertLowWord32:
    1702           8 :       if (instr->InputAt(1)->IsRegister()) {
    1703           4 :         __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 0);
    1704             :       } else {
    1705           0 :         __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
    1706             :       }
    1707             :       break;
    1708             :     case kSSEFloat64InsertHighWord32:
    1709         232 :       if (instr->InputAt(1)->IsRegister()) {
    1710         116 :         __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 1);
    1711             :       } else {
    1712           0 :         __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
    1713             :       }
    1714             :       break;
    1715             :     case kSSEFloat64LoadLowWord32:
    1716         224 :       if (instr->InputAt(0)->IsRegister()) {
    1717             :         __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
    1718             :       } else {
    1719           0 :         __ Movd(i.OutputDoubleRegister(), i.InputOperand(0));
    1720             :       }
    1721             :       break;
    1722             :     case kAVXFloat32Cmp: {
    1723             :       CpuFeatureScope avx_scope(tasm(), AVX);
    1724        3770 :       if (instr->InputAt(1)->IsFPRegister()) {
    1725        1867 :         __ vucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1726             :       } else {
    1727          36 :         __ vucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
    1728             :       }
    1729             :       break;
    1730             :     }
    1731             :     case kAVXFloat32Add:
    1732        5331 :       ASSEMBLE_AVX_BINOP(vaddss);
    1733             :       break;
    1734             :     case kAVXFloat32Sub:
    1735        7683 :       ASSEMBLE_AVX_BINOP(vsubss);
    1736             :       break;
    1737             :     case kAVXFloat32Mul:
    1738        2619 :       ASSEMBLE_AVX_BINOP(vmulss);
    1739             :       break;
    1740             :     case kAVXFloat32Div:
    1741        1059 :       ASSEMBLE_AVX_BINOP(vdivss);
    1742             :       // Don't delete this mov. It may improve performance on some CPUs,
    1743             :       // when there is a (v)mulss depending on the result.
    1744         353 :       __ Movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
    1745             :       break;
    1746             :     case kAVXFloat64Cmp: {
    1747             :       CpuFeatureScope avx_scope(tasm(), AVX);
    1748      512950 :       if (instr->InputAt(1)->IsFPRegister()) {
    1749      235797 :         __ vucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1750             :       } else {
    1751       41356 :         __ vucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
    1752             :       }
    1753             :       break;
    1754             :     }
    1755             :     case kAVXFloat64Add:
    1756      239616 :       ASSEMBLE_AVX_BINOP(vaddsd);
    1757             :       break;
    1758             :     case kAVXFloat64Sub:
    1759       44814 :       ASSEMBLE_AVX_BINOP(vsubsd);
    1760             :       break;
    1761             :     case kAVXFloat64Mul:
    1762       37497 :       ASSEMBLE_AVX_BINOP(vmulsd);
    1763             :       break;
    1764             :     case kAVXFloat64Div:
    1765       35697 :       ASSEMBLE_AVX_BINOP(vdivsd);
    1766             :       // Don't delete this mov. It may improve performance on some CPUs,
    1767             :       // when there is a (v)mulsd depending on the result.
    1768       11899 :       __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
    1769             :       break;
    1770             :     case kAVXFloat32Abs: {
    1771             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1772             :       CpuFeatureScope avx_scope(tasm(), AVX);
    1773          66 :       __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
    1774             :       __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 33);
    1775         132 :       if (instr->InputAt(0)->IsFPRegister()) {
    1776             :         __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg,
    1777          66 :                   i.InputDoubleRegister(0));
    1778             :       } else {
    1779             :         __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg,
    1780           0 :                   i.InputOperand(0));
    1781             :       }
    1782             :       break;
    1783             :     }
    1784             :     case kAVXFloat32Neg: {
    1785             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1786             :       CpuFeatureScope avx_scope(tasm(), AVX);
    1787         168 :       __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
    1788             :       __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 31);
    1789         336 :       if (instr->InputAt(0)->IsFPRegister()) {
    1790             :         __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg,
    1791         168 :                   i.InputDoubleRegister(0));
    1792             :       } else {
    1793             :         __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg,
    1794           0 :                   i.InputOperand(0));
    1795             :       }
    1796             :       break;
    1797             :     }
    1798             :     case kAVXFloat64Abs: {
    1799             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1800             :       CpuFeatureScope avx_scope(tasm(), AVX);
    1801         615 :       __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
    1802             :       __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 1);
    1803        1230 :       if (instr->InputAt(0)->IsFPRegister()) {
    1804             :         __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg,
    1805         615 :                   i.InputDoubleRegister(0));
    1806             :       } else {
    1807             :         __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg,
    1808           0 :                   i.InputOperand(0));
    1809             :       }
    1810             :       break;
    1811             :     }
    1812             :     case kAVXFloat64Neg: {
    1813             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1814             :       CpuFeatureScope avx_scope(tasm(), AVX);
    1815        9657 :       __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
    1816             :       __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 63);
    1817       19314 :       if (instr->InputAt(0)->IsFPRegister()) {
    1818             :         __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg,
    1819        9574 :                   i.InputDoubleRegister(0));
    1820             :       } else {
    1821             :         __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg,
    1822          83 :                   i.InputOperand(0));
    1823             :       }
    1824             :       break;
    1825             :     }
    1826             :     case kSSEFloat64SilenceNaN:
    1827             :       __ Xorpd(kScratchDoubleReg, kScratchDoubleReg);
    1828        5907 :       __ Subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
    1829             :       break;
    1830             :     case kX64Movsxbl:
    1831       87736 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1832      132215 :       ASSEMBLE_MOVX(movsxbl);
    1833       87736 :       __ AssertZeroExtended(i.OutputRegister());
    1834       43868 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1835       43868 :       break;
    1836             :     case kX64Movzxbl:
    1837      361834 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1838      545138 :       ASSEMBLE_MOVX(movzxbl);
    1839      361834 :       __ AssertZeroExtended(i.OutputRegister());
    1840      180917 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1841      180917 :       break;
    1842             :     case kX64Movsxbq:
    1843       27126 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1844       40693 :       ASSEMBLE_MOVX(movsxbq);
    1845       13563 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1846       13563 :       break;
    1847             :     case kX64Movzxbq:
    1848       27592 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1849       41388 :       ASSEMBLE_MOVX(movzxbq);
    1850       27592 :       __ AssertZeroExtended(i.OutputRegister());
    1851       13796 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1852       13796 :       break;
    1853             :     case kX64Movb: {
    1854      151814 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1855       75907 :       size_t index = 0;
    1856       75907 :       Operand operand = i.MemoryOperand(&index);
    1857      151814 :       if (HasImmediateInput(instr, index)) {
    1858       13284 :         __ movb(operand, Immediate(i.InputInt8(index)));
    1859             :       } else {
    1860      138530 :         __ movb(operand, i.InputRegister(index));
    1861             :       }
    1862       75907 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1863             :       break;
    1864             :     }
    1865             :     case kX64Movsxwl:
    1866       22886 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1867       34811 :       ASSEMBLE_MOVX(movsxwl);
    1868       22886 :       __ AssertZeroExtended(i.OutputRegister());
    1869       11443 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1870       11443 :       break;
    1871             :     case kX64Movzxwl:
    1872      318468 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1873      484094 :       ASSEMBLE_MOVX(movzxwl);
    1874      318468 :       __ AssertZeroExtended(i.OutputRegister());
    1875      159234 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1876      159234 :       break;
    1877             :     case kX64Movsxwq:
    1878       17942 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1879       26917 :       ASSEMBLE_MOVX(movsxwq);
    1880             :       break;
    1881             :     case kX64Movzxwq:
    1882        1344 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1883        2016 :       ASSEMBLE_MOVX(movzxwq);
    1884        1344 :       __ AssertZeroExtended(i.OutputRegister());
    1885         672 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1886         672 :       break;
    1887             :     case kX64Movw: {
    1888       23686 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1889       11843 :       size_t index = 0;
    1890       11843 :       Operand operand = i.MemoryOperand(&index);
    1891       23686 :       if (HasImmediateInput(instr, index)) {
    1892        1734 :         __ movw(operand, Immediate(i.InputInt16(index)));
    1893             :       } else {
    1894       21952 :         __ movw(operand, i.InputRegister(index));
    1895             :       }
    1896       11843 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1897             :       break;
    1898             :     }
    1899             :     case kX64Movl:
    1900     4308102 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1901     2153980 :       if (instr->HasOutput()) {
    1902      611134 :         if (instr->addressing_mode() == kMode_None) {
    1903      119170 :           if (instr->InputAt(0)->IsRegister()) {
    1904      111908 :             __ movl(i.OutputRegister(), i.InputRegister(0));
    1905             :           } else {
    1906        7262 :             __ movl(i.OutputRegister(), i.InputOperand(0));
    1907             :           }
    1908             :         } else {
    1909     1103108 :           __ movl(i.OutputRegister(), i.MemoryOperand());
    1910             :         }
    1911     1222310 :         __ AssertZeroExtended(i.OutputRegister());
    1912             :       } else {
    1913     1542846 :         size_t index = 0;
    1914     1542846 :         Operand operand = i.MemoryOperand(&index);
    1915     3085882 :         if (HasImmediateInput(instr, index)) {
    1916      488154 :           __ movl(operand, i.InputImmediate(index));
    1917             :         } else {
    1918     2109574 :           __ movl(operand, i.InputRegister(index));
    1919             :         }
    1920             :       }
    1921     2154054 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1922     2154076 :       break;
    1923             :     case kX64Movsxlq:
    1924      668606 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1925     1086701 :       ASSEMBLE_MOVX(movsxlq);
    1926      334303 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1927      334303 :       break;
    1928             :     case kX64MovqDecompressTaggedSigned: {
    1929           0 :       CHECK(instr->HasOutput());
    1930           0 :       __ DecompressTaggedSigned(i.OutputRegister(), i.MemoryOperand());
    1931           0 :       break;
    1932             :     }
    1933             :     case kX64MovqDecompressTaggedPointer: {
    1934           0 :       CHECK(instr->HasOutput());
    1935           0 :       __ DecompressTaggedPointer(i.OutputRegister(), i.MemoryOperand());
    1936           0 :       break;
    1937             :     }
    1938             :     case kX64MovqDecompressAnyTagged: {
    1939           0 :       CHECK(instr->HasOutput());
    1940             :       __ DecompressAnyTagged(i.OutputRegister(), i.MemoryOperand(),
    1941           0 :                              i.TempRegister(0));
    1942           0 :       break;
    1943             :     }
    1944             :     case kX64MovqCompressTagged: {
    1945           0 :       CHECK(!instr->HasOutput());
    1946           0 :       size_t index = 0;
    1947           0 :       Operand operand = i.MemoryOperand(&index);
    1948           0 :       if (HasImmediateInput(instr, index)) {
    1949           0 :         __ StoreTaggedField(operand, i.InputImmediate(index));
    1950             :       } else {
    1951           0 :         __ StoreTaggedField(operand, i.InputRegister(index));
    1952             :       }
    1953             :       break;
    1954             :     }
    1955             :     case kX64Movq:
    1956    15395520 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1957     7697749 :       if (instr->HasOutput()) {
    1958     9747362 :         __ movq(i.OutputRegister(), i.MemoryOperand());
    1959             :       } else {
    1960     2824154 :         size_t index = 0;
    1961     2824154 :         Operand operand = i.MemoryOperand(&index);
    1962     5648310 :         if (HasImmediateInput(instr, index)) {
    1963       20762 :           __ movq(operand, i.InputImmediate(index));
    1964             :         } else {
    1965     5606786 :           __ movq(operand, i.InputRegister(index));
    1966             :         }
    1967             :       }
    1968     7697865 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1969     7697847 :       break;
    1970             :     case kX64Movss:
    1971       53200 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1972       26601 :       if (instr->HasOutput()) {
    1973       32130 :         __ movss(i.OutputDoubleRegister(), i.MemoryOperand());
    1974             :       } else {
    1975       10536 :         size_t index = 0;
    1976       10536 :         Operand operand = i.MemoryOperand(&index);
    1977       21072 :         __ movss(operand, i.InputDoubleRegister(index));
    1978             :       }
    1979             :       break;
    1980             :     case kX64Movsd: {
    1981     1228050 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1982      614044 :       if (instr->HasOutput()) {
    1983             :         const MemoryAccessMode access_mode =
    1984             :             static_cast<MemoryAccessMode>(MiscField::decode(opcode));
    1985      423854 :         if (access_mode == kMemoryAccessPoisoned) {
    1986             :           // If we have to poison the loaded value, we load into a general
    1987             :           // purpose register first, mask it with the poison, and move the
    1988             :           // value from the general purpose register into the double register.
    1989           0 :           __ movq(kScratchRegister, i.MemoryOperand());
    1990           0 :           __ andq(kScratchRegister, kSpeculationPoisonRegister);
    1991           0 :           __ Movq(i.OutputDoubleRegister(), kScratchRegister);
    1992             :         } else {
    1993      423854 :           __ Movsd(i.OutputDoubleRegister(), i.MemoryOperand());
    1994             :         }
    1995             :       } else {
    1996      190190 :         size_t index = 0;
    1997      190190 :         Operand operand = i.MemoryOperand(&index);
    1998      190192 :         __ Movsd(operand, i.InputDoubleRegister(index));
    1999             :       }
    2000             :       break;
    2001             :     }
    2002             :     case kX64Movdqu: {
    2003             :       CpuFeatureScope sse_scope(tasm(), SSSE3);
    2004       17632 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    2005        8816 :       if (instr->HasOutput()) {
    2006       11584 :         __ movdqu(i.OutputSimd128Register(), i.MemoryOperand());
    2007             :       } else {
    2008        3024 :         size_t index = 0;
    2009        3024 :         Operand operand = i.MemoryOperand(&index);
    2010        6048 :         __ movdqu(operand, i.InputSimd128Register(index));
    2011             :       }
    2012             :       break;
    2013             :     }
    2014             :     case kX64BitcastFI:
    2015        1112 :       if (instr->InputAt(0)->IsFPStackSlot()) {
    2016           0 :         __ movl(i.OutputRegister(), i.InputOperand(0));
    2017             :       } else {
    2018             :         __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
    2019             :       }
    2020             :       break;
    2021             :     case kX64BitcastDL:
    2022        1048 :       if (instr->InputAt(0)->IsFPStackSlot()) {
    2023           0 :         __ movq(i.OutputRegister(), i.InputOperand(0));
    2024             :       } else {
    2025             :         __ Movq(i.OutputRegister(), i.InputDoubleRegister(0));
    2026             :       }
    2027             :       break;
    2028             :     case kX64BitcastIF:
    2029         616 :       if (instr->InputAt(0)->IsRegister()) {
    2030             :         __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
    2031             :       } else {
    2032           0 :         __ movss(i.OutputDoubleRegister(), i.InputOperand(0));
    2033             :       }
    2034             :       break;
    2035             :     case kX64BitcastLD:
    2036         310 :       if (instr->InputAt(0)->IsRegister()) {
    2037             :         __ Movq(i.OutputDoubleRegister(), i.InputRegister(0));
    2038             :       } else {
    2039           0 :         __ Movsd(i.OutputDoubleRegister(), i.InputOperand(0));
    2040             :       }
    2041             :       break;
    2042             :     case kX64Lea32: {
    2043             :       AddressingMode mode = AddressingModeField::decode(instr->opcode());
    2044             :       // Shorten "leal" to "addl", "subl" or "shll" if the register allocation
    2045             :       // and addressing mode just happen to work out. The "addl"/"subl" forms
    2046             :       // in these cases are faster based on measurements.
    2047      323724 :       if (i.InputRegister(0) == i.OutputRegister()) {
    2048      143912 :         if (mode == kMode_MRI) {
    2049       76789 :           int32_t constant_summand = i.InputInt32(1);
    2050             :           DCHECK_NE(0, constant_summand);
    2051       76790 :           if (constant_summand > 0) {
    2052      110416 :             __ addl(i.OutputRegister(), Immediate(constant_summand));
    2053             :           } else {
    2054             :             __ subl(i.OutputRegister(),
    2055       43164 :                     Immediate(base::NegateWithWraparound(constant_summand)));
    2056             :           }
    2057       67123 :         } else if (mode == kMode_MR1) {
    2058       12664 :           if (i.InputRegister(1) == i.OutputRegister()) {
    2059         966 :             __ shll(i.OutputRegister(), Immediate(1));
    2060             :           } else {
    2061       11698 :             __ addl(i.OutputRegister(), i.InputRegister(1));
    2062             :           }
    2063       54459 :         } else if (mode == kMode_M2) {
    2064           0 :           __ shll(i.OutputRegister(), Immediate(1));
    2065       54459 :         } else if (mode == kMode_M4) {
    2066         366 :           __ shll(i.OutputRegister(), Immediate(2));
    2067       54093 :         } else if (mode == kMode_M8) {
    2068        2703 :           __ shll(i.OutputRegister(), Immediate(3));
    2069             :         } else {
    2070      102780 :           __ leal(i.OutputRegister(), i.MemoryOperand());
    2071             :         }
    2072      205378 :       } else if (mode == kMode_MR1 &&
    2073             :                  i.InputRegister(1) == i.OutputRegister()) {
    2074       17027 :         __ addl(i.OutputRegister(), i.InputRegister(0));
    2075             :       } else {
    2076      325564 :         __ leal(i.OutputRegister(), i.MemoryOperand());
    2077             :       }
    2078      647456 :       __ AssertZeroExtended(i.OutputRegister());
    2079      323728 :       break;
    2080             :     }
    2081             :     case kX64Lea: {
    2082             :       AddressingMode mode = AddressingModeField::decode(instr->opcode());
    2083             :       // Shorten "leaq" to "addq", "subq" or "shlq" if the register allocation
    2084             :       // and addressing mode just happen to work out. The "addq"/"subq" forms
    2085             :       // in these cases are faster based on measurements.
    2086     2118719 :       if (i.InputRegister(0) == i.OutputRegister()) {
    2087      627549 :         if (mode == kMode_MRI) {
    2088      524100 :           int32_t constant_summand = i.InputInt32(1);
    2089      524102 :           if (constant_summand > 0) {
    2090      860264 :             __ addq(i.OutputRegister(), Immediate(constant_summand));
    2091       93970 :           } else if (constant_summand < 0) {
    2092      281838 :             __ subq(i.OutputRegister(), Immediate(-constant_summand));
    2093             :           }
    2094      103449 :         } else if (mode == kMode_MR1) {
    2095       51484 :           if (i.InputRegister(1) == i.OutputRegister()) {
    2096        2717 :             __ shlq(i.OutputRegister(), Immediate(1));
    2097             :           } else {
    2098       48767 :             __ addq(i.OutputRegister(), i.InputRegister(1));
    2099             :           }
    2100       51965 :         } else if (mode == kMode_M2) {
    2101           0 :           __ shlq(i.OutputRegister(), Immediate(1));
    2102       51965 :         } else if (mode == kMode_M4) {
    2103         336 :           __ shlq(i.OutputRegister(), Immediate(2));
    2104       51629 :         } else if (mode == kMode_M8) {
    2105       12344 :           __ shlq(i.OutputRegister(), Immediate(3));
    2106             :         } else {
    2107       78570 :           __ leaq(i.OutputRegister(), i.MemoryOperand());
    2108             :         }
    2109     1743999 :       } else if (mode == kMode_MR1 &&
    2110             :                  i.InputRegister(1) == i.OutputRegister()) {
    2111      179045 :         __ addq(i.OutputRegister(), i.InputRegister(0));
    2112             :       } else {
    2113     2624260 :         __ leaq(i.OutputRegister(), i.MemoryOperand());
    2114             :       }
    2115             :       break;
    2116             :     }
    2117             :     case kX64Dec32:
    2118           0 :       __ decl(i.OutputRegister());
    2119             :       break;
    2120             :     case kX64Inc32:
    2121           0 :       __ incl(i.OutputRegister());
    2122             :       break;
    2123             :     case kX64Push:
    2124     3089391 :       if (AddressingModeField::decode(instr->opcode()) != kMode_None) {
    2125       19480 :         size_t index = 0;
    2126       19480 :         Operand operand = i.MemoryOperand(&index);
    2127       19481 :         __ pushq(operand);
    2128             :         frame_access_state()->IncreaseSPDelta(1);
    2129             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    2130       38962 :                                                          kSystemPointerSize);
    2131     3069911 :       } else if (HasImmediateInput(instr, 0)) {
    2132      159249 :         __ pushq(i.InputImmediate(0));
    2133             :         frame_access_state()->IncreaseSPDelta(1);
    2134             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    2135      318498 :                                                          kSystemPointerSize);
    2136     2910663 :       } else if (instr->InputAt(0)->IsRegister()) {
    2137     2181832 :         __ pushq(i.InputRegister(0));
    2138             :         frame_access_state()->IncreaseSPDelta(1);
    2139             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    2140     4363680 :                                                          kSystemPointerSize);
    2141     1449742 :       } else if (instr->InputAt(0)->IsFloatRegister() ||
    2142             :                  instr->InputAt(0)->IsDoubleRegister()) {
    2143             :         // TODO(titzer): use another machine instruction?
    2144       14806 :         __ subq(rsp, Immediate(kDoubleSize));
    2145             :         frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize);
    2146             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    2147       29612 :                                                          kDoubleSize);
    2148       44418 :         __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
    2149      714025 :       } else if (instr->InputAt(0)->IsSimd128Register()) {
    2150             :         // TODO(titzer): use another machine instruction?
    2151         144 :         __ subq(rsp, Immediate(kSimd128Size));
    2152             :         frame_access_state()->IncreaseSPDelta(kSimd128Size /
    2153             :                                               kSystemPointerSize);
    2154             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    2155         288 :                                                          kSimd128Size);
    2156         432 :         __ Movups(Operand(rsp, 0), i.InputSimd128Register(0));
    2157      728554 :       } else if (instr->InputAt(0)->IsStackSlot() ||
    2158      724666 :                  instr->InputAt(0)->IsFloatStackSlot() ||
    2159             :                  instr->InputAt(0)->IsDoubleStackSlot()) {
    2160      713641 :         __ pushq(i.InputOperand(0));
    2161             :         frame_access_state()->IncreaseSPDelta(1);
    2162             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    2163     1427282 :                                                          kSystemPointerSize);
    2164             :       } else {
    2165             :         DCHECK(instr->InputAt(0)->IsSimd128StackSlot());
    2166         240 :         __ Movups(kScratchDoubleReg, i.InputOperand(0));
    2167             :         // TODO(titzer): use another machine instruction?
    2168         240 :         __ subq(rsp, Immediate(kSimd128Size));
    2169             :         frame_access_state()->IncreaseSPDelta(kSimd128Size /
    2170             :                                               kSystemPointerSize);
    2171             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    2172         480 :                                                          kSimd128Size);
    2173         480 :         __ Movups(Operand(rsp, 0), kScratchDoubleReg);
    2174             :       }
    2175             :       break;
    2176             :     case kX64Poke: {
    2177             :       int slot = MiscField::decode(instr->opcode());
    2178        3392 :       if (HasImmediateInput(instr, 0)) {
    2179        2264 :         __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputImmediate(0));
    2180             :       } else {
    2181        4520 :         __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputRegister(0));
    2182             :       }
    2183             :       break;
    2184             :     }
    2185             :     case kX64Peek: {
    2186        5072 :       int reverse_slot = i.InputInt32(0);
    2187             :       int offset =
    2188        5072 :           FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
    2189        5072 :       if (instr->OutputAt(0)->IsFPRegister()) {
    2190             :         LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
    2191        2528 :         if (op->representation() == MachineRepresentation::kFloat64) {
    2192        2528 :           __ Movsd(i.OutputDoubleRegister(), Operand(rbp, offset));
    2193             :         } else {
    2194             :           DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
    2195        2528 :           __ Movss(i.OutputFloatRegister(), Operand(rbp, offset));
    2196             :         }
    2197             :       } else {
    2198        7632 :         __ movq(i.OutputRegister(), Operand(rbp, offset));
    2199             :       }
    2200             :       break;
    2201             :     }
    2202             :     // TODO(gdeepti): Get rid of redundant moves for F32x4Splat/Extract below
    2203             :     case kX64F32x4Splat: {
    2204         140 :       XMMRegister dst = i.OutputSimd128Register();
    2205         280 :       if (instr->InputAt(0)->IsFPRegister()) {
    2206         140 :         __ movss(dst, i.InputDoubleRegister(0));
    2207             :       } else {
    2208           0 :         __ movss(dst, i.InputOperand(0));
    2209             :       }
    2210         140 :       __ shufps(dst, dst, 0x0);
    2211             :       break;
    2212             :     }
    2213             :     case kX64F32x4ExtractLane: {
    2214             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2215         576 :       __ extractps(kScratchRegister, i.InputSimd128Register(0), i.InputInt8(1));
    2216         576 :       __ movd(i.OutputDoubleRegister(), kScratchRegister);
    2217             :       break;
    2218             :     }
    2219             :     case kX64F32x4ReplaceLane: {
    2220             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2221             :       // The insertps instruction uses imm8[5:4] to indicate the lane
    2222             :       // that needs to be replaced.
    2223          32 :       byte select = i.InputInt8(1) << 4 & 0x30;
    2224          64 :       __ insertps(i.OutputSimd128Register(), i.InputDoubleRegister(2), select);
    2225             :       break;
    2226             :     }
    2227             :     case kX64F32x4SConvertI32x4: {
    2228           4 :       __ cvtdq2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2229           4 :       break;
    2230             :     }
    2231             :     case kX64F32x4UConvertI32x4: {
    2232             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2233             :       DCHECK_NE(i.OutputSimd128Register(), kScratchDoubleReg);
    2234             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2235             :       XMMRegister dst = i.OutputSimd128Register();
    2236           4 :       __ pxor(kScratchDoubleReg, kScratchDoubleReg);      // zeros
    2237           4 :       __ pblendw(kScratchDoubleReg, dst, 0x55);           // get lo 16 bits
    2238             :       __ psubd(dst, kScratchDoubleReg);                   // get hi 16 bits
    2239           4 :       __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // convert lo exactly
    2240           4 :       __ psrld(dst, 1);                  // divide by 2 to get in unsigned range
    2241           4 :       __ cvtdq2ps(dst, dst);             // convert hi exactly
    2242           4 :       __ addps(dst, dst);                // double hi, exactly
    2243           4 :       __ addps(dst, kScratchDoubleReg);  // add hi and lo, may round.
    2244             :       break;
    2245             :     }
    2246             :     case kX64F32x4Abs: {
    2247             :       XMMRegister dst = i.OutputSimd128Register();
    2248             :       XMMRegister src = i.InputSimd128Register(0);
    2249           4 :       if (dst == src) {
    2250           4 :         __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2251           4 :         __ psrld(kScratchDoubleReg, 1);
    2252           8 :         __ andps(i.OutputSimd128Register(), kScratchDoubleReg);
    2253             :       } else {
    2254           0 :         __ pcmpeqd(dst, dst);
    2255           0 :         __ psrld(dst, 1);
    2256           0 :         __ andps(dst, i.InputSimd128Register(0));
    2257             :       }
    2258             :       break;
    2259             :     }
    2260             :     case kX64F32x4Neg: {
    2261             :       XMMRegister dst = i.OutputSimd128Register();
    2262             :       XMMRegister src = i.InputSimd128Register(0);
    2263           4 :       if (dst == src) {
    2264           4 :         __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2265           4 :         __ pslld(kScratchDoubleReg, 31);
    2266           8 :         __ xorps(i.OutputSimd128Register(), kScratchDoubleReg);
    2267             :       } else {
    2268           0 :         __ pcmpeqd(dst, dst);
    2269           0 :         __ pslld(dst, 31);
    2270           0 :         __ xorps(dst, i.InputSimd128Register(0));
    2271             :       }
    2272             :       break;
    2273             :     }
    2274             :     case kX64F32x4RecipApprox: {
    2275           4 :       __ rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2276           4 :       break;
    2277             :     }
    2278             :     case kX64F32x4RecipSqrtApprox: {
    2279           4 :       __ rsqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2280           4 :       break;
    2281             :     }
    2282             :     case kX64F32x4Add: {
    2283             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2284          12 :       __ addps(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2285          12 :       break;
    2286             :     }
    2287             :     case kX64F32x4AddHoriz: {
    2288             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2289             :       CpuFeatureScope sse_scope(tasm(), SSE3);
    2290           4 :       __ haddps(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2291             :       break;
    2292             :     }
    2293             :     case kX64F32x4Sub: {
    2294             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2295           4 :       __ subps(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2296           4 :       break;
    2297             :     }
    2298             :     case kX64F32x4Mul: {
    2299             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2300           4 :       __ mulps(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2301           4 :       break;
    2302             :     }
    2303             :     case kX64F32x4Min: {
    2304             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2305           4 :       __ minps(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2306           4 :       break;
    2307             :     }
    2308             :     case kX64F32x4Max: {
    2309             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2310           4 :       __ maxps(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2311           4 :       break;
    2312             :     }
    2313             :     case kX64F32x4Eq: {
    2314             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2315           4 :       __ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x0);
    2316           4 :       break;
    2317             :     }
    2318             :     case kX64F32x4Ne: {
    2319             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2320           4 :       __ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x4);
    2321           4 :       break;
    2322             :     }
    2323             :     case kX64F32x4Lt: {
    2324             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2325           8 :       __ cmpltps(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2326             :       break;
    2327             :     }
    2328             :     case kX64F32x4Le: {
    2329             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2330           8 :       __ cmpleps(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2331             :       break;
    2332             :     }
    2333             :     case kX64I32x4Splat: {
    2334        1076 :       XMMRegister dst = i.OutputSimd128Register();
    2335        1076 :       __ movd(dst, i.InputRegister(0));
    2336        1076 :       __ pshufd(dst, dst, 0x0);
    2337             :       break;
    2338             :     }
    2339             :     case kX64I32x4ExtractLane: {
    2340             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2341        7992 :       __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
    2342             :       break;
    2343             :     }
    2344             :     case kX64I32x4ReplaceLane: {
    2345             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2346        3568 :       if (instr->InputAt(2)->IsRegister()) {
    2347             :         __ Pinsrd(i.OutputSimd128Register(), i.InputRegister(2),
    2348         592 :                   i.InputInt8(1));
    2349             :       } else {
    2350        2976 :         __ Pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
    2351             :       }
    2352             :       break;
    2353             :     }
    2354             :     case kX64I32x4SConvertF32x4: {
    2355             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2356             :       XMMRegister dst = i.OutputSimd128Register();
    2357             :       // NAN->0
    2358           4 :       __ movaps(kScratchDoubleReg, dst);
    2359             :       __ cmpeqps(kScratchDoubleReg, kScratchDoubleReg);
    2360             :       __ pand(dst, kScratchDoubleReg);
    2361             :       // Set top bit if >= 0 (but not -0.0!)
    2362             :       __ pxor(kScratchDoubleReg, dst);
    2363             :       // Convert
    2364           4 :       __ cvttps2dq(dst, dst);
    2365             :       // Set top bit if >=0 is now < 0
    2366             :       __ pand(kScratchDoubleReg, dst);
    2367           4 :       __ psrad(kScratchDoubleReg, 31);
    2368             :       // Set positive overflow lanes to 0x7FFFFFFF
    2369             :       __ pxor(dst, kScratchDoubleReg);
    2370             :       break;
    2371             :     }
    2372             :     case kX64I32x4SConvertI16x8Low: {
    2373             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2374           4 :       __ pmovsxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2375             :       break;
    2376             :     }
    2377             :     case kX64I32x4SConvertI16x8High: {
    2378             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2379             :       XMMRegister dst = i.OutputSimd128Register();
    2380           4 :       __ palignr(dst, i.InputSimd128Register(0), 8);
    2381             :       __ pmovsxwd(dst, dst);
    2382             :       break;
    2383             :     }
    2384             :     case kX64I32x4Neg: {
    2385             :       CpuFeatureScope sse_scope(tasm(), SSSE3);
    2386             :       XMMRegister dst = i.OutputSimd128Register();
    2387             :       XMMRegister src = i.InputSimd128Register(0);
    2388           4 :       if (dst == src) {
    2389           4 :         __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2390             :         __ psignd(dst, kScratchDoubleReg);
    2391             :       } else {
    2392           0 :         __ pxor(dst, dst);
    2393             :         __ psubd(dst, src);
    2394             :       }
    2395             :       break;
    2396             :     }
    2397             :     case kX64I32x4Shl: {
    2398         248 :       __ pslld(i.OutputSimd128Register(), i.InputInt8(1));
    2399         124 :       break;
    2400             :     }
    2401             :     case kX64I32x4ShrS: {
    2402         248 :       __ psrad(i.OutputSimd128Register(), i.InputInt8(1));
    2403         124 :       break;
    2404             :     }
    2405             :     case kX64I32x4Add: {
    2406          12 :       __ paddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2407             :       break;
    2408             :     }
    2409             :     case kX64I32x4AddHoriz: {
    2410             :       CpuFeatureScope sse_scope(tasm(), SSSE3);
    2411           4 :       __ phaddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2412             :       break;
    2413             :     }
    2414             :     case kX64I32x4Sub: {
    2415           4 :       __ psubd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2416             :       break;
    2417             :     }
    2418             :     case kX64I32x4Mul: {
    2419             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2420           4 :       __ pmulld(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2421             :       break;
    2422             :     }
    2423             :     case kX64I32x4MinS: {
    2424             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2425           4 :       __ pminsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2426             :       break;
    2427             :     }
    2428             :     case kX64I32x4MaxS: {
    2429             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2430           4 :       __ pmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2431             :       break;
    2432             :     }
    2433             :     case kX64I32x4Eq: {
    2434          12 :       __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2435             :       break;
    2436             :     }
    2437             :     case kX64I32x4Ne: {
    2438          16 :       __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2439             :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2440          16 :       __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
    2441             :       break;
    2442             :     }
    2443             :     case kX64I32x4GtS: {
    2444           8 :       __ pcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2445             :       break;
    2446             :     }
    2447             :     case kX64I32x4GeS: {
    2448             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2449             :       XMMRegister dst = i.OutputSimd128Register();
    2450             :       XMMRegister src = i.InputSimd128Register(1);
    2451           8 :       __ pminsd(dst, src);
    2452             :       __ pcmpeqd(dst, src);
    2453             :       break;
    2454             :     }
    2455             :     case kX64I32x4UConvertF32x4: {
    2456             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2457             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2458             :       XMMRegister dst = i.OutputSimd128Register();
    2459           4 :       XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
    2460             :       // NAN->0, negative->0
    2461           4 :       __ pxor(kScratchDoubleReg, kScratchDoubleReg);
    2462           4 :       __ maxps(dst, kScratchDoubleReg);
    2463             :       // scratch: float representation of max_signed
    2464             :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2465           4 :       __ psrld(kScratchDoubleReg, 1);                     // 0x7fffffff
    2466           4 :       __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // 0x4f000000
    2467             :       // tmp: convert (src-max_signed).
    2468             :       // Positive overflow lanes -> 0x7FFFFFFF
    2469             :       // Negative lanes -> 0
    2470           4 :       __ movaps(tmp, dst);
    2471           4 :       __ subps(tmp, kScratchDoubleReg);
    2472             :       __ cmpleps(kScratchDoubleReg, tmp);
    2473           4 :       __ cvttps2dq(tmp, tmp);
    2474             :       __ pxor(tmp, kScratchDoubleReg);
    2475             :       __ pxor(kScratchDoubleReg, kScratchDoubleReg);
    2476             :       __ pmaxsd(tmp, kScratchDoubleReg);
    2477             :       // convert. Overflow lanes above max_signed will be 0x80000000
    2478           4 :       __ cvttps2dq(dst, dst);
    2479             :       // Add (src-max_signed) for overflow lanes.
    2480             :       __ paddd(dst, tmp);
    2481             :       break;
    2482             :     }
    2483             :     case kX64I32x4UConvertI16x8Low: {
    2484             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2485           4 :       __ pmovzxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2486             :       break;
    2487             :     }
    2488             :     case kX64I32x4UConvertI16x8High: {
    2489             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2490             :       XMMRegister dst = i.OutputSimd128Register();
    2491           4 :       __ palignr(dst, i.InputSimd128Register(0), 8);
    2492             :       __ pmovzxwd(dst, dst);
    2493             :       break;
    2494             :     }
    2495             :     case kX64I32x4ShrU: {
    2496         248 :       __ psrld(i.OutputSimd128Register(), i.InputInt8(1));
    2497         124 :       break;
    2498             :     }
    2499             :     case kX64I32x4MinU: {
    2500             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2501           4 :       __ pminud(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2502             :       break;
    2503             :     }
    2504             :     case kX64I32x4MaxU: {
    2505             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2506           4 :       __ pmaxud(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2507             :       break;
    2508             :     }
    2509             :     case kX64I32x4GtU: {
    2510             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2511             :       XMMRegister dst = i.OutputSimd128Register();
    2512             :       XMMRegister src = i.InputSimd128Register(1);
    2513           8 :       __ pmaxud(dst, src);
    2514             :       __ pcmpeqd(dst, src);
    2515             :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2516             :       __ pxor(dst, kScratchDoubleReg);
    2517             :       break;
    2518             :     }
    2519             :     case kX64I32x4GeU: {
    2520             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2521             :       XMMRegister dst = i.OutputSimd128Register();
    2522             :       XMMRegister src = i.InputSimd128Register(1);
    2523           8 :       __ pminud(dst, src);
    2524             :       __ pcmpeqd(dst, src);
    2525             :       break;
    2526             :     }
    2527             :     case kX64S128Zero: {
    2528          16 :       XMMRegister dst = i.OutputSimd128Register();
    2529          16 :       __ xorps(dst, dst);
    2530             :       break;
    2531             :     }
    2532             :     case kX64I16x8Splat: {
    2533         408 :       XMMRegister dst = i.OutputSimd128Register();
    2534         408 :       __ movd(dst, i.InputRegister(0));
    2535         408 :       __ pshuflw(dst, dst, 0x0);
    2536         408 :       __ pshufd(dst, dst, 0x0);
    2537             :       break;
    2538             :     }
    2539             :     case kX64I16x8ExtractLane: {
    2540             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2541        2656 :       Register dst = i.OutputRegister();
    2542        5312 :       __ pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
    2543        2656 :       __ movsxwl(dst, dst);
    2544             :       break;
    2545             :     }
    2546             :     case kX64I16x8ReplaceLane: {
    2547             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2548         112 :       if (instr->InputAt(2)->IsRegister()) {
    2549             :         __ pinsrw(i.OutputSimd128Register(), i.InputRegister(2),
    2550         112 :                   i.InputInt8(1));
    2551             :       } else {
    2552           0 :         __ pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
    2553             :       }
    2554             :       break;
    2555             :     }
    2556             :     case kX64I16x8SConvertI8x16Low: {
    2557             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2558           4 :       __ pmovsxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2559             :       break;
    2560             :     }
    2561             :     case kX64I16x8SConvertI8x16High: {
    2562             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2563             :       XMMRegister dst = i.OutputSimd128Register();
    2564           4 :       __ palignr(dst, i.InputSimd128Register(0), 8);
    2565             :       __ pmovsxbw(dst, dst);
    2566             :       break;
    2567             :     }
    2568             :     case kX64I16x8Neg: {
    2569             :       CpuFeatureScope sse_scope(tasm(), SSSE3);
    2570             :       XMMRegister dst = i.OutputSimd128Register();
    2571             :       XMMRegister src = i.InputSimd128Register(0);
    2572           4 :       if (dst == src) {
    2573           4 :         __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2574             :         __ psignw(dst, kScratchDoubleReg);
    2575             :       } else {
    2576           0 :         __ pxor(dst, dst);
    2577             :         __ psubw(dst, src);
    2578             :       }
    2579             :       break;
    2580             :     }
    2581             :     case kX64I16x8Shl: {
    2582         120 :       __ psllw(i.OutputSimd128Register(), i.InputInt8(1));
    2583          60 :       break;
    2584             :     }
    2585             :     case kX64I16x8ShrS: {
    2586         120 :       __ psraw(i.OutputSimd128Register(), i.InputInt8(1));
    2587          60 :       break;
    2588             :     }
    2589             :     case kX64I16x8SConvertI32x4: {
    2590             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2591           4 :       __ packssdw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2592             :       break;
    2593             :     }
    2594             :     case kX64I16x8Add: {
    2595           4 :       __ paddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2596             :       break;
    2597             :     }
    2598             :     case kX64I16x8AddSaturateS: {
    2599           4 :       __ paddsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2600             :       break;
    2601             :     }
    2602             :     case kX64I16x8AddHoriz: {
    2603             :       CpuFeatureScope sse_scope(tasm(), SSSE3);
    2604           4 :       __ phaddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2605             :       break;
    2606             :     }
    2607             :     case kX64I16x8Sub: {
    2608           4 :       __ psubw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2609             :       break;
    2610             :     }
    2611             :     case kX64I16x8SubSaturateS: {
    2612           4 :       __ psubsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2613             :       break;
    2614             :     }
    2615             :     case kX64I16x8Mul: {
    2616             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2617           4 :       __ pmullw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2618             :       break;
    2619             :     }
    2620             :     case kX64I16x8MinS: {
    2621             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2622           4 :       __ pminsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2623             :       break;
    2624             :     }
    2625             :     case kX64I16x8MaxS: {
    2626             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2627           4 :       __ pmaxsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2628             :       break;
    2629             :     }
    2630             :     case kX64I16x8Eq: {
    2631          12 :       __ pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2632             :       break;
    2633             :     }
    2634             :     case kX64I16x8Ne: {
    2635          16 :       __ pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2636             :       __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
    2637          16 :       __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
    2638             :       break;
    2639             :     }
    2640             :     case kX64I16x8GtS: {
    2641           8 :       __ pcmpgtw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2642             :       break;
    2643             :     }
    2644             :     case kX64I16x8GeS: {
    2645             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2646             :       XMMRegister dst = i.OutputSimd128Register();
    2647             :       XMMRegister src = i.InputSimd128Register(1);
    2648           8 :       __ pminsw(dst, src);
    2649             :       __ pcmpeqw(dst, src);
    2650             :       break;
    2651             :     }
    2652             :     case kX64I16x8UConvertI8x16Low: {
    2653             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2654           4 :       __ pmovzxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2655             :       break;
    2656             :     }
    2657             :     case kX64I16x8UConvertI8x16High: {
    2658             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2659             :       XMMRegister dst = i.OutputSimd128Register();
    2660           4 :       __ palignr(dst, i.InputSimd128Register(0), 8);
    2661             :       __ pmovzxbw(dst, dst);
    2662             :       break;
    2663             :     }
    2664             :     case kX64I16x8ShrU: {
    2665         120 :       __ psrlw(i.OutputSimd128Register(), i.InputInt8(1));
    2666          60 :       break;
    2667             :     }
    2668             :     case kX64I16x8UConvertI32x4: {
    2669             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2670             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2671             :       XMMRegister dst = i.OutputSimd128Register();
    2672             :       // Change negative lanes to 0x7FFFFFFF
    2673           4 :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2674           4 :       __ psrld(kScratchDoubleReg, 1);
    2675             :       __ pminud(dst, kScratchDoubleReg);
    2676           4 :       __ pminud(kScratchDoubleReg, i.InputSimd128Register(1));
    2677             :       __ packusdw(dst, kScratchDoubleReg);
    2678             :       break;
    2679             :     }
    2680             :     case kX64I16x8AddSaturateU: {
    2681           4 :       __ paddusw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2682             :       break;
    2683             :     }
    2684             :     case kX64I16x8SubSaturateU: {
    2685           4 :       __ psubusw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2686             :       break;
    2687             :     }
    2688             :     case kX64I16x8MinU: {
    2689             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2690           4 :       __ pminuw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2691             :       break;
    2692             :     }
    2693             :     case kX64I16x8MaxU: {
    2694             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2695           4 :       __ pmaxuw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2696             :       break;
    2697             :     }
    2698             :     case kX64I16x8GtU: {
    2699             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2700             :       XMMRegister dst = i.OutputSimd128Register();
    2701             :       XMMRegister src = i.InputSimd128Register(1);
    2702           8 :       __ pmaxuw(dst, src);
    2703             :       __ pcmpeqw(dst, src);
    2704             :       __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
    2705             :       __ pxor(dst, kScratchDoubleReg);
    2706             :       break;
    2707             :     }
    2708             :     case kX64I16x8GeU: {
    2709             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2710             :       XMMRegister dst = i.OutputSimd128Register();
    2711             :       XMMRegister src = i.InputSimd128Register(1);
    2712          16 :       __ pminuw(dst, src);
    2713             :       __ pcmpeqw(dst, src);
    2714             :       break;
    2715             :     }
    2716             :     case kX64I8x16Splat: {
    2717             :       CpuFeatureScope sse_scope(tasm(), SSSE3);
    2718             :       XMMRegister dst = i.OutputSimd128Register();
    2719         304 :       __ movd(dst, i.InputRegister(0));
    2720         304 :       __ xorps(kScratchDoubleReg, kScratchDoubleReg);
    2721             :       __ pshufb(dst, kScratchDoubleReg);
    2722             :       break;
    2723             :     }
    2724             :     case kX64I8x16ExtractLane: {
    2725             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2726        3968 :       Register dst = i.OutputRegister();
    2727        7936 :       __ pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
    2728        3968 :       __ movsxbl(dst, dst);
    2729             :       break;
    2730             :     }
    2731             :     case kX64I8x16ReplaceLane: {
    2732             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2733         176 :       if (instr->InputAt(2)->IsRegister()) {
    2734             :         __ pinsrb(i.OutputSimd128Register(), i.InputRegister(2),
    2735         176 :                   i.InputInt8(1));
    2736             :       } else {
    2737           0 :         __ pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
    2738             :       }
    2739             :       break;
    2740             :     }
    2741             :     case kX64I8x16SConvertI16x8: {
    2742             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2743           4 :       __ packsswb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2744             :       break;
    2745             :     }
    2746             :     case kX64I8x16Neg: {
    2747             :       CpuFeatureScope sse_scope(tasm(), SSSE3);
    2748             :       XMMRegister dst = i.OutputSimd128Register();
    2749             :       XMMRegister src = i.InputSimd128Register(0);
    2750           4 :       if (dst == src) {
    2751           4 :         __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2752             :         __ psignb(dst, kScratchDoubleReg);
    2753             :       } else {
    2754           0 :         __ pxor(dst, dst);
    2755             :         __ psubb(dst, src);
    2756             :       }
    2757             :       break;
    2758             :     }
    2759             :     case kX64I8x16Shl: {
    2760             :       XMMRegister dst = i.OutputSimd128Register();
    2761             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    2762          28 :       int8_t shift = i.InputInt8(1) & 0x7;
    2763          28 :       if (shift < 4) {
    2764             :         // For small shifts, doubling is faster.
    2765          24 :         for (int i = 0; i < shift; ++i) {
    2766          24 :           __ paddb(dst, dst);
    2767             :         }
    2768             :       } else {
    2769             :         // Mask off the unwanted bits before word-shifting.
    2770          16 :         __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
    2771          16 :         __ psrlw(kScratchDoubleReg, 8 + shift);
    2772             :         __ packuswb(kScratchDoubleReg, kScratchDoubleReg);
    2773             :         __ pand(dst, kScratchDoubleReg);
    2774          16 :         __ psllw(dst, shift);
    2775             :       }
    2776             :       break;
    2777             :     }
    2778             :     case kX64I8x16ShrS: {
    2779             :       XMMRegister dst = i.OutputSimd128Register();
    2780             :       XMMRegister src = i.InputSimd128Register(0);
    2781          28 :       int8_t shift = i.InputInt8(1) & 0x7;
    2782             :       // Unpack the bytes into words, do arithmetic shifts, and repack.
    2783          28 :       __ punpckhbw(kScratchDoubleReg, src);
    2784             :       __ punpcklbw(dst, src);
    2785          28 :       __ psraw(kScratchDoubleReg, 8 + shift);
    2786          28 :       __ psraw(dst, 8 + shift);
    2787             :       __ packsswb(dst, kScratchDoubleReg);
    2788             :       break;
    2789             :     }
    2790             :     case kX64I8x16Add: {
    2791           4 :       __ paddb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2792             :       break;
    2793             :     }
    2794             :     case kX64I8x16AddSaturateS: {
    2795           4 :       __ paddsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2796             :       break;
    2797             :     }
    2798             :     case kX64I8x16Sub: {
    2799           4 :       __ psubb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2800             :       break;
    2801             :     }
    2802             :     case kX64I8x16SubSaturateS: {
    2803           4 :       __ psubsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2804             :       break;
    2805             :     }
    2806             :     case kX64I8x16Mul: {
    2807             :       XMMRegister dst = i.OutputSimd128Register();
    2808             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    2809             :       XMMRegister right = i.InputSimd128Register(1);
    2810           4 :       XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
    2811             :       // I16x8 view of I8x16
    2812             :       // left = AAaa AAaa ... AAaa AAaa
    2813             :       // right= BBbb BBbb ... BBbb BBbb
    2814             :       // t = 00AA 00AA ... 00AA 00AA
    2815             :       // s = 00BB 00BB ... 00BB 00BB
    2816           4 :       __ movaps(tmp, dst);
    2817           4 :       __ movaps(kScratchDoubleReg, right);
    2818           4 :       __ psrlw(tmp, 8);
    2819           4 :       __ psrlw(kScratchDoubleReg, 8);
    2820             :       // dst = left * 256
    2821           4 :       __ psllw(dst, 8);
    2822             :       // t = I16x8Mul(t, s)
    2823             :       //    => __PP __PP ...  __PP  __PP
    2824             :       __ pmullw(tmp, kScratchDoubleReg);
    2825             :       // dst = I16x8Mul(left * 256, right)
    2826             :       //    => pp__ pp__ ...  pp__  pp__
    2827             :       __ pmullw(dst, right);
    2828             :       // t = I16x8Shl(t, 8)
    2829             :       //    => PP00 PP00 ...  PP00  PP00
    2830           4 :       __ psllw(tmp, 8);
    2831             :       // dst = I16x8Shr(dst, 8)
    2832             :       //    => 00pp 00pp ...  00pp  00pp
    2833           4 :       __ psrlw(dst, 8);
    2834             :       // dst = I16x8Or(dst, t)
    2835             :       //    => PPpp PPpp ...  PPpp  PPpp
    2836             :       __ por(dst, tmp);
    2837             :       break;
    2838             :     }
    2839             :     case kX64I8x16MinS: {
    2840             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2841           4 :       __ pminsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2842             :       break;
    2843             :     }
    2844             :     case kX64I8x16MaxS: {
    2845             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2846           4 :       __ pmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2847             :       break;
    2848             :     }
    2849             :     case kX64I8x16Eq: {
    2850          12 :       __ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2851             :       break;
    2852             :     }
    2853             :     case kX64I8x16Ne: {
    2854          16 :       __ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2855             :       __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
    2856          16 :       __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
    2857             :       break;
    2858             :     }
    2859             :     case kX64I8x16GtS: {
    2860           8 :       __ pcmpgtb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2861             :       break;
    2862             :     }
    2863             :     case kX64I8x16GeS: {
    2864             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2865             :       XMMRegister dst = i.OutputSimd128Register();
    2866             :       XMMRegister src = i.InputSimd128Register(1);
    2867           8 :       __ pminsb(dst, src);
    2868             :       __ pcmpeqb(dst, src);
    2869             :       break;
    2870             :     }
    2871             :     case kX64I8x16UConvertI16x8: {
    2872             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2873             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2874             :       XMMRegister dst = i.OutputSimd128Register();
    2875             :       // Change negative lanes to 0x7FFF
    2876           4 :       __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
    2877           4 :       __ psrlw(kScratchDoubleReg, 1);
    2878             :       __ pminuw(dst, kScratchDoubleReg);
    2879           4 :       __ pminuw(kScratchDoubleReg, i.InputSimd128Register(1));
    2880             :       __ packuswb(dst, kScratchDoubleReg);
    2881             :       break;
    2882             :     }
    2883             :     case kX64I8x16ShrU: {
    2884             :       XMMRegister dst = i.OutputSimd128Register();
    2885             :       XMMRegister src = i.InputSimd128Register(0);
    2886          28 :       int8_t shift = i.InputInt8(1) & 0x7;
    2887             :       // Unpack the bytes into words, do logical shifts, and repack.
    2888          28 :       __ punpckhbw(kScratchDoubleReg, src);
    2889             :       __ punpcklbw(dst, src);
    2890          28 :       __ psrlw(kScratchDoubleReg, 8 + shift);
    2891          28 :       __ psrlw(dst, 8 + shift);
    2892             :       __ packuswb(dst, kScratchDoubleReg);
    2893             :       break;
    2894             :     }
    2895             :     case kX64I8x16AddSaturateU: {
    2896           4 :       __ paddusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2897             :       break;
    2898             :     }
    2899             :     case kX64I8x16SubSaturateU: {
    2900           4 :       __ psubusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2901             :       break;
    2902             :     }
    2903             :     case kX64I8x16MinU: {
    2904             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2905           4 :       __ pminub(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2906             :       break;
    2907             :     }
    2908             :     case kX64I8x16MaxU: {
    2909             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2910           4 :       __ pmaxub(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2911             :       break;
    2912             :     }
    2913             :     case kX64I8x16GtU: {
    2914             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2915             :       XMMRegister dst = i.OutputSimd128Register();
    2916             :       XMMRegister src = i.InputSimd128Register(1);
    2917           8 :       __ pmaxub(dst, src);
    2918             :       __ pcmpeqb(dst, src);
    2919             :       __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
    2920             :       __ pxor(dst, kScratchDoubleReg);
    2921             :       break;
    2922             :     }
    2923             :     case kX64I8x16GeU: {
    2924             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2925             :       XMMRegister dst = i.OutputSimd128Register();
    2926             :       XMMRegister src = i.InputSimd128Register(1);
    2927           0 :       __ pminub(dst, src);
    2928             :       __ pcmpeqb(dst, src);
    2929             :       break;
    2930             :     }
    2931             :     case kX64S128And: {
    2932           4 :       __ pand(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2933             :       break;
    2934             :     }
    2935             :     case kX64S128Or: {
    2936           4 :       __ por(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2937             :       break;
    2938             :     }
    2939             :     case kX64S128Xor: {
    2940           4 :       __ pxor(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2941             :       break;
    2942             :     }
    2943             :     case kX64S128Not: {
    2944             :       XMMRegister dst = i.OutputSimd128Register();
    2945             :       XMMRegister src = i.InputSimd128Register(0);
    2946           4 :       if (dst == src) {
    2947           4 :         __ movaps(kScratchDoubleReg, dst);
    2948             :         __ pcmpeqd(dst, dst);
    2949             :         __ pxor(dst, kScratchDoubleReg);
    2950             :       } else {
    2951           0 :         __ pcmpeqd(dst, dst);
    2952             :         __ pxor(dst, src);
    2953             :       }
    2954             : 
    2955             :       break;
    2956             :     }
    2957             :     case kX64S128Select: {
    2958             :       // Mask used here is stored in dst.
    2959          28 :       XMMRegister dst = i.OutputSimd128Register();
    2960          28 :       __ movaps(kScratchDoubleReg, i.InputSimd128Register(1));
    2961          56 :       __ xorps(kScratchDoubleReg, i.InputSimd128Register(2));
    2962          28 :       __ andps(dst, kScratchDoubleReg);
    2963          56 :       __ xorps(dst, i.InputSimd128Register(2));
    2964             :       break;
    2965             :     }
    2966             :     case kX64S8x16Shuffle: {
    2967             :       XMMRegister dst = i.OutputSimd128Register();
    2968             :       Register tmp = i.TempRegister(0);
    2969             :       // Prepare 16 byte aligned buffer for shuffle control mask
    2970        1544 :       __ movq(tmp, rsp);
    2971        1544 :       __ andq(rsp, Immediate(-16));
    2972        1544 :       if (instr->InputCount() == 5) {  // only one input operand
    2973         752 :         uint32_t mask[4] = {};
    2974             :         DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2975        4512 :         for (int j = 4; j > 0; j--) {
    2976        6016 :           mask[j - 1] = i.InputUint32(j);
    2977             :         }
    2978             : 
    2979         752 :         SetupShuffleMaskOnStack(tasm(), mask);
    2980        1504 :         __ pshufb(dst, Operand(rsp, 0));
    2981             :       } else {  // two input operands
    2982             :         DCHECK_EQ(6, instr->InputCount());
    2983        2376 :         ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 0);
    2984         792 :         uint32_t mask[4] = {};
    2985        3960 :         for (int j = 5; j > 1; j--) {
    2986        3168 :           uint32_t lanes = i.InputUint32(j);
    2987       15840 :           for (int k = 0; k < 32; k += 8) {
    2988       12672 :             uint8_t lane = lanes >> k;
    2989       12672 :             mask[j - 2] |= (lane < kSimd128Size ? lane : 0x80) << k;
    2990             :           }
    2991             :         }
    2992         792 :         SetupShuffleMaskOnStack(tasm(), mask);
    2993        1584 :         __ pshufb(kScratchDoubleReg, Operand(rsp, 0));
    2994         792 :         uint32_t mask1[4] = {};
    2995        1584 :         if (instr->InputAt(1)->IsSimd128Register()) {
    2996         792 :           XMMRegister src1 = i.InputSimd128Register(1);
    2997         792 :           if (src1 != dst) __ movups(dst, src1);
    2998             :         } else {
    2999           0 :           __ movups(dst, i.InputOperand(1));
    3000             :         }
    3001        3168 :         for (int j = 5; j > 1; j--) {
    3002        3168 :           uint32_t lanes = i.InputUint32(j);
    3003       15840 :           for (int k = 0; k < 32; k += 8) {
    3004       12672 :             uint8_t lane = lanes >> k;
    3005       12672 :             mask1[j - 2] |= (lane >= kSimd128Size ? (lane & 0x0F) : 0x80) << k;
    3006             :           }
    3007             :         }
    3008         792 :         SetupShuffleMaskOnStack(tasm(), mask1);
    3009        1584 :         __ pshufb(dst, Operand(rsp, 0));
    3010             :         __ por(dst, kScratchDoubleReg);
    3011             :       }
    3012             :       __ movq(rsp, tmp);
    3013             :       break;
    3014             :     }
    3015             :     case kX64S32x4Swizzle: {
    3016             :       DCHECK_EQ(2, instr->InputCount());
    3017        1216 :       ASSEMBLE_SIMD_IMM_INSTR(pshufd, i.OutputSimd128Register(), 0,
    3018             :                               i.InputInt8(1));
    3019             :       break;
    3020             :     }
    3021             :     case kX64S32x4Shuffle: {
    3022             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    3023             :       DCHECK_EQ(4, instr->InputCount());  // Swizzles should be handled above.
    3024             :       int8_t shuffle = i.InputInt8(2);
    3025             :       DCHECK_NE(0xe4, shuffle);  // A simple blend should be handled below.
    3026        1404 :       ASSEMBLE_SIMD_IMM_INSTR(pshufd, kScratchDoubleReg, 1, shuffle);
    3027        1404 :       ASSEMBLE_SIMD_IMM_INSTR(pshufd, i.OutputSimd128Register(), 0, shuffle);
    3028         936 :       __ pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputInt8(3));
    3029             :       break;
    3030             :     }
    3031             :     case kX64S16x8Blend: {
    3032         112 :       ASSEMBLE_SIMD_IMM_SHUFFLE(pblendw, SSE4_1, i.InputInt8(2));
    3033          56 :       break;
    3034             :     }
    3035             :     case kX64S16x8HalfShuffle1: {
    3036         248 :       XMMRegister dst = i.OutputSimd128Register();
    3037         992 :       ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, i.InputInt8(1));
    3038         248 :       __ pshufhw(dst, dst, i.InputInt8(2));
    3039             :       break;
    3040             :     }
    3041             :     case kX64S16x8HalfShuffle2: {
    3042             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    3043         176 :       XMMRegister dst = i.OutputSimd128Register();
    3044         704 :       ASSEMBLE_SIMD_IMM_INSTR(pshuflw, kScratchDoubleReg, 1, i.InputInt8(2));
    3045         176 :       __ pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputInt8(3));
    3046         704 :       ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, i.InputInt8(2));
    3047         176 :       __ pshufhw(dst, dst, i.InputInt8(3));
    3048         176 :       __ pblendw(dst, kScratchDoubleReg, i.InputInt8(4));
    3049             :       break;
    3050             :     }
    3051             :     case kX64S8x16Alignr: {
    3052         480 :       ASSEMBLE_SIMD_IMM_SHUFFLE(palignr, SSSE3, i.InputInt8(2));
    3053         240 :       break;
    3054             :     }
    3055             :     case kX64S16x8Dup: {
    3056         112 :       XMMRegister dst = i.OutputSimd128Register();
    3057         112 :       int8_t lane = i.InputInt8(1) & 0x7;
    3058         112 :       int8_t lane4 = lane & 0x3;
    3059         112 :       int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
    3060         112 :       if (lane < 4) {
    3061         336 :         ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, half_dup);
    3062         112 :         __ pshufd(dst, dst, 0);
    3063             :       } else {
    3064           0 :         ASSEMBLE_SIMD_IMM_INSTR(pshufhw, dst, 0, half_dup);
    3065           0 :         __ pshufd(dst, dst, 0xaa);
    3066             :       }
    3067             :       break;
    3068             :     }
    3069             :     case kX64S8x16Dup: {
    3070             :       XMMRegister dst = i.OutputSimd128Register();
    3071         216 :       int8_t lane = i.InputInt8(1) & 0xf;
    3072             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3073         216 :       if (lane < 8) {
    3074         216 :         __ punpcklbw(dst, dst);
    3075             :       } else {
    3076           0 :         __ punpckhbw(dst, dst);
    3077             :       }
    3078         216 :       lane &= 0x7;
    3079         216 :       int8_t lane4 = lane & 0x3;
    3080         216 :       int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
    3081         216 :       if (lane < 4) {
    3082         200 :         __ pshuflw(dst, dst, half_dup);
    3083         200 :         __ pshufd(dst, dst, 0);
    3084             :       } else {
    3085          16 :         __ pshufhw(dst, dst, half_dup);
    3086          16 :         __ pshufd(dst, dst, 0xaa);
    3087             :       }
    3088             :       break;
    3089             :     }
    3090             :     case kX64S64x2UnpackHigh:
    3091           0 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhqdq);
    3092             :       break;
    3093             :     case kX64S32x4UnpackHigh:
    3094         320 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhdq);
    3095             :       break;
    3096             :     case kX64S16x8UnpackHigh:
    3097         480 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhwd);
    3098             :       break;
    3099             :     case kX64S8x16UnpackHigh:
    3100         448 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhbw);
    3101             :       break;
    3102             :     case kX64S64x2UnpackLow:
    3103          64 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklqdq);
    3104             :       break;
    3105             :     case kX64S32x4UnpackLow:
    3106         416 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckldq);
    3107             :       break;
    3108             :     case kX64S16x8UnpackLow:
    3109         400 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklwd);
    3110             :       break;
    3111             :     case kX64S8x16UnpackLow:
    3112         432 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklbw);
    3113             :       break;
    3114             :     case kX64S16x8UnzipHigh: {
    3115             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    3116             :       XMMRegister dst = i.OutputSimd128Register();
    3117             :       XMMRegister src2 = dst;
    3118             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3119          84 :       if (instr->InputCount() == 2) {
    3120         228 :         ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
    3121          76 :         __ psrld(kScratchDoubleReg, 16);
    3122             :         src2 = kScratchDoubleReg;
    3123             :       }
    3124          84 :       __ psrld(dst, 16);
    3125             :       __ packusdw(dst, src2);
    3126             :       break;
    3127             :     }
    3128             :     case kX64S16x8UnzipLow: {
    3129             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    3130             :       XMMRegister dst = i.OutputSimd128Register();
    3131             :       XMMRegister src2 = dst;
    3132             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3133          84 :       __ pxor(kScratchDoubleReg, kScratchDoubleReg);
    3134          84 :       if (instr->InputCount() == 2) {
    3135         224 :         ASSEMBLE_SIMD_IMM_INSTR(pblendw, kScratchDoubleReg, 1, 0x55);
    3136             :         src2 = kScratchDoubleReg;
    3137             :       }
    3138          84 :       __ pblendw(dst, kScratchDoubleReg, 0xaa);
    3139             :       __ packusdw(dst, src2);
    3140             :       break;
    3141             :     }
    3142             :     case kX64S8x16UnzipHigh: {
    3143             :       XMMRegister dst = i.OutputSimd128Register();
    3144             :       XMMRegister src2 = dst;
    3145             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3146          88 :       if (instr->InputCount() == 2) {
    3147         236 :         ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
    3148          80 :         __ psrlw(kScratchDoubleReg, 8);
    3149             :         src2 = kScratchDoubleReg;
    3150             :       }
    3151          88 :       __ psrlw(dst, 8);
    3152             :       __ packuswb(dst, src2);
    3153             :       break;
    3154             :     }
    3155             :     case kX64S8x16UnzipLow: {
    3156             :       XMMRegister dst = i.OutputSimd128Register();
    3157             :       XMMRegister src2 = dst;
    3158             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3159         104 :       if (instr->InputCount() == 2) {
    3160         288 :         ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
    3161          96 :         __ psllw(kScratchDoubleReg, 8);
    3162          96 :         __ psrlw(kScratchDoubleReg, 8);
    3163             :         src2 = kScratchDoubleReg;
    3164             :       }
    3165         104 :       __ psllw(dst, 8);
    3166         104 :       __ psrlw(dst, 8);
    3167             :       __ packuswb(dst, src2);
    3168             :       break;
    3169             :     }
    3170             :     case kX64S8x16TransposeLow: {
    3171             :       XMMRegister dst = i.OutputSimd128Register();
    3172             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3173         124 :       __ psllw(dst, 8);
    3174         124 :       if (instr->InputCount() == 1) {
    3175           8 :         __ movups(kScratchDoubleReg, dst);
    3176             :       } else {
    3177             :         DCHECK_EQ(2, instr->InputCount());
    3178         348 :         ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
    3179         116 :         __ psllw(kScratchDoubleReg, 8);
    3180             :       }
    3181         124 :       __ psrlw(dst, 8);
    3182             :       __ por(dst, kScratchDoubleReg);
    3183             :       break;
    3184             :     }
    3185             :     case kX64S8x16TransposeHigh: {
    3186             :       XMMRegister dst = i.OutputSimd128Register();
    3187             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3188         112 :       __ psrlw(dst, 8);
    3189         112 :       if (instr->InputCount() == 1) {
    3190           8 :         __ movups(kScratchDoubleReg, dst);
    3191             :       } else {
    3192             :         DCHECK_EQ(2, instr->InputCount());
    3193         312 :         ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
    3194         104 :         __ psrlw(kScratchDoubleReg, 8);
    3195             :       }
    3196         112 :       __ psllw(kScratchDoubleReg, 8);
    3197             :       __ por(dst, kScratchDoubleReg);
    3198             :       break;
    3199             :     }
    3200             :     case kX64S8x8Reverse:
    3201             :     case kX64S8x4Reverse:
    3202             :     case kX64S8x2Reverse: {
    3203             :       DCHECK_EQ(1, instr->InputCount());
    3204             :       XMMRegister dst = i.OutputSimd128Register();
    3205             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3206         316 :       if (arch_opcode != kX64S8x2Reverse) {
    3207             :         // First shuffle words into position.
    3208         224 :         int8_t shuffle_mask = arch_opcode == kX64S8x4Reverse ? 0xB1 : 0x1B;
    3209         224 :         __ pshuflw(dst, dst, shuffle_mask);
    3210         224 :         __ pshufhw(dst, dst, shuffle_mask);
    3211             :       }
    3212         316 :       __ movaps(kScratchDoubleReg, dst);
    3213         316 :       __ psrlw(kScratchDoubleReg, 8);
    3214         316 :       __ psllw(dst, 8);
    3215             :       __ por(dst, kScratchDoubleReg);
    3216             :       break;
    3217             :     }
    3218             :     case kX64S1x4AnyTrue:
    3219             :     case kX64S1x8AnyTrue:
    3220             :     case kX64S1x16AnyTrue: {
    3221             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    3222             :       Register dst = i.OutputRegister();
    3223             :       XMMRegister src = i.InputSimd128Register(0);
    3224             :       Register tmp = i.TempRegister(0);
    3225          60 :       __ xorq(tmp, tmp);
    3226             :       __ movq(dst, Immediate(1));
    3227             :       __ ptest(src, src);
    3228          60 :       __ cmovq(zero, dst, tmp);
    3229             :       break;
    3230             :     }
    3231             :     case kX64S1x4AllTrue:
    3232             :     case kX64S1x8AllTrue:
    3233             :     case kX64S1x16AllTrue: {
    3234             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    3235             :       Register dst = i.OutputRegister();
    3236             :       XMMRegister src = i.InputSimd128Register(0);
    3237             :       Register tmp = i.TempRegister(0);
    3238          60 :       __ movq(tmp, Immediate(1));
    3239             :       __ xorq(dst, dst);
    3240             :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    3241             :       __ pxor(kScratchDoubleReg, src);
    3242             :       __ ptest(kScratchDoubleReg, kScratchDoubleReg);
    3243          60 :       __ cmovq(zero, dst, tmp);
    3244             :       break;
    3245             :     }
    3246             :     case kX64StackCheck:
    3247      556438 :       __ CompareRoot(rsp, RootIndex::kStackLimit);
    3248      556443 :       break;
    3249             :     case kWord32AtomicExchangeInt8: {
    3250        1974 :       __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
    3251        1974 :       __ movsxbl(i.InputRegister(0), i.InputRegister(0));
    3252         987 :       break;
    3253             :     }
    3254             :     case kWord32AtomicExchangeUint8: {
    3255        1296 :       __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
    3256         648 :       __ movzxbl(i.InputRegister(0), i.InputRegister(0));
    3257             :       break;
    3258             :     }
    3259             :     case kWord32AtomicExchangeInt16: {
    3260        1388 :       __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
    3261        1388 :       __ movsxwl(i.InputRegister(0), i.InputRegister(0));
    3262         694 :       break;
    3263             :     }
    3264             :     case kWord32AtomicExchangeUint16: {
    3265        1896 :       __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
    3266         948 :       __ movzxwl(i.InputRegister(0), i.InputRegister(0));
    3267             :       break;
    3268             :     }
    3269             :     case kWord32AtomicExchangeWord32: {
    3270        2400 :       __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
    3271             :       break;
    3272             :     }
    3273             :     case kWord32AtomicCompareExchangeInt8: {
    3274         112 :       __ lock();
    3275         224 :       __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
    3276         112 :       __ movsxbl(rax, rax);
    3277         112 :       break;
    3278             :     }
    3279             :     case kWord32AtomicCompareExchangeUint8: {
    3280         129 :       __ lock();
    3281         258 :       __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
    3282             :       __ movzxbl(rax, rax);
    3283             :       break;
    3284             :     }
    3285             :     case kWord32AtomicCompareExchangeInt16: {
    3286         112 :       __ lock();
    3287         224 :       __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
    3288         112 :       __ movsxwl(rax, rax);
    3289         112 :       break;
    3290             :     }
    3291             :     case kWord32AtomicCompareExchangeUint16: {
    3292         129 :       __ lock();
    3293         258 :       __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
    3294             :       __ movzxwl(rax, rax);
    3295             :       break;
    3296             :     }
    3297             :     case kWord32AtomicCompareExchangeWord32: {
    3298         257 :       __ lock();
    3299         257 :       __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
    3300             :       break;
    3301             :     }
    3302             : #define ATOMIC_BINOP_CASE(op, inst)              \
    3303             :   case kWord32Atomic##op##Int8:                  \
    3304             :     ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
    3305             :     __ movsxbl(rax, rax);                        \
    3306             :     break;                                       \
    3307             :   case kWord32Atomic##op##Uint8:                 \
    3308             :     ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
    3309             :     __ movzxbl(rax, rax);                        \
    3310             :     break;                                       \
    3311             :   case kWord32Atomic##op##Int16:                 \
    3312             :     ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
    3313             :     __ movsxwl(rax, rax);                        \
    3314             :     break;                                       \
    3315             :   case kWord32Atomic##op##Uint16:                \
    3316             :     ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
    3317             :     __ movzxwl(rax, rax);                        \
    3318             :     break;                                       \
    3319             :   case kWord32Atomic##op##Word32:                \
    3320             :     ASSEMBLE_ATOMIC_BINOP(inst, movl, cmpxchgl); \
    3321             :     break;
    3322       11291 :       ATOMIC_BINOP_CASE(Add, addl)
    3323       11628 :       ATOMIC_BINOP_CASE(Sub, subl)
    3324       11307 :       ATOMIC_BINOP_CASE(And, andl)
    3325       11164 :       ATOMIC_BINOP_CASE(Or, orl)
    3326       12044 :       ATOMIC_BINOP_CASE(Xor, xorl)
    3327             : #undef ATOMIC_BINOP_CASE
    3328             :     case kX64Word64AtomicExchangeUint8: {
    3329        3624 :       __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
    3330        1812 :       __ movzxbq(i.InputRegister(0), i.InputRegister(0));
    3331             :       break;
    3332             :     }
    3333             :     case kX64Word64AtomicExchangeUint16: {
    3334        3322 :       __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
    3335        1661 :       __ movzxwq(i.InputRegister(0), i.InputRegister(0));
    3336             :       break;
    3337             :     }
    3338             :     case kX64Word64AtomicExchangeUint32: {
    3339        1704 :       __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
    3340             :       break;
    3341             :     }
    3342             :     case kX64Word64AtomicExchangeUint64: {
    3343        1768 :       __ xchgq(i.InputRegister(0), i.MemoryOperand(1));
    3344             :       break;
    3345             :     }
    3346             :     case kX64Word64AtomicCompareExchangeUint8: {
    3347          17 :       __ lock();
    3348          34 :       __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
    3349             :       __ movzxbq(rax, rax);
    3350             :       break;
    3351             :     }
    3352             :     case kX64Word64AtomicCompareExchangeUint16: {
    3353          25 :       __ lock();
    3354          50 :       __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
    3355             :       __ movzxwq(rax, rax);
    3356             :       break;
    3357             :     }
    3358             :     case kX64Word64AtomicCompareExchangeUint32: {
    3359          25 :       __ lock();
    3360          25 :       __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
    3361             :       break;
    3362             :     }
    3363             :     case kX64Word64AtomicCompareExchangeUint64: {
    3364         265 :       __ lock();
    3365         265 :       __ cmpxchgq(i.MemoryOperand(2), i.InputRegister(1));
    3366             :       break;
    3367             :     }
    3368             : #define ATOMIC64_BINOP_CASE(op, inst)              \
    3369             :   case kX64Word64Atomic##op##Uint8:                \
    3370             :     ASSEMBLE_ATOMIC64_BINOP(inst, movb, cmpxchgb); \
    3371             :     __ movzxbq(rax, rax);                          \
    3372             :     break;                                         \
    3373             :   case kX64Word64Atomic##op##Uint16:               \
    3374             :     ASSEMBLE_ATOMIC64_BINOP(inst, movw, cmpxchgw); \
    3375             :     __ movzxwq(rax, rax);                          \
    3376             :     break;                                         \
    3377             :   case kX64Word64Atomic##op##Uint32:               \
    3378             :     ASSEMBLE_ATOMIC64_BINOP(inst, movl, cmpxchgl); \
    3379             :     break;                                         \
    3380             :   case kX64Word64Atomic##op##Uint64:               \
    3381             :     ASSEMBLE_ATOMIC64_BINOP(inst, movq, cmpxchgq); \
    3382             :     break;
    3383       10494 :       ATOMIC64_BINOP_CASE(Add, addq)
    3384        9814 :       ATOMIC64_BINOP_CASE(Sub, subq)
    3385       10786 :       ATOMIC64_BINOP_CASE(And, andq)
    3386        9146 :       ATOMIC64_BINOP_CASE(Or, orq)
    3387       11220 :       ATOMIC64_BINOP_CASE(Xor, xorq)
    3388             : #undef ATOMIC64_BINOP_CASE
    3389             :     case kWord32AtomicLoadInt8:
    3390             :     case kWord32AtomicLoadUint8:
    3391             :     case kWord32AtomicLoadInt16:
    3392             :     case kWord32AtomicLoadUint16:
    3393             :     case kWord32AtomicLoadWord32:
    3394             :     case kWord32AtomicStoreWord8:
    3395             :     case kWord32AtomicStoreWord16:
    3396             :     case kWord32AtomicStoreWord32:
    3397             :     case kX64Word64AtomicLoadUint8:
    3398             :     case kX64Word64AtomicLoadUint16:
    3399             :     case kX64Word64AtomicLoadUint32:
    3400             :     case kX64Word64AtomicLoadUint64:
    3401             :     case kX64Word64AtomicStoreWord8:
    3402             :     case kX64Word64AtomicStoreWord16:
    3403             :     case kX64Word64AtomicStoreWord32:
    3404             :     case kX64Word64AtomicStoreWord64:
    3405           0 :       UNREACHABLE();  // Won't be generated by instruction selector.
    3406             :       break;
    3407             :   }
    3408             :   return kSuccess;
    3409             : }  // NOLINT(readability/fn_size)
    3410             : 
    3411             : #undef ASSEMBLE_UNOP
    3412             : #undef ASSEMBLE_BINOP
    3413             : #undef ASSEMBLE_COMPARE
    3414             : #undef ASSEMBLE_MULT
    3415             : #undef ASSEMBLE_SHIFT
    3416             : #undef ASSEMBLE_MOVX
    3417             : #undef ASSEMBLE_SSE_BINOP
    3418             : #undef ASSEMBLE_SSE_UNOP
    3419             : #undef ASSEMBLE_AVX_BINOP
    3420             : #undef ASSEMBLE_IEEE754_BINOP
    3421             : #undef ASSEMBLE_IEEE754_UNOP
    3422             : #undef ASSEMBLE_ATOMIC_BINOP
    3423             : #undef ASSEMBLE_ATOMIC64_BINOP
    3424             : #undef ASSEMBLE_SIMD_INSTR
    3425             : #undef ASSEMBLE_SIMD_IMM_INSTR
    3426             : #undef ASSEMBLE_SIMD_PUNPCK_SHUFFLE
    3427             : #undef ASSEMBLE_SIMD_IMM_SHUFFLE
    3428             : 
    3429             : namespace {
    3430             : 
    3431     6099857 : Condition FlagsConditionToCondition(FlagsCondition condition) {
    3432     6099857 :   switch (condition) {
    3433             :     case kUnorderedEqual:
    3434             :     case kEqual:
    3435             :       return equal;
    3436             :     case kUnorderedNotEqual:
    3437             :     case kNotEqual:
    3438     1390788 :       return not_equal;
    3439             :     case kSignedLessThan:
    3440      175793 :       return less;
    3441             :     case kSignedGreaterThanOrEqual:
    3442       57919 :       return greater_equal;
    3443             :     case kSignedLessThanOrEqual:
    3444       68783 :       return less_equal;
    3445             :     case kSignedGreaterThan:
    3446       74990 :       return greater;
    3447             :     case kUnsignedLessThan:
    3448      292394 :       return below;
    3449             :     case kUnsignedGreaterThanOrEqual:
    3450      222758 :       return above_equal;
    3451             :     case kUnsignedLessThanOrEqual:
    3452      903168 :       return below_equal;
    3453             :     case kUnsignedGreaterThan:
    3454      186290 :       return above;
    3455             :     case kOverflow:
    3456      179651 :       return overflow;
    3457             :     case kNotOverflow:
    3458        1456 :       return no_overflow;
    3459             :     default:
    3460             :       break;
    3461             :   }
    3462           0 :   UNREACHABLE();
    3463             : }
    3464             : 
    3465             : }  // namespace
    3466             : 
    3467             : // Assembles branches after this instruction.
    3468     5356336 : void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
    3469             :   Label::Distance flabel_distance =
    3470     5356336 :       branch->fallthru ? Label::kNear : Label::kFar;
    3471     5356336 :   Label* tlabel = branch->true_label;
    3472     5356336 :   Label* flabel = branch->false_label;
    3473     5356336 :   if (branch->condition == kUnorderedEqual) {
    3474       51281 :     __ j(parity_even, flabel, flabel_distance);
    3475     5305055 :   } else if (branch->condition == kUnorderedNotEqual) {
    3476      101657 :     __ j(parity_even, tlabel);
    3477             :   }
    3478     5356334 :   __ j(FlagsConditionToCondition(branch->condition), tlabel);
    3479             : 
    3480     5356333 :   if (!branch->fallthru) __ jmp(flabel, flabel_distance);
    3481     5356333 : }
    3482             : 
    3483           0 : void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
    3484             :                                             Instruction* instr) {
    3485             :   // TODO(jarin) Handle float comparisons (kUnordered[Not]Equal).
    3486           0 :   if (condition == kUnorderedEqual || condition == kUnorderedNotEqual) {
    3487           0 :     return;
    3488             :   }
    3489             : 
    3490             :   condition = NegateFlagsCondition(condition);
    3491           0 :   __ movl(kScratchRegister, Immediate(0));
    3492             :   __ cmovq(FlagsConditionToCondition(condition), kSpeculationPoisonRegister,
    3493           0 :            kScratchRegister);
    3494             : }
    3495             : 
    3496      333240 : void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
    3497         288 :                                             BranchInfo* branch) {
    3498             :   Label::Distance flabel_distance =
    3499      333240 :       branch->fallthru ? Label::kNear : Label::kFar;
    3500      333240 :   Label* tlabel = branch->true_label;
    3501      333240 :   Label* flabel = branch->false_label;
    3502      333240 :   Label nodeopt;
    3503      333240 :   if (branch->condition == kUnorderedEqual) {
    3504           0 :     __ j(parity_even, flabel, flabel_distance);
    3505      333240 :   } else if (branch->condition == kUnorderedNotEqual) {
    3506        4105 :     __ j(parity_even, tlabel);
    3507             :   }
    3508      333240 :   __ j(FlagsConditionToCondition(branch->condition), tlabel);
    3509             : 
    3510      333237 :   if (FLAG_deopt_every_n_times > 0) {
    3511             :     ExternalReference counter =
    3512         288 :         ExternalReference::stress_deopt_count(isolate());
    3513             : 
    3514         288 :     __ pushfq();
    3515         288 :     __ pushq(rax);
    3516         288 :     __ load_rax(counter);
    3517             :     __ decl(rax);
    3518         288 :     __ j(not_zero, &nodeopt);
    3519             : 
    3520         288 :     __ Set(rax, FLAG_deopt_every_n_times);
    3521         288 :     __ store_rax(counter);
    3522         288 :     __ popq(rax);
    3523         288 :     __ popfq();
    3524         288 :     __ jmp(tlabel);
    3525             : 
    3526         288 :     __ bind(&nodeopt);
    3527         288 :     __ store_rax(counter);
    3528         288 :     __ popq(rax);
    3529         288 :     __ popfq();
    3530             :   }
    3531             : 
    3532      333237 :   if (!branch->fallthru) {
    3533           0 :     __ jmp(flabel, flabel_distance);
    3534             :   }
    3535      333237 : }
    3536             : 
    3537     8012114 : void CodeGenerator::AssembleArchJump(RpoNumber target) {
    3538     8012114 :   if (!IsNextInAssemblyOrder(target)) __ jmp(GetLabel(target));
    3539     5010264 : }
    3540             : 
    3541       33442 : void CodeGenerator::AssembleArchTrap(Instruction* instr,
    3542       33442 :                                      FlagsCondition condition) {
    3543             :   auto ool = new (zone()) WasmOutOfLineTrap(this, instr);
    3544       33434 :   Label* tlabel = ool->entry();
    3545       33434 :   Label end;
    3546       33434 :   if (condition == kUnorderedEqual) {
    3547           0 :     __ j(parity_even, &end);
    3548       33434 :   } else if (condition == kUnorderedNotEqual) {
    3549         304 :     __ j(parity_even, tlabel);
    3550             :   }
    3551       33434 :   __ j(FlagsConditionToCondition(condition), tlabel);
    3552       33445 :   __ bind(&end);
    3553       33448 : }
    3554             : 
    3555             : // Assembles boolean materializations after this instruction.
    3556      753712 : void CodeGenerator::AssembleArchBoolean(Instruction* instr,
    3557             :                                         FlagsCondition condition) {
    3558             :   X64OperandConverter i(this, instr);
    3559      376856 :   Label done;
    3560             : 
    3561             :   // Materialize a full 64-bit 1 or 0 value. The result register is always the
    3562             :   // last output of the instruction.
    3563      376856 :   Label check;
    3564             :   DCHECK_NE(0u, instr->OutputCount());
    3565      376856 :   Register reg = i.OutputRegister(instr->OutputCount() - 1);
    3566      376856 :   if (condition == kUnorderedEqual) {
    3567        2923 :     __ j(parity_odd, &check, Label::kNear);
    3568             :     __ movl(reg, Immediate(0));
    3569        2925 :     __ jmp(&done, Label::kNear);
    3570      373933 :   } else if (condition == kUnorderedNotEqual) {
    3571        2582 :     __ j(parity_odd, &check, Label::kNear);
    3572             :     __ movl(reg, Immediate(1));
    3573        2581 :     __ jmp(&done, Label::kNear);
    3574             :   }
    3575      376857 :   __ bind(&check);
    3576      376859 :   __ setcc(FlagsConditionToCondition(condition), reg);
    3577             :   __ movzxbl(reg, reg);
    3578      376857 :   __ bind(&done);
    3579      376859 : }
    3580             : 
    3581      473934 : void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
    3582             :   X64OperandConverter i(this, instr);
    3583       34721 :   Register input = i.InputRegister(0);
    3584             :   std::vector<std::pair<int32_t, Label*>> cases;
    3585      473934 :   for (size_t index = 2; index < instr->InputCount(); index += 2) {
    3586      606737 :     cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
    3587             :   }
    3588             :   AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
    3589       69444 :                                       cases.data() + cases.size());
    3590       34722 : }
    3591             : 
    3592           0 : void CodeGenerator::AssembleArchLookupSwitch(Instruction* instr) {
    3593             :   X64OperandConverter i(this, instr);
    3594           0 :   Register input = i.InputRegister(0);
    3595           0 :   for (size_t index = 2; index < instr->InputCount(); index += 2) {
    3596           0 :     __ cmpl(input, Immediate(i.InputInt32(index + 0)));
    3597           0 :     __ j(equal, GetLabel(i.InputRpo(index + 1)));
    3598             :   }
    3599           0 :   AssembleArchJump(i.InputRpo(1));
    3600           0 : }
    3601             : 
    3602      201992 : void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
    3603             :   X64OperandConverter i(this, instr);
    3604         314 :   Register input = i.InputRegister(0);
    3605         314 :   int32_t const case_count = static_cast<int32_t>(instr->InputCount() - 2);
    3606         314 :   Label** cases = zone()->NewArray<Label*>(case_count);
    3607      201364 :   for (int32_t index = 0; index < case_count; ++index) {
    3608      402100 :     cases[index] = GetLabel(i.InputRpo(index + 2));
    3609             :   }
    3610         314 :   Label* const table = AddJumpTable(cases, case_count);
    3611         314 :   __ cmpl(input, Immediate(case_count));
    3612         628 :   __ j(above_equal, GetLabel(i.InputRpo(1)));
    3613         628 :   __ leaq(kScratchRegister, Operand(table));
    3614         314 :   __ jmp(Operand(kScratchRegister, input, times_8, 0));
    3615         314 : }
    3616             : 
    3617             : namespace {
    3618             : 
    3619             : static const int kQuadWordSize = 16;
    3620             : 
    3621             : }  // namespace
    3622             : 
    3623     2141387 : void CodeGenerator::FinishFrame(Frame* frame) {
    3624     4282774 :   auto call_descriptor = linkage()->GetIncomingDescriptor();
    3625             : 
    3626             :   const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
    3627     2141387 :   if (saves_fp != 0) {
    3628             :     frame->AlignSavedCalleeRegisterSlots();
    3629           0 :     if (saves_fp != 0) {  // Save callee-saved XMM registers.
    3630             :       const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
    3631             :       frame->AllocateSavedCalleeRegisterSlots(
    3632           0 :           saves_fp_count * (kQuadWordSize / kSystemPointerSize));
    3633             :     }
    3634             :   }
    3635             :   const RegList saves = call_descriptor->CalleeSavedRegisters();
    3636     2141387 :   if (saves != 0) {  // Save callee-saved registers.
    3637             :     int count = 0;
    3638    14467344 :     for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
    3639    14467344 :       if (((1 << i) & saves)) {
    3640     4521045 :         ++count;
    3641             :       }
    3642             :     }
    3643             :     frame->AllocateSavedCalleeRegisterSlots(count);
    3644             :   }
    3645     2141387 : }
    3646             : 
    3647    14679283 : void CodeGenerator::AssembleConstructFrame() {
    3648     4913318 :   auto call_descriptor = linkage()->GetIncomingDescriptor();
    3649     2151203 :   if (frame_access_state()->has_frame()) {
    3650     2151192 :     int pc_base = __ pc_offset();
    3651             : 
    3652     2151192 :     if (call_descriptor->IsCFunctionCall()) {
    3653      904209 :       __ pushq(rbp);
    3654             :       __ movq(rbp, rsp);
    3655     1246983 :     } else if (call_descriptor->IsJSFunctionCall()) {
    3656      636035 :       __ Prologue();
    3657      636037 :       if (call_descriptor->PushArgumentCount()) {
    3658       38776 :         __ pushq(kJavaScriptCallArgCountRegister);
    3659             :       }
    3660             :     } else {
    3661      610948 :       __ StubPrologue(info()->GetOutputStackFrameType());
    3662      610966 :       if (call_descriptor->IsWasmFunctionCall()) {
    3663      499642 :         __ pushq(kWasmInstanceRegister);
    3664      111324 :       } else if (call_descriptor->IsWasmImportWrapper()) {
    3665             :         // WASM import wrappers are passed a tuple in the place of the instance.
    3666             :         // Unpack the tuple into the instance and the target callable.
    3667             :         // This must be done here in the codegen because it cannot be expressed
    3668             :         // properly in the graph.
    3669             :         __ LoadTaggedPointerField(
    3670             :             kJSFunctionRegister,
    3671        6342 :             FieldOperand(kWasmInstanceRegister, Tuple2::kValue2Offset));
    3672             :         __ LoadTaggedPointerField(
    3673             :             kWasmInstanceRegister,
    3674        6342 :             FieldOperand(kWasmInstanceRegister, Tuple2::kValue1Offset));
    3675        6342 :         __ pushq(kWasmInstanceRegister);
    3676             :       }
    3677             :     }
    3678             : 
    3679     2151207 :     unwinding_info_writer_.MarkFrameConstructed(pc_base);
    3680             :   }
    3681     2151214 :   int required_slots = frame()->GetTotalFrameSlotCount() -
    3682     2151214 :                        call_descriptor->CalculateFixedFrameSize();
    3683             : 
    3684     2151266 :   if (info()->is_osr()) {
    3685             :     // TurboFan OSR-compiled functions cannot be entered directly.
    3686        4991 :     __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
    3687             : 
    3688             :     // Unoptimized code jumps directly to this entrypoint while the unoptimized
    3689             :     // frame is still on the stack. Optimized code uses OSR values directly from
    3690             :     // the unoptimized frame. Thus, all that needs to be done is to allocate the
    3691             :     // remaining stack slots.
    3692        4991 :     if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
    3693        9982 :     osr_pc_offset_ = __ pc_offset();
    3694        4991 :     required_slots -= static_cast<int>(osr_helper()->UnoptimizedFrameSlots());
    3695        4991 :     ResetSpeculationPoison();
    3696             :   }
    3697             : 
    3698             :   const RegList saves = call_descriptor->CalleeSavedRegisters();
    3699             :   const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
    3700             : 
    3701     2151160 :   if (required_slots > 0) {
    3702             :     DCHECK(frame_access_state()->has_frame());
    3703     1656136 :     if (info()->IsWasm() && required_slots > 128) {
    3704             :       // For WebAssembly functions with big frames we have to do the stack
    3705             :       // overflow check before we construct the frame. Otherwise we may not
    3706             :       // have enough space on the stack to call the runtime for the stack
    3707             :       // overflow.
    3708           8 :       Label done;
    3709             : 
    3710             :       // If the frame is bigger than the stack, we throw the stack overflow
    3711             :       // exception unconditionally. Thereby we can avoid the integer overflow
    3712             :       // check in the condition code.
    3713           8 :       if (required_slots * kSystemPointerSize < FLAG_stack_size * 1024) {
    3714             :         __ movq(kScratchRegister,
    3715             :                 FieldOperand(kWasmInstanceRegister,
    3716           8 :                              WasmInstanceObject::kRealStackLimitAddressOffset));
    3717          16 :         __ movq(kScratchRegister, Operand(kScratchRegister, 0));
    3718             :         __ addq(kScratchRegister,
    3719           8 :                 Immediate(required_slots * kSystemPointerSize));
    3720           8 :         __ cmpq(rsp, kScratchRegister);
    3721           8 :         __ j(above_equal, &done);
    3722             :       }
    3723             :       __ LoadTaggedPointerField(
    3724             :           rcx, FieldOperand(kWasmInstanceRegister,
    3725           8 :                             WasmInstanceObject::kCEntryStubOffset));
    3726           8 :       __ Move(rsi, Smi::zero());
    3727           8 :       __ CallRuntimeWithCEntry(Runtime::kThrowWasmStackOverflow, rcx);
    3728             :       ReferenceMap* reference_map = new (zone()) ReferenceMap(zone());
    3729             :       RecordSafepoint(reference_map, Safepoint::kSimple,
    3730           8 :                       Safepoint::kNoLazyDeopt);
    3731           8 :       __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
    3732           8 :       __ bind(&done);
    3733             :     }
    3734             : 
    3735             :     // Skip callee-saved and return slots, which are created below.
    3736     1656136 :     required_slots -= base::bits::CountPopulation(saves);
    3737             :     required_slots -= base::bits::CountPopulation(saves_fp) *
    3738     1656136 :                       (kQuadWordSize / kSystemPointerSize);
    3739     1656136 :     required_slots -= frame()->GetReturnSlotCount();
    3740     1656136 :     if (required_slots > 0) {
    3741     2959230 :       __ subq(rsp, Immediate(required_slots * kSystemPointerSize));
    3742             :     }
    3743             :   }
    3744             : 
    3745     2151169 :   if (saves_fp != 0) {  // Save callee-saved XMM registers.
    3746             :     const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
    3747           0 :     const int stack_size = saves_fp_count * kQuadWordSize;
    3748             :     // Adjust the stack pointer.
    3749           0 :     __ subq(rsp, Immediate(stack_size));
    3750             :     // Store the registers on the stack.
    3751             :     int slot_idx = 0;
    3752           0 :     for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
    3753           0 :       if (!((1 << i) & saves_fp)) continue;
    3754             :       __ movdqu(Operand(rsp, kQuadWordSize * slot_idx),
    3755           0 :                 XMMRegister::from_code(i));
    3756           0 :       slot_idx++;
    3757             :     }
    3758             :   }
    3759             : 
    3760     2151169 :   if (saves != 0) {  // Save callee-saved registers.
    3761    14467344 :     for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
    3762    14467344 :       if (!((1 << i) & saves)) continue;
    3763     4521045 :       __ pushq(Register::from_code(i));
    3764             :     }
    3765             :   }
    3766             : 
    3767             :   // Allocate return slots (located after callee-saved).
    3768     2151169 :   if (frame()->GetReturnSlotCount() > 0) {
    3769        1344 :     __ subq(rsp, Immediate(frame()->GetReturnSlotCount() * kSystemPointerSize));
    3770             :   }
    3771     2151169 : }
    3772             : // Emits the return sequence: restores callee-saved registers, deconstructs the frame (if any), and returns while dropping stack parameters plus |pop| slots.
    3773     4852778 : void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
    3774     9705556 :   auto call_descriptor = linkage()->GetIncomingDescriptor();
    3775             : 
    3776             :   // Restore registers: release the return slots, then pop callee-saved registers in ascending register-code order.
    3777             :   const RegList saves = call_descriptor->CalleeSavedRegisters();
    3778     2426389 :   if (saves != 0) {
    3779      913577 :     const int returns = frame()->GetReturnSlotCount();
    3780      913577 :     if (returns != 0) {
    3781        1312 :       __ addq(rsp, Immediate(returns * kSystemPointerSize));
    3782             :     }
    3783    14617232 :     for (int i = 0; i < Register::kNumRegisters; i++) {
    3784    14617232 :       if (!((1 << i) & saves)) continue;
    3785     4567885 :       __ popq(Register::from_code(i));
    3786             :     }
    3787             :   }
    3788             :   const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
    3789     2426389 :   if (saves_fp != 0) {
    3790             :     const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
    3791           0 :     const int stack_size = saves_fp_count * kQuadWordSize;
    3792             :     // Load the registers from the stack, one kQuadWordSize-sized slot per saved XMM register.
    3793             :     int slot_idx = 0;
    3794           0 :     for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
    3795           0 :       if (!((1 << i) & saves_fp)) continue;
    3796             :       __ movdqu(XMMRegister::from_code(i),
    3797           0 :                 Operand(rsp, kQuadWordSize * slot_idx));
    3798           0 :       slot_idx++;
    3799             :     }
    3800             :     // Adjust the stack pointer.
    3801           0 :     __ addq(rsp, Immediate(stack_size));
    3802             :   }
    3803             : 
    3804             :   unwinding_info_writer_.MarkBlockWillExit();
    3805             : 
    3806             :   // Might need rcx for scratch if pop_size is too big or if there is a variable
    3807             :   // pop count.
    3808             :   DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rcx.bit());
    3809             :   DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rdx.bit());
    3810     2426389 :   size_t pop_size = call_descriptor->StackParameterCount() * kSystemPointerSize;  // Bytes occupied by stack parameters.
    3811             :   X64OperandConverter g(this, nullptr);
    3812     2426389 :   if (call_descriptor->IsCFunctionCall()) {
    3813      913577 :     AssembleDeconstructFrame();
    3814     1512812 :   } else if (frame_access_state()->has_frame()) {
    3815     2886818 :     if (pop->IsImmediate() && g.ToConstant(pop).ToInt32() == 0) {
    3816             :       // Canonicalize JSFunction return sites for now: the first zero-pop
    3817     1417352 :       if (return_label_.is_bound()) {  // return binds return_label_ and emits the epilogue;
    3818      280721 :         __ jmp(&return_label_);  // later zero-pop returns just jump to it.
    3819     2426587 :         return;
    3820             :       } else {
    3821     1136631 :         __ bind(&return_label_);
    3822     1136635 :         AssembleDeconstructFrame();
    3823             :       }
    3824             :     } else {
    3825       52108 :       AssembleDeconstructFrame();
    3826             :     }
    3827             :   }
    3828             : 
    3829     2145873 :   if (pop->IsImmediate()) {  // Constant pop count: a single Ret drops everything.
    3830     4187561 :     pop_size += g.ToConstant(pop).ToInt32() * kSystemPointerSize;  // Fold the constant pop count into the byte total.
    3831     2093772 :     CHECK_LT(pop_size, static_cast<size_t>(std::numeric_limits<int>::max()));
    3832     2093772 :     __ Ret(static_cast<int>(pop_size), rcx);
    3833             :   } else {  // Pop count is held in a register.
    3834             :     Register pop_reg = g.ToRegister(pop);
    3835       52084 :     Register scratch_reg = pop_reg == rcx ? rdx : rcx;  // Pick a scratch register distinct from pop_reg.
    3836       52084 :     __ popq(scratch_reg);  // Pop the top-of-stack value that is jumped to below (presumably the return address).
    3837      104168 :     __ leaq(rsp, Operand(rsp, pop_reg, times_8, static_cast<int>(pop_size)));  // rsp += pop_reg * 8 + pop_size.
    3838       52084 :     __ jmp(scratch_reg);
    3839             :   }
    3840             : }
    3841             : // Finishes code generation by asking the assembler to patch its constant pool.
    3842     2141249 : void CodeGenerator::FinishCode() { tasm()->PatchConstPool(); }
    3843             : // Emits code for one move from |source| to |destination|, dispatching on the kind inferred by MoveType::InferMove.
    3844    35844153 : void CodeGenerator::AssembleMove(InstructionOperand* source,
    3845             :                                  InstructionOperand* destination) {
    3846             :   X64OperandConverter g(this, nullptr);
    3847             :   // Helper function to write the given constant to the dst register.
    3848    18204275 :   auto MoveConstantToRegister = [&](Register dst, Constant src) {
    3849    18204275 :     switch (src.type()) {
    3850             :       case Constant::kInt32: {
    3851     3528041 :         if (RelocInfo::IsWasmReference(src.rmode())) {  // Wasm references keep their reloc mode via a 64-bit immediate.
    3852           0 :           __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));
    3853             :         } else {
    3854     3528041 :           int32_t value = src.ToInt32();
    3855     3528041 :           if (value == 0) {
    3856     1007196 :             __ xorl(dst, dst);  // Zero is produced with xor rather than a mov of an immediate.
    3857             :           } else {
    3858     2520845 :             __ movl(dst, Immediate(value));
    3859             :           }
    3860             :         }
    3861             :         break;
    3862             :       }
    3863             :       case Constant::kInt64:
    3864     2184653 :         if (RelocInfo::IsWasmReference(src.rmode())) {
    3865           0 :           __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));
    3866             :         } else {
    3867     2184653 :           __ Set(dst, src.ToInt64());
    3868             :         }
    3869             :         break;
    3870             :       case Constant::kFloat32:
    3871         672 :         __ MoveNumber(dst, src.ToFloat32());
    3872         336 :         break;
    3873             :       case Constant::kFloat64:
    3874     1775883 :         __ MoveNumber(dst, src.ToFloat64().value());
    3875     1775884 :         break;
    3876             :       case Constant::kExternalReference:
    3877     4703286 :         __ Move(dst, src.ToExternalReference());
    3878     2351652 :         break;
    3879             :       case Constant::kHeapObject: {
    3880     8361585 :         Handle<HeapObject> src_object = src.ToHeapObject();
    3881             :         RootIndex index;
    3882     8361589 :         if (IsMaterializableFromRoot(src_object, &index)) {  // Use LoadRoot when the object is materializable from a root.
    3883     1852870 :           __ LoadRoot(dst, index);
    3884             :         } else {
    3885     6508719 :           __ Move(dst, src_object);  // Otherwise move the handle itself.
    3886             :         }
    3887             :         break;
    3888             :       }
    3889             :       case Constant::kDelayedStringConstant: {
    3890        2088 :         const StringConstantBase* src_constant = src.ToDelayedStringConstant();
    3891        2088 :         __ MoveStringConstant(dst, src_constant);
    3892        2088 :         break;
    3893             :       }
    3894             :       case Constant::kRpoNumber:
    3895           0 :         UNREACHABLE();  // TODO(dcarney): load of labels on x64.
    3896             :         break;
    3897             :     }
    3898    54048490 :   };
    3899             :   // Helper function to write the given constant to the stack. Int32/Int64 constants that are not wasm references are stored directly; all others go through kScratchRegister.
    3900       38695 :   auto MoveConstantToSlot = [&](Operand dst, Constant src) {
    3901       38695 :     if (!RelocInfo::IsWasmReference(src.rmode())) {
    3902       38695 :       switch (src.type()) {
    3903             :         case Constant::kInt32:
    3904       19710 :           __ movq(dst, Immediate(src.ToInt32()));
    3905       19711 :           return;
    3906             :         case Constant::kInt64:
    3907       13056 :           __ Set(dst, src.ToInt64());
    3908       13056 :           return;
    3909             :         default:
    3910             :           break;
    3911             :       }
    3912             :     }
    3913        5929 :     MoveConstantToRegister(kScratchRegister, src);  // Fallback: materialize in the scratch register, then store it.
    3914        5929 :     __ movq(dst, kScratchRegister);
    3915    35844153 :   };
    3916             :   // Dispatch on the source and destination operand kinds. Every case returns on its own.
    3917    35844153 :   switch (MoveType::InferMove(source, destination)) {
    3918             :     case MoveType::kRegisterToRegister:
    3919     4105376 :       if (source->IsRegister()) {
    3920     3994296 :         __ movq(g.ToRegister(destination), g.ToRegister(source));
    3921             :       } else {
    3922             :         DCHECK(source->IsFPRegister());
    3923             :         __ Movapd(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
    3924             :       }
    3925             :       return;
    3926             :     case MoveType::kRegisterToStack: {
    3927     4948615 :       Operand dst = g.ToOperand(destination);
    3928     4948615 :       if (source->IsRegister()) {
    3929     4659959 :         __ movq(dst, g.ToRegister(source));
    3930             :       } else {
    3931             :         DCHECK(source->IsFPRegister());
    3932             :         XMMRegister src = g.ToDoubleRegister(source);
    3933             :         MachineRepresentation rep =
    3934             :             LocationOperand::cast(source)->representation();
    3935      288656 :         if (rep != MachineRepresentation::kSimd128) {  // Non-kSimd128 FP values use Movsd; kSimd128 values use Movups.
    3936             :           __ Movsd(dst, src);
    3937             :         } else {
    3938             :           __ Movups(dst, src);
    3939             :         }
    3940             :       }
    3941             :       return;
    3942             :     }
    3943             :     case MoveType::kStackToRegister: {
    3944     8186738 :       Operand src = g.ToOperand(source);
    3945     8186738 :       if (source->IsStackSlot()) {
    3946     7796947 :         __ movq(g.ToRegister(destination), src);
    3947             :       } else {
    3948             :         DCHECK(source->IsFPStackSlot());
    3949             :         XMMRegister dst = g.ToDoubleRegister(destination);
    3950             :         MachineRepresentation rep =
    3951             :             LocationOperand::cast(source)->representation();
    3952      389791 :         if (rep != MachineRepresentation::kSimd128) {
    3953             :           __ Movsd(dst, src);
    3954             :         } else {
    3955             :           __ Movups(dst, src);
    3956             :         }
    3957             :       }
    3958             :       return;
    3959             :     }
    3960             :     case MoveType::kStackToStack: {
    3961       44286 :       Operand src = g.ToOperand(source);
    3962       44286 :       Operand dst = g.ToOperand(destination);
    3963       44286 :       if (source->IsStackSlot()) {
    3964             :         // Spill on demand to use a temporary register for memory-to-memory
    3965             :         // moves.
    3966       24057 :         __ movq(kScratchRegister, src);
    3967             :         __ movq(dst, kScratchRegister);
    3968             :       } else {
    3969             :         MachineRepresentation rep =
    3970             :             LocationOperand::cast(source)->representation();
    3971       20229 :         if (rep != MachineRepresentation::kSimd128) {  // FP slots are copied through kScratchDoubleReg.
    3972             :           __ Movsd(kScratchDoubleReg, src);
    3973             :           __ Movsd(dst, kScratchDoubleReg);
    3974             :         } else {
    3975             :           DCHECK(source->IsSimd128StackSlot());
    3976             :           __ Movups(kScratchDoubleReg, src);
    3977             :           __ Movups(dst, kScratchDoubleReg);
    3978             :         }
    3979             :       }
    3980             :       return;
    3981             :     }
    3982             :     case MoveType::kConstantToRegister: {
    3983    18513994 :       Constant src = g.ToConstant(source);
    3984    18514069 :       if (destination->IsRegister()) {
    3985    18198404 :         MoveConstantToRegister(g.ToRegister(destination), src);
    3986             :       } else {
    3987             :         DCHECK(destination->IsFPRegister());
    3988      315665 :         XMMRegister dst = g.ToDoubleRegister(destination);
    3989      315665 :         if (src.type() == Constant::kFloat32) {
    3990             :           // TODO(turbofan): Can we do better here?
    3991       12393 :           __ Move(dst, bit_cast<uint32_t>(src.ToFloat32()));  // Pass the float32 as its raw 32-bit pattern.
    3992             :         } else {
    3993             :           DCHECK_EQ(src.type(), Constant::kFloat64);
    3994      303272 :           __ Move(dst, src.ToFloat64().AsUint64());  // Pass the float64 as its raw 64-bit pattern.
    3995             :         }
    3996             :       }
    3997             :       return;
    3998             :     }
    3999             :     case MoveType::kConstantToStack: {
    4000       45287 :       Constant src = g.ToConstant(source);
    4001       45286 :       Operand dst = g.ToOperand(destination);
    4002       45286 :       if (destination->IsStackSlot()) {
    4003       38695 :         MoveConstantToSlot(dst, src);
    4004             :       } else {
    4005             :         DCHECK(destination->IsFPStackSlot());
    4006        6591 :         if (src.type() == Constant::kFloat32) {
    4007        3032 :           __ movl(dst, Immediate(bit_cast<uint32_t>(src.ToFloat32())));  // Store the raw float32 bits directly.
    4008             :         } else {
    4009             :           DCHECK_EQ(src.type(), Constant::kFloat64);
    4010        3559 :           __ movq(kScratchRegister, src.ToFloat64().AsUint64());  // Float64 bits go through the scratch register.
    4011             :           __ movq(dst, kScratchRegister);
    4012             :         }
    4013             :       }
    4014             :       return;
    4015             :     }
    4016             :   }
    4017           0 :   UNREACHABLE();
    4018             : }
    4019             : // Emits code that exchanges the contents of |source| and |destination|, dispatching on the kind inferred by MoveType::InferSwap.
    4020       75903 : void CodeGenerator::AssembleSwap(InstructionOperand* source,
    4021        3230 :                                  InstructionOperand* destination) {
    4022             :   X64OperandConverter g(this, nullptr);
    4023             :   // Dispatch on the source and destination operand kinds.  Not all
    4024             :   // combinations are possible.
    4025       75903 :   switch (MoveType::InferSwap(source, destination)) {
    4026             :     case MoveType::kRegisterToRegister: {
    4027       65287 :       if (source->IsRegister()) {
    4028             :         Register src = g.ToRegister(source);
    4029             :         Register dst = g.ToRegister(destination);
    4030       62831 :         __ movq(kScratchRegister, src);  // Three-move exchange through the scratch register.
    4031             :         __ movq(src, dst);
    4032             :         __ movq(dst, kScratchRegister);
    4033             :       } else {
    4034             :         DCHECK(source->IsFPRegister());
    4035             :         XMMRegister src = g.ToDoubleRegister(source);
    4036             :         XMMRegister dst = g.ToDoubleRegister(destination);
    4037             :         __ Movapd(kScratchDoubleReg, src);  // Same pattern using the scratch XMM register.
    4038             :         __ Movapd(src, dst);
    4039             :         __ Movapd(dst, kScratchDoubleReg);
    4040             :       }
    4041             :       return;
    4042             :     }
    4043             :     case MoveType::kRegisterToStack: {
    4044        6899 :       if (source->IsRegister()) {
    4045             :         Register src = g.ToRegister(source);
    4046        1615 :         __ pushq(src);  // Park the register value on the stack instead of using a scratch register.
    4047             :         frame_access_state()->IncreaseSPDelta(1);  // NOTE(review): presumably keeps the ToOperand() result below correct after the push - confirm.
    4048             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4049        3230 :                                                          kSystemPointerSize);
    4050             :         __ movq(src, g.ToOperand(destination));
    4051             :         frame_access_state()->IncreaseSPDelta(-1);  // NOTE(review): delta is undone before the pop operand is built; presumably popq addresses memory relative to the already-incremented rsp - confirm.
    4052        1615 :         __ popq(g.ToOperand(destination));  // Pop the parked value into the destination slot.
    4053             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4054        3230 :                                                          -kSystemPointerSize);
    4055             :       } else {
    4056             :         DCHECK(source->IsFPRegister());
    4057             :         XMMRegister src = g.ToDoubleRegister(source);
    4058        5284 :         Operand dst = g.ToOperand(destination);
    4059             :         MachineRepresentation rep =
    4060             :             LocationOperand::cast(source)->representation();
    4061        5284 :         if (rep != MachineRepresentation::kSimd128) {  // Non-kSimd128 values use Movsd; kSimd128 values use Movups.
    4062             :           __ Movsd(kScratchDoubleReg, src);
    4063             :           __ Movsd(src, dst);
    4064             :           __ Movsd(dst, kScratchDoubleReg);
    4065             :         } else {
    4066             :           __ Movups(kScratchDoubleReg, src);
    4067             :           __ Movups(src, dst);
    4068             :           __ Movups(dst, kScratchDoubleReg);
    4069             :         }
    4070             :       }
    4071             :       return;
    4072             :     }
    4073             :     case MoveType::kStackToStack: {
    4074        3717 :       Operand src = g.ToOperand(source);
    4075        3717 :       Operand dst = g.ToOperand(destination);
    4076             :       MachineRepresentation rep =
    4077             :           LocationOperand::cast(source)->representation();
    4078        3717 :       if (rep != MachineRepresentation::kSimd128) {
    4079             :         Register tmp = kScratchRegister;
    4080        2981 :         __ movq(tmp, dst);  // Save dst in the scratch register.
    4081        2981 :         __ pushq(src);  // Then use stack to copy src to destination.
    4082             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4083        5962 :                                                          kSystemPointerSize);
    4084        2981 :         __ popq(dst);
    4085             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4086        5962 :                                                          -kSystemPointerSize);
    4087             :         __ movq(src, tmp);  // Finally write the saved dst value into src.
    4088             :       } else {
    4089             :         // Without AVX, misaligned reads and writes will trap. Move using the
    4090             :         // stack, in two parts.
    4091         736 :         __ movups(kScratchDoubleReg, dst);  // Save dst in scratch register.
    4092         736 :         __ pushq(src);  // Then use stack to copy src to destination.
    4093             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4094        1472 :                                                          kSystemPointerSize);
    4095         736 :         __ popq(dst);
    4096             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4097        1472 :                                                          -kSystemPointerSize);
    4098         736 :         __ pushq(g.ToOperand(source, kSystemPointerSize));  // Second part: the word at offset kSystemPointerSize.
    4099             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4100        1472 :                                                          kSystemPointerSize);
    4101         736 :         __ popq(g.ToOperand(destination, kSystemPointerSize));
    4102             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4103        1472 :                                                          -kSystemPointerSize);
    4104         736 :         __ movups(src, kScratchDoubleReg);  // Write the saved dst value into src.
    4105             :       }
    4106             :       return;
    4107             :     }
    4108             :     default:
    4109           0 :       UNREACHABLE();
    4110             :       break;
    4111             :   }
    4112             : }
    4113             : // Emits the jump table: one dq entry per label in |targets|, in index order, for |target_count| entries.
    4114         314 : void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
    4115      201364 :   for (size_t index = 0; index < target_count; ++index) {
    4116      201050 :     __ dq(targets[index]);
    4117             :   }
    4118         314 : }
    4119             : 
    4120             : #undef __
    4121             : 
    4122             : }  // namespace compiler
    4123             : }  // namespace internal
    4124      178779 : }  // namespace v8

Generated by: LCOV version 1.10