LCOV - code coverage report
Current view: top level - src/compiler/backend/x64 - code-generator-x64.cc (source / functions) Hit Total Coverage
Test: app.info Lines: 1421 1603 88.6 %
Date: 2019-01-20 Functions: 42 59 71.2 %

          Line data    Source code
       1             : // Copyright 2013 the V8 project authors. All rights reserved.
       2             : // Use of this source code is governed by a BSD-style license that can be
       3             : // found in the LICENSE file.
       4             : 
       5             : #include "src/compiler/backend/code-generator.h"
       6             : 
       7             : #include <limits>
       8             : 
       9             : #include "src/base/overflowing-math.h"
      10             : #include "src/compiler/backend/code-generator-impl.h"
      11             : #include "src/compiler/backend/gap-resolver.h"
      12             : #include "src/compiler/node-matchers.h"
      13             : #include "src/compiler/osr.h"
      14             : #include "src/heap/heap-inl.h"  // crbug.com/v8/8499
      15             : #include "src/macro-assembler.h"
      16             : #include "src/objects/smi.h"
      17             : #include "src/optimized-compilation-info.h"
      18             : #include "src/wasm/wasm-code-manager.h"
      19             : #include "src/wasm/wasm-objects.h"
      20             : #include "src/x64/assembler-x64.h"
      21             : 
      22             : namespace v8 {
      23             : namespace internal {
      24             : namespace compiler {
      25             : 
      26             : #define __ tasm()->
      27             : 
      28             : // Adds X64 specific methods for decoding operands.
      29             : class X64OperandConverter : public InstructionOperandConverter {
      30             :  public:
      31             :   X64OperandConverter(CodeGenerator* gen, Instruction* instr)
      32             :       : InstructionOperandConverter(gen, instr) {}
      33             : 
      34             :   Immediate InputImmediate(size_t index) {
      35     5344908 :     return ToImmediate(instr_->InputAt(index));
      36             :   }
      37             : 
      38     1044868 :   Operand InputOperand(size_t index, int extra = 0) {
      39     3134600 :     return ToOperand(instr_->InputAt(index), extra);
      40             :   }
      41             : 
      42           0 :   Operand OutputOperand() { return ToOperand(instr_->Output()); }
      43             : 
      44     4289866 :   Immediate ToImmediate(InstructionOperand* operand) {
      45     4289866 :     Constant constant = ToConstant(operand);
      46     4289899 :     if (constant.type() == Constant::kFloat64) {
      47             :       DCHECK_EQ(0, constant.ToFloat64().AsUint64());
      48       20796 :       return Immediate(0);
      49             :     }
      50     4269103 :     if (RelocInfo::IsWasmReference(constant.rmode())) {
      51           0 :       return Immediate(constant.ToInt32(), constant.rmode());
      52             :     }
      53     4269103 :     return Immediate(constant.ToInt32());
      54             :   }
      55             : 
      56             :   Operand ToOperand(InstructionOperand* op, int extra = 0) {
      57             :     DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
      58    13160463 :     return SlotToOperand(AllocatedOperand::cast(op)->index(), extra);
      59             :   }
      60             : 
      61    13171980 :   Operand SlotToOperand(int slot_index, int extra = 0) {
      62    26343960 :     FrameOffset offset = frame_access_state()->GetFrameOffset(slot_index);
      63             :     return Operand(offset.from_stack_pointer() ? rsp : rbp,
      64    26344022 :                    offset.offset() + extra);
      65             :   }
      66             : 
      67             :   static size_t NextOffset(size_t* offset) {
      68    14598246 :     size_t i = *offset;
      69    26142584 :     (*offset)++;
      70             :     return i;
      71             :   }
      72             : 
      73             :   static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) {
      74             :     STATIC_ASSERT(0 == static_cast<int>(times_1));
      75             :     STATIC_ASSERT(1 == static_cast<int>(times_2));
      76             :     STATIC_ASSERT(2 == static_cast<int>(times_4));
      77             :     STATIC_ASSERT(3 == static_cast<int>(times_8));
      78     1459591 :     int scale = static_cast<int>(mode - one);
      79             :     DCHECK(scale >= 0 && scale < 4);
      80     1459591 :     return static_cast<ScaleFactor>(scale);
      81             :   }
      82             : 
      83    14598246 :   Operand MemoryOperand(size_t* offset) {
      84    14598246 :     AddressingMode mode = AddressingModeField::decode(instr_->opcode());
      85    14598246 :     switch (mode) {
      86             :       case kMode_MR: {
      87    16555990 :         Register base = InputRegister(NextOffset(offset));
      88             :         int32_t disp = 0;
      89     2117090 :         return Operand(base, disp);
      90             :       }
      91             :       case kMode_MRI: {
      92     9572478 :         Register base = InputRegister(NextOffset(offset));
      93     9572478 :         int32_t disp = InputInt32(NextOffset(offset));
      94     9572508 :         return Operand(base, disp);
      95             :       }
      96             :       case kMode_MR1:
      97             :       case kMode_MR2:
      98             :       case kMode_MR4:
      99             :       case kMode_MR8: {
     100      753301 :         Register base = InputRegister(NextOffset(offset));
     101      753301 :         Register index = InputRegister(NextOffset(offset));
     102             :         ScaleFactor scale = ScaleFor(kMode_MR1, mode);
     103             :         int32_t disp = 0;
     104      753301 :         return Operand(base, index, scale, disp);
     105             :       }
     106             :       case kMode_MR1I:
     107             :       case kMode_MR2I:
     108             :       case kMode_MR4I:
     109             :       case kMode_MR8I: {
     110      536440 :         Register base = InputRegister(NextOffset(offset));
     111      536440 :         Register index = InputRegister(NextOffset(offset));
     112             :         ScaleFactor scale = ScaleFor(kMode_MR1I, mode);
     113      536440 :         int32_t disp = InputInt32(NextOffset(offset));
     114      536441 :         return Operand(base, index, scale, disp);
     115             :       }
     116             :       case kMode_M1: {
     117           0 :         Register base = InputRegister(NextOffset(offset));
     118             :         int32_t disp = 0;
     119           0 :         return Operand(base, disp);
     120             :       }
     121             :       case kMode_M2:
     122           0 :         UNREACHABLE();  // Should use kModeMR with more compact encoding instead
     123             :         return Operand(no_reg, 0);
     124             :       case kMode_M4:
     125             :       case kMode_M8: {
     126       24171 :         Register index = InputRegister(NextOffset(offset));
     127             :         ScaleFactor scale = ScaleFor(kMode_M1, mode);
     128             :         int32_t disp = 0;
     129       24171 :         return Operand(index, scale, disp);
     130             :       }
     131             :       case kMode_M1I:
     132             :       case kMode_M2I:
     133             :       case kMode_M4I:
     134             :       case kMode_M8I: {
     135      145679 :         Register index = InputRegister(NextOffset(offset));
     136             :         ScaleFactor scale = ScaleFor(kMode_M1I, mode);
     137      145679 :         int32_t disp = InputInt32(NextOffset(offset));
     138      145679 :         return Operand(index, scale, disp);
     139             :       }
     140             :       case kMode_Root: {
     141     1449087 :         Register base = kRootRegister;
     142     1449087 :         int32_t disp = InputInt32(NextOffset(offset));
     143     1449090 :         return Operand(base, disp);
     144             :       }
     145             :       case kMode_None:
     146           0 :         UNREACHABLE();
     147             :     }
     148           0 :     UNREACHABLE();
     149             :   }
     150             : 
     151             :   Operand MemoryOperand(size_t first_input = 0) {
     152     7878294 :     return MemoryOperand(&first_input);
     153             :   }
     154             : };
     155             : 
     156             : namespace {
     157             : 
     158             : bool HasImmediateInput(Instruction* instr, size_t index) {
     159    19853568 :   return instr->InputAt(index)->IsImmediate();
     160             : }
     161             : 
     162           0 : class OutOfLineLoadFloat32NaN final : public OutOfLineCode {
     163             :  public:
     164             :   OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result)
     165         183 :       : OutOfLineCode(gen), result_(result) {}
     166             : 
     167         183 :   void Generate() final {
     168         366 :     __ Xorps(result_, result_);
     169             :     __ Divss(result_, result_);
     170         183 :   }
     171             : 
     172             :  private:
     173             :   XMMRegister const result_;
     174             : };
     175             : 
     176           0 : class OutOfLineLoadFloat64NaN final : public OutOfLineCode {
     177             :  public:
     178             :   OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result)
     179         647 :       : OutOfLineCode(gen), result_(result) {}
     180             : 
     181         647 :   void Generate() final {
     182        1294 :     __ Xorpd(result_, result_);
     183             :     __ Divsd(result_, result_);
     184         647 :   }
     185             : 
     186             :  private:
     187             :   XMMRegister const result_;
     188             : };
     189             : 
     190           0 : class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
     191             :  public:
     192      111762 :   OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
     193             :                              XMMRegister input, StubCallMode stub_mode,
     194             :                              UnwindingInfoWriter* unwinding_info_writer)
     195             :       : OutOfLineCode(gen),
     196             :         result_(result),
     197             :         input_(input),
     198             :         stub_mode_(stub_mode),
     199             :         unwinding_info_writer_(unwinding_info_writer),
     200             :         isolate_(gen->isolate()),
     201      167658 :         zone_(gen->zone()) {}
     202             : 
     203       55882 :   void Generate() final {
     204      335356 :     __ subp(rsp, Immediate(kDoubleSize));
     205             :     unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
     206      111780 :                                                       kDoubleSize);
     207      111791 :     __ Movsd(MemOperand(rsp, 0), input_);
     208       55897 :     if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
     209             :       // A direct call to a wasm runtime stub defined in this module.
     210             :       // Just encode the stub index. This will be patched when the code
     211             :       // is added to the native module and copied into wasm code space.
     212        1651 :       __ near_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
     213             :     } else {
     214      108485 :       __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET);
     215             :     }
     216      167683 :     __ movl(result_, MemOperand(rsp, 0));
     217       55896 :     __ addp(rsp, Immediate(kDoubleSize));
     218             :     unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
     219      111794 :                                                       -kDoubleSize);
     220       55897 :   }
     221             : 
     222             :  private:
     223             :   Register const result_;
     224             :   XMMRegister const input_;
     225             :   StubCallMode stub_mode_;
     226             :   UnwindingInfoWriter* const unwinding_info_writer_;
     227             :   Isolate* isolate_;
     228             :   Zone* zone_;
     229             : };
     230             : 
     231           0 : class OutOfLineRecordWrite final : public OutOfLineCode {
     232             :  public:
     233      313550 :   OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand,
     234             :                        Register value, Register scratch0, Register scratch1,
     235             :                        RecordWriteMode mode, StubCallMode stub_mode)
     236             :       : OutOfLineCode(gen),
     237             :         object_(object),
     238             :         operand_(operand),
     239             :         value_(value),
     240             :         scratch0_(scratch0),
     241             :         scratch1_(scratch1),
     242             :         mode_(mode),
     243             :         stub_mode_(stub_mode),
     244      627100 :         zone_(gen->zone()) {}
     245             : 
     246      313550 :   void Generate() final {
     247      313550 :     if (mode_ > RecordWriteMode::kValueIsPointer) {
     248     1502470 :       __ JumpIfSmi(value_, exit());
     249             :     }
     250             :     __ CheckPageFlag(value_, scratch0_,
     251             :                      MemoryChunk::kPointersToHereAreInterestingMask, zero,
     252      627100 :                      exit());
     253      313550 :     __ leap(scratch1_, operand_);
     254             : 
     255             :     RememberedSetAction const remembered_set_action =
     256             :         mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
     257      313550 :                                              : OMIT_REMEMBERED_SET;
     258             :     SaveFPRegsMode const save_fp_mode =
     259      627100 :         frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
     260             : 
     261      313550 :     if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
     262             :       // A direct call to a wasm runtime stub defined in this module.
     263             :       // Just encode the stub index. This will be patched when the code
     264             :       // is added to the native module and copied into wasm code space.
     265             :       __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
     266         171 :                              save_fp_mode, wasm::WasmCode::kWasmRecordWrite);
     267             :     } else {
     268             :       __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
     269      313379 :                              save_fp_mode);
     270             :     }
     271      313550 :   }
     272             : 
     273             :  private:
     274             :   Register const object_;
     275             :   Operand const operand_;
     276             :   Register const value_;
     277             :   Register const scratch0_;
     278             :   Register const scratch1_;
     279             :   RecordWriteMode const mode_;
     280             :   StubCallMode const stub_mode_;
     281             :   Zone* zone_;
     282             : };
     283             : 
     284           0 : class WasmOutOfLineTrap : public OutOfLineCode {
     285             :  public:
     286             :   WasmOutOfLineTrap(CodeGenerator* gen, Instruction* instr)
     287      503600 :       : OutOfLineCode(gen), gen_(gen), instr_(instr) {}
     288             : 
     289       47278 :   void Generate() override {
     290       94556 :     X64OperandConverter i(gen_, instr_);
     291             :     TrapId trap_id =
     292       47278 :         static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
     293             :     GenerateWithTrapId(trap_id);
     294       47304 :   }
     295             : 
     296             :  protected:
     297             :   CodeGenerator* gen_;
     298             : 
     299      503567 :   void GenerateWithTrapId(TrapId trap_id) { GenerateCallToTrap(trap_id); }
     300             : 
     301             :  private:
     302      503552 :   void GenerateCallToTrap(TrapId trap_id) {
     303     1007150 :     if (!gen_->wasm_runtime_exception_support()) {
     304             :       // We cannot test calls to the runtime in cctest/test-run-wasm.
     305             :       // Therefore we emit a call to C here instead of a call to the runtime.
     306     1767589 :       __ PrepareCallCFunction(0);
     307             :       __ CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(),
     308      760290 :                        0);
     309      380145 :       __ LeaveFrame(StackFrame::WASM_COMPILED);
     310      760290 :       auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
     311             :       size_t pop_size =
     312      380145 :           call_descriptor->StackParameterCount() * kSystemPointerSize;
     313             :       // Use rcx as a scratch register, we return anyways immediately.
     314      760290 :       __ Ret(static_cast<int>(pop_size), rcx);
     315             :     } else {
     316      123413 :       gen_->AssembleSourcePosition(instr_);
     317             :       // A direct call to a wasm runtime stub defined in this module.
     318             :       // Just encode the stub index. This will be patched when the code
     319             :       // is added to the native module and copied into wasm code space.
     320      246928 :       __ near_call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
     321             :       ReferenceMap* reference_map =
     322      123453 :           new (gen_->zone()) ReferenceMap(gen_->zone());
     323             :       gen_->RecordSafepoint(reference_map, Safepoint::kSimple, 0,
     324      123468 :                             Safepoint::kNoLazyDeopt);
     325      123545 :       __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
     326             :     }
     327      503649 :   }
     328             : 
     329             :   Instruction* instr_;
     330             : };
     331             : 
     332           0 : class WasmProtectedInstructionTrap final : public WasmOutOfLineTrap {
     333             :  public:
     334             :   WasmProtectedInstructionTrap(CodeGenerator* gen, int pc, Instruction* instr)
     335      456281 :       : WasmOutOfLineTrap(gen, instr), pc_(pc) {}
     336             : 
     337      456260 :   void Generate() final {
     338      912520 :     gen_->AddProtectedInstructionLanding(pc_, __ pc_offset());
     339      456278 :     GenerateWithTrapId(TrapId::kTrapMemOutOfBounds);
     340      456349 :   }
     341             : 
     342             :  private:
     343             :   int pc_;
     344             : };
     345             : 
     346    10711446 : void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen,
     347             :                          InstructionCode opcode, Instruction* instr,
     348             :                          X64OperandConverter& i, int pc) {
     349             :   const MemoryAccessMode access_mode =
     350    10711446 :       static_cast<MemoryAccessMode>(MiscField::decode(opcode));
     351    10711446 :   if (access_mode == kMemoryAccessProtected) {
     352             :     new (zone) WasmProtectedInstructionTrap(codegen, pc, instr);
     353             :   }
     354    10711406 : }
     355             : 
     356    10018014 : void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
     357             :                                    InstructionCode opcode, Instruction* instr,
     358             :                                    X64OperandConverter& i) {
     359             :   const MemoryAccessMode access_mode =
     360    10018014 :       static_cast<MemoryAccessMode>(MiscField::decode(opcode));
     361    10018014 :   if (access_mode == kMemoryAccessPoisoned) {
     362           0 :     Register value = i.OutputRegister();
     363           0 :     codegen->tasm()->andq(value, kSpeculationPoisonRegister);
     364             :   }
     365    10018014 : }
     366             : 
     367             : }  // namespace
     368             : 
     369             : #define ASSEMBLE_UNOP(asm_instr)         \
     370             :   do {                                   \
     371             :     if (instr->Output()->IsRegister()) { \
     372             :       __ asm_instr(i.OutputRegister());  \
     373             :     } else {                             \
     374             :       __ asm_instr(i.OutputOperand());   \
     375             :     }                                    \
     376             :   } while (false)
     377             : 
     378             : #define ASSEMBLE_BINOP(asm_instr)                                     \
     379             :   do {                                                                \
     380             :     if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
     381             :       size_t index = 1;                                               \
     382             :       Operand right = i.MemoryOperand(&index);                        \
     383             :       __ asm_instr(i.InputRegister(0), right);                        \
     384             :     } else {                                                          \
     385             :       if (HasImmediateInput(instr, 1)) {                              \
     386             :         if (instr->InputAt(0)->IsRegister()) {                        \
     387             :           __ asm_instr(i.InputRegister(0), i.InputImmediate(1));      \
     388             :         } else {                                                      \
     389             :           __ asm_instr(i.InputOperand(0), i.InputImmediate(1));       \
     390             :         }                                                             \
     391             :       } else {                                                        \
     392             :         if (instr->InputAt(1)->IsRegister()) {                        \
     393             :           __ asm_instr(i.InputRegister(0), i.InputRegister(1));       \
     394             :         } else {                                                      \
     395             :           __ asm_instr(i.InputRegister(0), i.InputOperand(1));        \
     396             :         }                                                             \
     397             :       }                                                               \
     398             :     }                                                                 \
     399             :   } while (false)
     400             : 
     401             : #define ASSEMBLE_COMPARE(asm_instr)                                   \
     402             :   do {                                                                \
     403             :     if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
     404             :       size_t index = 0;                                               \
     405             :       Operand left = i.MemoryOperand(&index);                         \
     406             :       if (HasImmediateInput(instr, index)) {                          \
     407             :         __ asm_instr(left, i.InputImmediate(index));                  \
     408             :       } else {                                                        \
     409             :         __ asm_instr(left, i.InputRegister(index));                   \
     410             :       }                                                               \
     411             :     } else {                                                          \
     412             :       if (HasImmediateInput(instr, 1)) {                              \
     413             :         if (instr->InputAt(0)->IsRegister()) {                        \
     414             :           __ asm_instr(i.InputRegister(0), i.InputImmediate(1));      \
     415             :         } else {                                                      \
     416             :           __ asm_instr(i.InputOperand(0), i.InputImmediate(1));       \
     417             :         }                                                             \
     418             :       } else {                                                        \
     419             :         if (instr->InputAt(1)->IsRegister()) {                        \
     420             :           __ asm_instr(i.InputRegister(0), i.InputRegister(1));       \
     421             :         } else {                                                      \
     422             :           __ asm_instr(i.InputRegister(0), i.InputOperand(1));        \
     423             :         }                                                             \
     424             :       }                                                               \
     425             :     }                                                                 \
     426             :   } while (false)
     427             : 
     428             : #define ASSEMBLE_MULT(asm_instr)                              \
     429             :   do {                                                        \
     430             :     if (HasImmediateInput(instr, 1)) {                        \
     431             :       if (instr->InputAt(0)->IsRegister()) {                  \
     432             :         __ asm_instr(i.OutputRegister(), i.InputRegister(0),  \
     433             :                      i.InputImmediate(1));                    \
     434             :       } else {                                                \
     435             :         __ asm_instr(i.OutputRegister(), i.InputOperand(0),   \
     436             :                      i.InputImmediate(1));                    \
     437             :       }                                                       \
     438             :     } else {                                                  \
     439             :       if (instr->InputAt(1)->IsRegister()) {                  \
     440             :         __ asm_instr(i.OutputRegister(), i.InputRegister(1)); \
     441             :       } else {                                                \
     442             :         __ asm_instr(i.OutputRegister(), i.InputOperand(1));  \
     443             :       }                                                       \
     444             :     }                                                         \
     445             :   } while (false)
     446             : 
     447             : #define ASSEMBLE_SHIFT(asm_instr, width)                                   \
     448             :   do {                                                                     \
     449             :     if (HasImmediateInput(instr, 1)) {                                     \
     450             :       if (instr->Output()->IsRegister()) {                                 \
     451             :         __ asm_instr(i.OutputRegister(), Immediate(i.InputInt##width(1))); \
     452             :       } else {                                                             \
     453             :         __ asm_instr(i.OutputOperand(), Immediate(i.InputInt##width(1)));  \
     454             :       }                                                                    \
     455             :     } else {                                                               \
     456             :       if (instr->Output()->IsRegister()) {                                 \
     457             :         __ asm_instr##_cl(i.OutputRegister());                             \
     458             :       } else {                                                             \
     459             :         __ asm_instr##_cl(i.OutputOperand());                              \
     460             :       }                                                                    \
     461             :     }                                                                      \
     462             :   } while (false)
     463             : 
     464             : #define ASSEMBLE_MOVX(asm_instr)                            \
     465             :   do {                                                      \
     466             :     if (instr->addressing_mode() != kMode_None) {           \
     467             :       __ asm_instr(i.OutputRegister(), i.MemoryOperand());  \
     468             :     } else if (instr->InputAt(0)->IsRegister()) {           \
     469             :       __ asm_instr(i.OutputRegister(), i.InputRegister(0)); \
     470             :     } else {                                                \
     471             :       __ asm_instr(i.OutputRegister(), i.InputOperand(0));  \
     472             :     }                                                       \
     473             :   } while (false)
     474             : 
     475             : #define ASSEMBLE_SSE_BINOP(asm_instr)                                   \
     476             :   do {                                                                  \
     477             :     if (instr->InputAt(1)->IsFPRegister()) {                            \
     478             :       __ asm_instr(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); \
     479             :     } else {                                                            \
     480             :       __ asm_instr(i.InputDoubleRegister(0), i.InputOperand(1));        \
     481             :     }                                                                   \
     482             :   } while (false)
     483             : 
     484             : #define ASSEMBLE_SSE_UNOP(asm_instr)                                    \
     485             :   do {                                                                  \
     486             :     if (instr->InputAt(0)->IsFPRegister()) {                            \
     487             :       __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); \
     488             :     } else {                                                            \
     489             :       __ asm_instr(i.OutputDoubleRegister(), i.InputOperand(0));        \
     490             :     }                                                                   \
     491             :   } while (false)
     492             : 
     493             : #define ASSEMBLE_AVX_BINOP(asm_instr)                                  \
     494             :   do {                                                                 \
     495             :     CpuFeatureScope avx_scope(tasm(), AVX);                            \
     496             :     if (instr->InputAt(1)->IsFPRegister()) {                           \
     497             :       __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
     498             :                    i.InputDoubleRegister(1));                          \
     499             :     } else {                                                           \
     500             :       __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
     501             :                    i.InputOperand(1));                                 \
     502             :     }                                                                  \
     503             :   } while (false)
     504             : 
     505             : #define ASSEMBLE_IEEE754_BINOP(name)                                     \
     506             :   do {                                                                   \
     507             :     __ PrepareCallCFunction(2);                                          \
     508             :     __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \
     509             :   } while (false)
     510             : 
     511             : #define ASSEMBLE_IEEE754_UNOP(name)                                      \
     512             :   do {                                                                   \
     513             :     __ PrepareCallCFunction(1);                                          \
     514             :     __ CallCFunction(ExternalReference::ieee754_##name##_function(), 1); \
     515             :   } while (false)
     516             : 
     517             : #define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
     518             :   do {                                                          \
     519             :     Label binop;                                                \
     520             :     __ bind(&binop);                                            \
     521             :     __ mov_inst(rax, i.MemoryOperand(1));                       \
     522             :     __ movl(i.TempRegister(0), rax);                            \
     523             :     __ bin_inst(i.TempRegister(0), i.InputRegister(0));         \
     524             :     __ lock();                                                  \
     525             :     __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));     \
     526             :     __ j(not_equal, &binop);                                    \
     527             :   } while (false)
     528             : 
     529             : #define ASSEMBLE_ATOMIC64_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
     530             :   do {                                                            \
     531             :     Label binop;                                                  \
     532             :     __ bind(&binop);                                              \
     533             :     __ mov_inst(rax, i.MemoryOperand(1));                         \
     534             :     __ movq(i.TempRegister(0), rax);                              \
     535             :     __ bin_inst(i.TempRegister(0), i.InputRegister(0));           \
     536             :     __ lock();                                                    \
     537             :     __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));       \
     538             :     __ j(not_equal, &binop);                                      \
     539             :   } while (false)
     540             : 
     541             : #define ASSEMBLE_SIMD_INSTR(opcode, dst_operand, index)      \
     542             :   do {                                                       \
     543             :     if (instr->InputAt(index)->IsSimd128Register()) {        \
     544             :       __ opcode(dst_operand, i.InputSimd128Register(index)); \
     545             :     } else {                                                 \
     546             :       __ opcode(dst_operand, i.InputOperand(index));         \
     547             :     }                                                        \
     548             :   } while (false)
     549             : 
     550             : #define ASSEMBLE_SIMD_IMM_INSTR(opcode, dst_operand, index, imm)  \
     551             :   do {                                                            \
     552             :     if (instr->InputAt(index)->IsSimd128Register()) {             \
     553             :       __ opcode(dst_operand, i.InputSimd128Register(index), imm); \
     554             :     } else {                                                      \
     555             :       __ opcode(dst_operand, i.InputOperand(index), imm);         \
     556             :     }                                                             \
     557             :   } while (false)
     558             : 
     559             : #define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode)             \
     560             :   do {                                                   \
     561             :     XMMRegister dst = i.OutputSimd128Register();         \
     562             :     DCHECK_EQ(dst, i.InputSimd128Register(0));           \
     563             :     byte input_index = instr->InputCount() == 2 ? 1 : 0; \
     564             :     ASSEMBLE_SIMD_INSTR(opcode, dst, input_index);       \
     565             :   } while (false)
     566             : 
     567             : #define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, SSELevel, imm)                  \
     568             :   do {                                                                    \
     569             :     CpuFeatureScope sse_scope(tasm(), SSELevel);                          \
     570             :     DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));      \
     571             :     __ opcode(i.OutputSimd128Register(), i.InputSimd128Register(1), imm); \
     572             :   } while (false)
     573             : 
     574     2958864 : void CodeGenerator::AssembleDeconstructFrame() {
     575     5917728 :   unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
     576     2958931 :   __ movq(rsp, rbp);
     577     2959047 :   __ popq(rbp);
     578     2959037 : }
     579             : 
     580      236584 : void CodeGenerator::AssemblePrepareTailCall() {
     581      118292 :   if (frame_access_state()->has_frame()) {
     582      144204 :     __ movq(rbp, MemOperand(rbp, 0));
     583             :   }
     584             :   frame_access_state()->SetFrameAccessToSP();
     585      118292 : }
     586             : 
     587        1344 : void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
     588             :                                                      Register scratch1,
     589             :                                                      Register scratch2,
     590             :                                                      Register scratch3) {
     591             :   DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
     592        1344 :   Label done;
     593             : 
     594             :   // Check if current frame is an arguments adaptor frame.
     595             :   __ cmpp(Operand(rbp, CommonFrameConstants::kContextOrFrameTypeOffset),
     596        1344 :           Immediate(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
     597        1344 :   __ j(not_equal, &done, Label::kNear);
     598             : 
     599             :   // Load arguments count from current arguments adaptor frame (note, it
     600             :   // does not include receiver).
     601        1344 :   Register caller_args_count_reg = scratch1;
     602             :   __ SmiUntag(caller_args_count_reg,
     603        1344 :               Operand(rbp, ArgumentsAdaptorFrameConstants::kLengthOffset));
     604             : 
     605             :   ParameterCount callee_args_count(args_reg);
     606             :   __ PrepareForTailCall(callee_args_count, caller_args_count_reg, scratch2,
     607        1344 :                         scratch3);
     608        1344 :   __ bind(&done);
     609        1344 : }
     610             : 
     611             : namespace {
     612             : 
     613      264733 : void AdjustStackPointerForTailCall(Assembler* assembler,
     614             :                                    FrameAccessState* state,
     615             :                                    int new_slot_above_sp,
     616             :                                    bool allow_shrinkage = true) {
     617             :   int current_sp_offset = state->GetSPToFPSlotCount() +
     618      264733 :                           StandardFrameConstants::kFixedSlotCountAboveFp;
     619      264733 :   int stack_slot_delta = new_slot_above_sp - current_sp_offset;
     620      264733 :   if (stack_slot_delta > 0) {
     621        1820 :     assembler->subq(rsp, Immediate(stack_slot_delta * kSystemPointerSize));
     622             :     state->IncreaseSPDelta(stack_slot_delta);
     623      263823 :   } else if (allow_shrinkage && stack_slot_delta < 0) {
     624      142448 :     assembler->addq(rsp, Immediate(-stack_slot_delta * kSystemPointerSize));
     625             :     state->IncreaseSPDelta(stack_slot_delta);
     626             :   }
     627      264733 : }
     628             : 
     629        5960 : void SetupShuffleMaskOnStack(TurboAssembler* assembler, uint32_t* mask) {
     630        5960 :   int64_t shuffle_mask = (mask[2]) | (static_cast<uint64_t>(mask[3]) << 32);
     631        5960 :   assembler->movq(kScratchRegister, shuffle_mask);
     632        5960 :   assembler->Push(kScratchRegister);
     633        5960 :   shuffle_mask = (mask[0]) | (static_cast<uint64_t>(mask[1]) << 32);
     634        5960 :   assembler->movq(kScratchRegister, shuffle_mask);
     635        5960 :   assembler->Push(kScratchRegister);
     636        5960 : }
     637             : 
     638             : }  // namespace
     639             : 
     640      118307 : void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
     641      292852 :                                               int first_unused_stack_slot) {
     642      118307 :   CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
     643             :   ZoneVector<MoveOperands*> pushes(zone());
     644      118307 :   GetPushCompatibleMoves(instr, flags, &pushes);
     645             : 
     646      129861 :   if (!pushes.empty() &&
     647       23108 :       (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
     648             :        first_unused_stack_slot)) {
     649             :     X64OperandConverter g(this, instr);
     650       51227 :     for (auto move : pushes) {
     651             :       LocationOperand destination_location(
     652             :           LocationOperand::cast(move->destination()));
     653       28119 :       InstructionOperand source(move->source());
     654       28119 :       AdjustStackPointerForTailCall(tasm(), frame_access_state(),
     655       28119 :                                     destination_location.index());
     656       28119 :       if (source.IsStackSlot()) {
     657             :         LocationOperand source_location(LocationOperand::cast(source));
     658       11569 :         __ Push(g.SlotToOperand(source_location.index()));
     659       16550 :       } else if (source.IsRegister()) {
     660             :         LocationOperand source_location(LocationOperand::cast(source));
     661       16550 :         __ Push(source_location.GetRegister());
     662           0 :       } else if (source.IsImmediate()) {
     663           0 :         __ Push(Immediate(ImmediateOperand::cast(source).inline_value()));
     664             :       } else {
     665             :         // Pushes of non-scalar data types is not supported.
     666           0 :         UNIMPLEMENTED();
     667             :       }
     668             :       frame_access_state()->IncreaseSPDelta(1);
     669             :       move->Eliminate();
     670             :     }
     671             :   }
     672      118307 :   AdjustStackPointerForTailCall(tasm(), frame_access_state(),
     673      118307 :                                 first_unused_stack_slot, false);
     674      118307 : }
     675             : 
     676      118307 : void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
     677      118307 :                                              int first_unused_stack_slot) {
     678      118307 :   AdjustStackPointerForTailCall(tasm(), frame_access_state(),
     679      118307 :                                 first_unused_stack_slot);
     680      118307 : }
     681             : 
     682             : // Check that {kJavaScriptCallCodeStartRegister} is correct.
     683         108 : void CodeGenerator::AssembleCodeStartRegisterCheck() {
     684         108 :   __ ComputeCodeStartAddress(rbx);
     685         108 :   __ cmpq(rbx, kJavaScriptCallCodeStartRegister);
     686         108 :   __ Assert(equal, AbortReason::kWrongFunctionCodeStart);
     687         108 : }
     688             : 
     689             : // Check if the code object is marked for deoptimization. If it is, then it
     690             : // jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need
     691             : // to:
     692             : //    1. read from memory the word that contains that bit, which can be found in
     693             : //       the flags in the referenced {CodeDataContainer} object;
     694             : //    2. test kMarkedForDeoptimizationBit in those flags; and
     695             : //    3. if it is not zero then it jumps to the builtin.
     696      912176 : void CodeGenerator::BailoutIfDeoptimized() {
     697             :   int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
     698             :   __ LoadTaggedPointerField(rbx,
     699      456073 :                             Operand(kJavaScriptCallCodeStartRegister, offset));
     700             :   __ testl(FieldOperand(rbx, CodeDataContainer::kKindSpecificFlagsOffset),
     701      456104 :            Immediate(1 << Code::kMarkedForDeoptimizationBit));
     702             :   __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
     703      456103 :           RelocInfo::CODE_TARGET, not_zero);
     704      456106 : }
     705             : 
     706           0 : void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
     707             :   // Set a mask which has all bits set in the normal case, but has all
     708             :   // bits cleared if we are speculatively executing the wrong PC.
     709           0 :   __ ComputeCodeStartAddress(rbx);
     710           0 :   __ xorq(kSpeculationPoisonRegister, kSpeculationPoisonRegister);
     711           0 :   __ cmpp(kJavaScriptCallCodeStartRegister, rbx);
     712             :   __ movp(rbx, Immediate(-1));
     713           0 :   __ cmovq(equal, kSpeculationPoisonRegister, rbx);
     714           0 : }
     715             : 
     716           0 : void CodeGenerator::AssembleRegisterArgumentPoisoning() {
     717           0 :   __ andq(kJSFunctionRegister, kSpeculationPoisonRegister);
     718           0 :   __ andq(kContextRegister, kSpeculationPoisonRegister);
     719           0 :   __ andq(rsp, kSpeculationPoisonRegister);
     720           0 : }
     721             : 
     722             : // Assembles an instruction after register allocation, producing machine code.
     723    64857217 : CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     724    84746701 :     Instruction* instr) {
     725             :   X64OperandConverter i(this, instr);
     726             :   InstructionCode opcode = instr->opcode();
     727    64857217 :   ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
     728    64857217 :   switch (arch_opcode) {
     729             :     case kArchCallCodeObject: {
     730     4040731 :       if (HasImmediateInput(instr, 0)) {
     731     4032826 :         Handle<Code> code = i.InputCode(0);
     732     4032826 :         __ Call(code, RelocInfo::CODE_TARGET);
     733             :       } else {
     734        7935 :         Register reg = i.InputRegister(0);
     735             :         DCHECK_IMPLIES(
     736             :             HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
     737             :             reg == kJavaScriptCallCodeStartRegister);
     738        7935 :         __ LoadCodeObjectEntry(reg, reg);
     739        7933 :         if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
     740           0 :           __ RetpolineCall(reg);
     741             :         } else {
     742        7933 :           __ call(reg);
     743             :         }
     744             :       }
     745     4040766 :       RecordCallPosition(instr);
     746             :       frame_access_state()->ClearSPDelta();
     747             :       break;
     748             :     }
     749             :     case kArchCallBuiltinPointer: {
     750             :       DCHECK(!HasImmediateInput(instr, 0));
     751       11790 :       Register builtin_pointer = i.InputRegister(0);
     752       11790 :       __ CallBuiltinPointer(builtin_pointer);
     753       11790 :       RecordCallPosition(instr);
     754             :       frame_access_state()->ClearSPDelta();
     755             :       break;
     756             :     }
     757             :     case kArchCallWasmFunction: {
     758     1171159 :       if (HasImmediateInput(instr, 0)) {
     759       94900 :         Constant constant = i.ToConstant(instr->InputAt(0));
     760       94923 :         Address wasm_code = static_cast<Address>(constant.ToInt64());
     761       94923 :         if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
     762       94902 :           __ near_call(wasm_code, constant.rmode());
     763             :         } else {
     764           0 :           if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
     765           0 :             __ RetpolineCall(wasm_code, constant.rmode());
     766             :           } else {
     767           0 :             __ Call(wasm_code, constant.rmode());
     768             :           }
     769             :         }
     770             :       } else {
     771     1076259 :         Register reg = i.InputRegister(0);
     772     1076259 :         if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
     773           0 :           __ RetpolineCall(reg);
     774             :         } else {
     775     1076259 :           __ call(reg);
     776             :         }
     777             :       }
     778     1171174 :       RecordCallPosition(instr);
     779             :       frame_access_state()->ClearSPDelta();
     780             :       break;
     781             :     }
     782             :     case kArchTailCallCodeObjectFromJSFunction:
     783             :     case kArchTailCallCodeObject: {
     784       34516 :       if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) {
     785             :         AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
     786             :                                          i.TempRegister(0), i.TempRegister(1),
     787        1344 :                                          i.TempRegister(2));
     788             :       }
     789       34516 :       if (HasImmediateInput(instr, 0)) {
     790       29670 :         Handle<Code> code = i.InputCode(0);
     791       29670 :         __ Jump(code, RelocInfo::CODE_TARGET);
     792             :       } else {
     793        9692 :         Register reg = i.InputRegister(0);
     794             :         DCHECK_IMPLIES(
     795             :             HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
     796             :             reg == kJavaScriptCallCodeStartRegister);
     797        4846 :         __ LoadCodeObjectEntry(reg, reg);
     798        4846 :         if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
     799           0 :           __ RetpolineJump(reg);
     800             :         } else {
     801        4846 :           __ jmp(reg);
     802             :         }
     803             :       }
     804             :       unwinding_info_writer_.MarkBlockWillExit();
     805             :       frame_access_state()->ClearSPDelta();
     806       34516 :       frame_access_state()->SetFrameAccessToDefault();
     807       34516 :       break;
     808             :     }
     809             :     case kArchTailCallWasm: {
     810           0 :       if (HasImmediateInput(instr, 0)) {
     811           0 :         Constant constant = i.ToConstant(instr->InputAt(0));
     812           0 :         Address wasm_code = static_cast<Address>(constant.ToInt64());
     813           0 :         if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
     814           0 :           __ near_jmp(wasm_code, constant.rmode());
     815             :         } else {
     816             :           __ Move(kScratchRegister, wasm_code, constant.rmode());
     817           0 :           __ jmp(kScratchRegister);
     818             :         }
     819             :       } else {
     820           0 :         Register reg = i.InputRegister(0);
     821           0 :         if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
     822           0 :           __ RetpolineJump(reg);
     823             :         } else {
     824           0 :           __ jmp(reg);
     825             :         }
     826             :       }
     827             :       unwinding_info_writer_.MarkBlockWillExit();
     828             :       frame_access_state()->ClearSPDelta();
     829           0 :       frame_access_state()->SetFrameAccessToDefault();
     830           0 :       break;
     831             :     }
     832             :     case kArchTailCallAddress: {
     833       83776 :       CHECK(!HasImmediateInput(instr, 0));
     834       83776 :       Register reg = i.InputRegister(0);
     835             :       DCHECK_IMPLIES(
     836             :           HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
     837             :           reg == kJavaScriptCallCodeStartRegister);
     838       83776 :       if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
     839           0 :         __ RetpolineJump(reg);
     840             :       } else {
     841       83776 :         __ jmp(reg);
     842             :       }
     843             :       unwinding_info_writer_.MarkBlockWillExit();
     844             :       frame_access_state()->ClearSPDelta();
     845       83776 :       frame_access_state()->SetFrameAccessToDefault();
     846             :       break;
     847             :     }
     848             :     case kArchCallJSFunction: {
     849             :       Register func = i.InputRegister(0);
     850      105787 :       if (FLAG_debug_code) {
     851             :         // Check the function's context matches the context argument.
     852          19 :         __ cmp_tagged(rsi, FieldOperand(func, JSFunction::kContextOffset));
     853          19 :         __ Assert(equal, AbortReason::kWrongFunctionContext);
     854             :       }
     855             :       static_assert(kJavaScriptCallCodeStartRegister == rcx, "ABI mismatch");
     856             :       __ LoadTaggedPointerField(rcx,
     857      105787 :                                 FieldOperand(func, JSFunction::kCodeOffset));
     858      105787 :       __ CallCodeObject(rcx);
     859             :       frame_access_state()->ClearSPDelta();
     860      105788 :       RecordCallPosition(instr);
     861             :       break;
     862             :     }
     863             :     case kArchPrepareCallCFunction: {
     864             :       // Frame alignment requires using FP-relative frame addressing.
     865             :       frame_access_state()->SetFrameAccessToFP();
     866       26287 :       int const num_parameters = MiscField::decode(instr->opcode());
     867       26287 :       __ PrepareCallCFunction(num_parameters);
     868       26287 :       break;
     869             :     }
     870             :     case kArchSaveCallerRegisters: {
     871             :       fp_mode_ =
     872         677 :           static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
     873             :       DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
     874             :       // kReturnRegister0 should have been saved before entering the stub.
     875         677 :       int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
     876             :       DCHECK(IsAligned(bytes, kSystemPointerSize));
     877             :       DCHECK_EQ(0, frame_access_state()->sp_delta());
     878         677 :       frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
     879             :       DCHECK(!caller_registers_saved_);
     880         677 :       caller_registers_saved_ = true;
     881         677 :       break;
     882             :     }
     883             :     case kArchRestoreCallerRegisters: {
     884             :       DCHECK(fp_mode_ ==
     885             :              static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
     886             :       DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
     887             :       // Don't overwrite the returned value.
     888         677 :       int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
     889         677 :       frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
     890             :       DCHECK_EQ(0, frame_access_state()->sp_delta());
     891             :       DCHECK(caller_registers_saved_);
     892         677 :       caller_registers_saved_ = false;
     893         677 :       break;
     894             :     }
     895             :     case kArchPrepareTailCall:
     896      118292 :       AssemblePrepareTailCall();
     897      118292 :       break;
     898             :     case kArchCallCFunction: {
     899             :       int const num_parameters = MiscField::decode(instr->opcode());
     900       26287 :       if (HasImmediateInput(instr, 0)) {
     901       25142 :         ExternalReference ref = i.InputExternalReference(0);
     902       25142 :         __ CallCFunction(ref, num_parameters);
     903             :       } else {
     904        1145 :         Register func = i.InputRegister(0);
     905        1145 :         __ CallCFunction(func, num_parameters);
     906             :       }
     907       26287 :       frame_access_state()->SetFrameAccessToDefault();
     908             :       // Ideally, we should decrement SP delta to match the change of stack
     909             :       // pointer in CallCFunction. However, for certain architectures (e.g.
     910             :       // ARM), there may be more strict alignment requirement, causing old SP
     911             :       // to be saved on the stack. In those cases, we can not calculate the SP
     912             :       // delta statically.
     913             :       frame_access_state()->ClearSPDelta();
     914       26287 :       if (caller_registers_saved_) {
     915             :         // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
     916             :         // Here, we assume the sequence to be:
     917             :         //   kArchSaveCallerRegisters;
     918             :         //   kArchCallCFunction;
     919             :         //   kArchRestoreCallerRegisters;
     920             :         int bytes =
     921         677 :             __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
     922         677 :         frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
     923             :       }
     924             :       // TODO(tebbi): Do we need an lfence here?
     925             :       break;
     926             :     }
     927             :     case kArchJmp:
     928     4863679 :       AssembleArchJump(i.InputRpo(0));
     929     4863712 :       break;
     930             :     case kArchBinarySearchSwitch:
     931       39506 :       AssembleArchBinarySearchSwitch(instr);
     932       39506 :       break;
     933             :     case kArchLookupSwitch:
     934           0 :       AssembleArchLookupSwitch(instr);
     935           0 :       break;
     936             :     case kArchTableSwitch:
     937         304 :       AssembleArchTableSwitch(instr);
     938         304 :       break;
     939             :     case kArchComment:
     940           5 :       __ RecordComment(reinterpret_cast<const char*>(i.InputInt64(0)));
     941           5 :       break;
     942             :     case kArchDebugAbort:
     943             :       DCHECK(i.InputRegister(0) == rdx);
     944         170 :       if (!frame_access_state()->has_frame()) {
     945             :         // We don't actually want to generate a pile of code for this, so just
     946             :         // claim there is a stack frame, without generating one.
     947           0 :         FrameScope scope(tasm(), StackFrame::NONE);
     948             :         __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
     949           0 :                 RelocInfo::CODE_TARGET);
     950             :       } else {
     951             :         __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
     952         170 :                 RelocInfo::CODE_TARGET);
     953             :       }
     954         170 :       __ int3();
     955             :       unwinding_info_writer_.MarkBlockWillExit();
     956             :       break;
     957             :     case kArchDebugBreak:
     958      134153 :       __ int3();
     959      134153 :       break;
     960             :     case kArchThrowTerminator:
     961             :       unwinding_info_writer_.MarkBlockWillExit();
     962             :       break;
     963             :     case kArchNop:
     964             :       // don't emit code for nops.
     965             :       break;
     966             :     case kArchDeoptimize: {
     967             :       int deopt_state_id =
     968       47591 :           BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
     969             :       CodeGenResult result =
     970       47591 :           AssembleDeoptimizerCall(deopt_state_id, current_source_position_);
     971       47591 :       if (result != kSuccess) return result;
     972             :       unwinding_info_writer_.MarkBlockWillExit();
     973             :       break;
     974             :     }
     975             :     case kArchRet:
     976     3264254 :       AssembleReturn(instr->InputAt(0));
     977     3264217 :       break;
     978             :     case kArchStackPointer:
     979           0 :       __ movq(i.OutputRegister(), rsp);
     980             :       break;
     981             :     case kArchFramePointer:
     982       30547 :       __ movq(i.OutputRegister(), rbp);
     983             :       break;
     984             :     case kArchParentFramePointer:
     985       49229 :       if (frame_access_state()->has_frame()) {
     986       77127 :         __ movq(i.OutputRegister(), Operand(rbp, 0));
     987             :       } else {
     988       23520 :         __ movq(i.OutputRegister(), rbp);
     989             :       }
     990             :       break;
     991             :     case kArchTruncateDoubleToI: {
     992             :       auto result = i.OutputRegister();
     993             :       auto input = i.InputDoubleRegister(0);
     994             :       auto ool = new (zone()) OutOfLineTruncateDoubleToI(
     995             :           this, result, input, DetermineStubCallMode(),
     996      111786 :           &unwinding_info_writer_);
     997             :       // We use Cvttsd2siq instead of Cvttsd2si due to performance reasons. The
     998             :       // use of Cvttsd2siq requires the movl below to avoid sign extension.
     999       55881 :       __ Cvttsd2siq(result, input);
    1000       55903 :       __ cmpq(result, Immediate(1));
    1001       55902 :       __ j(overflow, ool->entry());
    1002       55902 :       __ bind(ool->exit());
    1003             :       __ movl(result, result);
    1004             :       break;
    1005             :     }
    1006             :     case kArchStoreWithWriteBarrier: {
    1007             :       RecordWriteMode mode =
    1008             :           static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
    1009             :       Register object = i.InputRegister(0);
    1010      313550 :       size_t index = 0;
    1011      313550 :       Operand operand = i.MemoryOperand(&index);
    1012      313550 :       Register value = i.InputRegister(index);
    1013             :       Register scratch0 = i.TempRegister(0);
    1014             :       Register scratch1 = i.TempRegister(1);
    1015             :       auto ool = new (zone())
    1016             :           OutOfLineRecordWrite(this, object, operand, value, scratch0, scratch1,
    1017      627100 :                                mode, DetermineStubCallMode());
    1018      313550 :       __ movp(operand, value);
    1019             :       __ CheckPageFlag(object, scratch0,
    1020             :                        MemoryChunk::kPointersFromHereAreInterestingMask,
    1021      313550 :                        not_zero, ool->entry());
    1022      313550 :       __ bind(ool->exit());
    1023             :       break;
    1024             :     }
    1025             :     case kArchWordPoisonOnSpeculation:
    1026             :       DCHECK_EQ(i.OutputRegister(), i.InputRegister(0));
    1027           0 :       __ andq(i.InputRegister(0), kSpeculationPoisonRegister);
    1028           0 :       break;
    1029             :     case kLFence:
    1030           0 :       __ lfence();
    1031           0 :       break;
    1032             :     case kArchStackSlot: {
    1033             :       FrameOffset offset =
    1034        2512 :           frame_access_state()->GetFrameOffset(i.InputInt32(0));
    1035        1256 :       Register base = offset.from_stack_pointer() ? rsp : rbp;
    1036        3768 :       __ leaq(i.OutputRegister(), Operand(base, offset.offset()));
    1037             :       break;
    1038             :     }
    1039             :     case kIeee754Float64Acos:
    1040         117 :       ASSEMBLE_IEEE754_UNOP(acos);
    1041         117 :       break;
    1042             :     case kIeee754Float64Acosh:
    1043         117 :       ASSEMBLE_IEEE754_UNOP(acosh);
    1044         117 :       break;
    1045             :     case kIeee754Float64Asin:
    1046         117 :       ASSEMBLE_IEEE754_UNOP(asin);
    1047         117 :       break;
    1048             :     case kIeee754Float64Asinh:
    1049         117 :       ASSEMBLE_IEEE754_UNOP(asinh);
    1050         117 :       break;
    1051             :     case kIeee754Float64Atan:
    1052         137 :       ASSEMBLE_IEEE754_UNOP(atan);
    1053         137 :       break;
    1054             :     case kIeee754Float64Atanh:
    1055         117 :       ASSEMBLE_IEEE754_UNOP(atanh);
    1056         117 :       break;
    1057             :     case kIeee754Float64Atan2:
    1058         134 :       ASSEMBLE_IEEE754_BINOP(atan2);
    1059         134 :       break;
    1060             :     case kIeee754Float64Cbrt:
    1061         117 :       ASSEMBLE_IEEE754_UNOP(cbrt);
    1062         117 :       break;
    1063             :     case kIeee754Float64Cos:
    1064         295 :       ASSEMBLE_IEEE754_UNOP(cos);
    1065         295 :       break;
    1066             :     case kIeee754Float64Cosh:
    1067         124 :       ASSEMBLE_IEEE754_UNOP(cosh);
    1068         124 :       break;
    1069             :     case kIeee754Float64Exp:
    1070         152 :       ASSEMBLE_IEEE754_UNOP(exp);
    1071         152 :       break;
    1072             :     case kIeee754Float64Expm1:
    1073         124 :       ASSEMBLE_IEEE754_UNOP(expm1);
    1074         124 :       break;
    1075             :     case kIeee754Float64Log:
    1076         288 :       ASSEMBLE_IEEE754_UNOP(log);
    1077         288 :       break;
    1078             :     case kIeee754Float64Log1p:
    1079         117 :       ASSEMBLE_IEEE754_UNOP(log1p);
    1080         117 :       break;
    1081             :     case kIeee754Float64Log2:
    1082         117 :       ASSEMBLE_IEEE754_UNOP(log2);
    1083         117 :       break;
    1084             :     case kIeee754Float64Log10:
    1085         117 :       ASSEMBLE_IEEE754_UNOP(log10);
    1086         117 :       break;
    1087             :     case kIeee754Float64Pow: {
    1088             :       // TODO(bmeurer): Improve integration of the stub.
    1089             :       __ Movsd(xmm2, xmm0);
    1090         342 :       __ Call(BUILTIN_CODE(isolate(), MathPowInternal), RelocInfo::CODE_TARGET);
    1091             :       __ Movsd(xmm0, xmm3);
    1092             :       break;
    1093             :     }
    1094             :     case kIeee754Float64Sin:
    1095         298 :       ASSEMBLE_IEEE754_UNOP(sin);
    1096         298 :       break;
    1097             :     case kIeee754Float64Sinh:
    1098         124 :       ASSEMBLE_IEEE754_UNOP(sinh);
    1099         124 :       break;
    1100             :     case kIeee754Float64Tan:
    1101         173 :       ASSEMBLE_IEEE754_UNOP(tan);
    1102         173 :       break;
    1103             :     case kIeee754Float64Tanh:
    1104         124 :       ASSEMBLE_IEEE754_UNOP(tanh);
    1105         124 :       break;
    1106             :     case kX64Add32:
    1107      377628 :       ASSEMBLE_BINOP(addl);
    1108             :       break;
    1109             :     case kX64Add:
    1110      370993 :       ASSEMBLE_BINOP(addq);
    1111             :       break;
    1112             :     case kX64Sub32:
    1113      239071 :       ASSEMBLE_BINOP(subl);
    1114             :       break;
    1115             :     case kX64Sub:
    1116      261409 :       ASSEMBLE_BINOP(subq);
    1117             :       break;
    1118             :     case kX64And32:
    1119      956474 :       ASSEMBLE_BINOP(andl);
    1120             :       break;
    1121             :     case kX64And:
    1122     1313217 :       ASSEMBLE_BINOP(andq);
    1123             :       break;
    1124             :     case kX64Cmp8:
    1125       39927 :       ASSEMBLE_COMPARE(cmpb);
    1126             :       break;
    1127             :     case kX64Cmp16:
    1128     1595502 :       ASSEMBLE_COMPARE(cmpw);
    1129             :       break;
    1130             :     case kX64Cmp32:
    1131     6188112 :       ASSEMBLE_COMPARE(cmpl);
    1132             :       break;
    1133             :     case kX64Cmp:
    1134     8344623 :       ASSEMBLE_COMPARE(cmpq);
    1135             :       break;
    1136             :     case kX64Test8:
    1137      309193 :       ASSEMBLE_COMPARE(testb);
    1138             :       break;
    1139             :     case kX64Test16:
    1140      119287 :       ASSEMBLE_COMPARE(testw);
    1141             :       break;
    1142             :     case kX64Test32:
    1143      517169 :       ASSEMBLE_COMPARE(testl);
    1144             :       break;
    1145             :     case kX64Test:
    1146     3131043 :       ASSEMBLE_COMPARE(testq);
    1147             :       break;
    1148             :     case kX64Imul32:
    1149      315568 :       ASSEMBLE_MULT(imull);
    1150             :       break;
    1151             :     case kX64Imul:
    1152       85328 :       ASSEMBLE_MULT(imulq);
    1153             :       break;
    1154             :     case kX64ImulHigh32:
    1155       15456 :       if (instr->InputAt(1)->IsRegister()) {
    1156        7728 :         __ imull(i.InputRegister(1));
    1157             :       } else {
    1158           0 :         __ imull(i.InputOperand(1));
    1159             :       }
    1160             :       break;
    1161             :     case kX64UmulHigh32:
    1162        3106 :       if (instr->InputAt(1)->IsRegister()) {
    1163        1553 :         __ mull(i.InputRegister(1));
    1164             :       } else {
    1165           0 :         __ mull(i.InputOperand(1));
    1166             :       }
    1167             :       break;
    1168             :     case kX64Idiv32:
    1169       74564 :       __ cdq();
    1170       74564 :       __ idivl(i.InputRegister(1));
    1171             :       break;
    1172             :     case kX64Idiv:
    1173        4670 :       __ cqo();
    1174        4670 :       __ idivq(i.InputRegister(1));
    1175             :       break;
    1176             :     case kX64Udiv32:
    1177       71331 :       __ xorl(rdx, rdx);
    1178       71331 :       __ divl(i.InputRegister(1));
    1179             :       break;
    1180             :     case kX64Udiv:
    1181        3637 :       __ xorq(rdx, rdx);
    1182        3637 :       __ divq(i.InputRegister(1));
    1183             :       break;
    1184             :     case kX64Not:
    1185         110 :       ASSEMBLE_UNOP(notq);
    1186             :       break;
    1187             :     case kX64Not32:
    1188        6846 :       ASSEMBLE_UNOP(notl);
    1189             :       break;
    1190             :     case kX64Neg:
    1191       21304 :       ASSEMBLE_UNOP(negq);
    1192             :       break;
    1193             :     case kX64Neg32:
    1194       14090 :       ASSEMBLE_UNOP(negl);
    1195             :       break;
    1196             :     case kX64Or32:
    1197      443541 :       ASSEMBLE_BINOP(orl);
    1198             :       break;
    1199             :     case kX64Or:
    1200      362694 :       ASSEMBLE_BINOP(orq);
    1201             :       break;
    1202             :     case kX64Xor32:
    1203      192976 :       ASSEMBLE_BINOP(xorl);
    1204             :       break;
    1205             :     case kX64Xor:
    1206        1571 :       ASSEMBLE_BINOP(xorq);
    1207             :       break;
    1208             :     case kX64Shl32:
    1209      199011 :       ASSEMBLE_SHIFT(shll, 5);
    1210             :       break;
    1211             :     case kX64Shl:
    1212     1500647 :       ASSEMBLE_SHIFT(shlq, 6);
    1213             :       break;
    1214             :     case kX64Shr32:
    1215      530612 :       ASSEMBLE_SHIFT(shrl, 5);
    1216             :       break;
    1217             :     case kX64Shr:
    1218     1502533 :       ASSEMBLE_SHIFT(shrq, 6);
    1219             :       break;
    1220             :     case kX64Sar32:
    1221      181693 :       ASSEMBLE_SHIFT(sarl, 5);
    1222             :       break;
    1223             :     case kX64Sar:
    1224      721497 :       ASSEMBLE_SHIFT(sarq, 6);
    1225             :       break;
    1226             :     case kX64Ror32:
    1227      272648 :       ASSEMBLE_SHIFT(rorl, 5);
    1228             :       break;
    1229             :     case kX64Ror:
    1230         618 :       ASSEMBLE_SHIFT(rorq, 6);
    1231             :       break;
    1232             :     case kX64Lzcnt:
    1233          94 :       if (instr->InputAt(0)->IsRegister()) {
    1234          47 :         __ Lzcntq(i.OutputRegister(), i.InputRegister(0));
    1235             :       } else {
    1236           0 :         __ Lzcntq(i.OutputRegister(), i.InputOperand(0));
    1237             :       }
    1238             :       break;
    1239             :     case kX64Lzcnt32:
    1240        1694 :       if (instr->InputAt(0)->IsRegister()) {
    1241         847 :         __ Lzcntl(i.OutputRegister(), i.InputRegister(0));
    1242             :       } else {
    1243           0 :         __ Lzcntl(i.OutputRegister(), i.InputOperand(0));
    1244             :       }
    1245             :       break;
    1246             :     case kX64Tzcnt:
    1247         112 :       if (instr->InputAt(0)->IsRegister()) {
    1248          56 :         __ Tzcntq(i.OutputRegister(), i.InputRegister(0));
    1249             :       } else {
    1250           0 :         __ Tzcntq(i.OutputRegister(), i.InputOperand(0));
    1251             :       }
    1252             :       break;
    1253             :     case kX64Tzcnt32:
    1254        1474 :       if (instr->InputAt(0)->IsRegister()) {
    1255         737 :         __ Tzcntl(i.OutputRegister(), i.InputRegister(0));
    1256             :       } else {
    1257           0 :         __ Tzcntl(i.OutputRegister(), i.InputOperand(0));
    1258             :       }
    1259             :       break;
    1260             :     case kX64Popcnt:
    1261         124 :       if (instr->InputAt(0)->IsRegister()) {
    1262          62 :         __ Popcntq(i.OutputRegister(), i.InputRegister(0));
    1263             :       } else {
    1264           0 :         __ Popcntq(i.OutputRegister(), i.InputOperand(0));
    1265             :       }
    1266             :       break;
    1267             :     case kX64Popcnt32:
    1268         266 :       if (instr->InputAt(0)->IsRegister()) {
    1269         133 :         __ Popcntl(i.OutputRegister(), i.InputRegister(0));
    1270             :       } else {
    1271           0 :         __ Popcntl(i.OutputRegister(), i.InputOperand(0));
    1272             :       }
    1273             :       break;
    1274             :     case kX64Bswap:
    1275          15 :       __ bswapq(i.OutputRegister());
    1276          15 :       break;
    1277             :     case kX64Bswap32:
    1278          55 :       __ bswapl(i.OutputRegister());
    1279          55 :       break;
    1280             :     case kSSEFloat32Cmp:
    1281           0 :       ASSEMBLE_SSE_BINOP(Ucomiss);
    1282             :       break;
    1283             :     case kSSEFloat32Add:
    1284           0 :       ASSEMBLE_SSE_BINOP(addss);
    1285             :       break;
    1286             :     case kSSEFloat32Sub:
    1287           0 :       ASSEMBLE_SSE_BINOP(subss);
    1288             :       break;
    1289             :     case kSSEFloat32Mul:
    1290           0 :       ASSEMBLE_SSE_BINOP(mulss);
    1291             :       break;
    1292             :     case kSSEFloat32Div:
    1293           0 :       ASSEMBLE_SSE_BINOP(divss);
    1294             :       // Don't delete this mov. It may improve performance on some CPUs,
    1295             :       // when there is a (v)mulss depending on the result.
    1296           0 :       __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
    1297           0 :       break;
    1298             :     case kSSEFloat32Abs: {
    1299             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1300           0 :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    1301           0 :       __ psrlq(kScratchDoubleReg, 33);
    1302           0 :       __ andps(i.OutputDoubleRegister(), kScratchDoubleReg);
    1303           0 :       break;
    1304             :     }
    1305             :     case kSSEFloat32Neg: {
    1306             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1307           0 :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    1308           0 :       __ psllq(kScratchDoubleReg, 31);
    1309           0 :       __ xorps(i.OutputDoubleRegister(), kScratchDoubleReg);
    1310           0 :       break;
    1311             :     }
    1312             :     case kSSEFloat32Sqrt:
    1313         627 :       ASSEMBLE_SSE_UNOP(sqrtss);
    1314             :       break;
    1315             :     case kSSEFloat32ToFloat64:
    1316       72489 :       ASSEMBLE_SSE_UNOP(Cvtss2sd);
    1317             :       break;
    1318             :     case kSSEFloat32Round: {
    1319             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    1320             :       RoundingMode const mode =
    1321             :           static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
    1322             :       __ Roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
    1323             :       break;
    1324             :     }
    1325             :     case kSSEFloat32ToInt32:
    1326        1050 :       if (instr->InputAt(0)->IsFPRegister()) {
    1327         525 :         __ Cvttss2si(i.OutputRegister(), i.InputDoubleRegister(0));
    1328             :       } else {
    1329           0 :         __ Cvttss2si(i.OutputRegister(), i.InputOperand(0));
    1330             :       }
    1331             :       break;
    1332             :     case kSSEFloat32ToUint32: {
    1333         150 :       if (instr->InputAt(0)->IsFPRegister()) {
    1334          75 :         __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
    1335             :       } else {
    1336           0 :         __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
    1337             :       }
    1338             :       break;
    1339             :     }
    1340             :     case kSSEFloat64Cmp:
    1341        3448 :       ASSEMBLE_SSE_BINOP(Ucomisd);
    1342             :       break;
    1343             :     case kSSEFloat64Add:
    1344         960 :       ASSEMBLE_SSE_BINOP(addsd);
    1345             :       break;
    1346             :     case kSSEFloat64Sub:
    1347         660 :       ASSEMBLE_SSE_BINOP(subsd);
    1348             :       break;
    1349             :     case kSSEFloat64Mul:
    1350         126 :       ASSEMBLE_SSE_BINOP(mulsd);
    1351             :       break;
    1352             :     case kSSEFloat64Div:
    1353          78 :       ASSEMBLE_SSE_BINOP(divsd);
    1354             :       // Don't delete this mov. It may improve performance on some CPUs,
    1355             :       // when there is a (v)mulsd depending on the result.
    1356          26 :       __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
    1357             :       break;
    1358             :     case kSSEFloat64Mod: {
    1359        1628 :       __ subq(rsp, Immediate(kDoubleSize));
    1360             :       unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    1361        3256 :                                                        kDoubleSize);
    1362             :       // Move values to st(0) and st(1).
    1363        4884 :       __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(1));
    1364        1628 :       __ fld_d(Operand(rsp, 0));
    1365        4884 :       __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
    1366        1628 :       __ fld_d(Operand(rsp, 0));
    1367             :       // Loop while fprem isn't done.
    1368        1628 :       Label mod_loop;
    1369        1628 :       __ bind(&mod_loop);
    1370             :       // This instruction traps on all kinds of inputs, but we are assuming the
    1371             :       // floating point control word is set to ignore them all.
    1372        1628 :       __ fprem();
    1373             :       // The following 2 instructions implicitly use rax.
    1374        1628 :       __ fnstsw_ax();
    1375        1628 :       if (CpuFeatures::IsSupported(SAHF)) {
    1376             :         CpuFeatureScope sahf_scope(tasm(), SAHF);
    1377        1596 :         __ sahf();
    1378             :       } else {
    1379             :         __ shrl(rax, Immediate(8));
    1380          32 :         __ andl(rax, Immediate(0xFF));
    1381          32 :         __ pushq(rax);
    1382             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    1383          64 :                                                          kSystemPointerSize);
    1384          32 :         __ popfq();
    1385             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    1386          64 :                                                          -kSystemPointerSize);
    1387             :       }
    1388        1628 :       __ j(parity_even, &mod_loop);
    1389             :       // Move output to stack and clean up.
    1390        1628 :       __ fstp(1);
    1391        1628 :       __ fstp_d(Operand(rsp, 0));
    1392        3256 :       __ Movsd(i.OutputDoubleRegister(), Operand(rsp, 0));
    1393        1628 :       __ addq(rsp, Immediate(kDoubleSize));
    1394             :       unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    1395        3256 :                                                        -kDoubleSize);
    1396             :       break;
    1397             :     }
    1398             :     case kSSEFloat32Max: {
    1399          89 :       Label compare_nan, compare_swap, done_compare;
    1400         178 :       if (instr->InputAt(1)->IsFPRegister()) {
    1401             :         __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1402             :       } else {
    1403           0 :         __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
    1404             :       }
    1405             :       auto ool =
    1406          89 :           new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
    1407          89 :       __ j(parity_even, ool->entry());
    1408          89 :       __ j(above, &done_compare, Label::kNear);
    1409          89 :       __ j(below, &compare_swap, Label::kNear);
    1410          89 :       __ Movmskps(kScratchRegister, i.InputDoubleRegister(0));
    1411             :       __ testl(kScratchRegister, Immediate(1));
    1412          89 :       __ j(zero, &done_compare, Label::kNear);
    1413          89 :       __ bind(&compare_swap);
    1414         178 :       if (instr->InputAt(1)->IsFPRegister()) {
    1415          89 :         __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1416             :       } else {
    1417           0 :         __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
    1418             :       }
    1419          89 :       __ bind(&done_compare);
    1420          89 :       __ bind(ool->exit());
    1421             :       break;
    1422             :     }
    1423             :     case kSSEFloat32Min: {
    1424          94 :       Label compare_swap, done_compare;
    1425         188 :       if (instr->InputAt(1)->IsFPRegister()) {
    1426             :         __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1427             :       } else {
    1428           0 :         __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
    1429             :       }
    1430             :       auto ool =
    1431          94 :           new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
    1432          94 :       __ j(parity_even, ool->entry());
    1433          94 :       __ j(below, &done_compare, Label::kNear);
    1434          94 :       __ j(above, &compare_swap, Label::kNear);
    1435         188 :       if (instr->InputAt(1)->IsFPRegister()) {
    1436          94 :         __ Movmskps(kScratchRegister, i.InputDoubleRegister(1));
    1437             :       } else {
    1438           0 :         __ Movss(kScratchDoubleReg, i.InputOperand(1));
    1439             :         __ Movmskps(kScratchRegister, kScratchDoubleReg);
    1440             :       }
    1441             :       __ testl(kScratchRegister, Immediate(1));
    1442          94 :       __ j(zero, &done_compare, Label::kNear);
    1443          94 :       __ bind(&compare_swap);
    1444         188 :       if (instr->InputAt(1)->IsFPRegister()) {
    1445          94 :         __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1446             :       } else {
    1447           0 :         __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
    1448             :       }
    1449          94 :       __ bind(&done_compare);
    1450          94 :       __ bind(ool->exit());
    1451             :       break;
    1452             :     }
    1453             :     case kSSEFloat64Max: {
    1454         282 :       Label compare_nan, compare_swap, done_compare;
    1455         564 :       if (instr->InputAt(1)->IsFPRegister()) {
    1456             :         __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1457             :       } else {
    1458           0 :         __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
    1459             :       }
    1460             :       auto ool =
    1461         282 :           new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
    1462         282 :       __ j(parity_even, ool->entry());
    1463         282 :       __ j(above, &done_compare, Label::kNear);
    1464         282 :       __ j(below, &compare_swap, Label::kNear);
    1465         282 :       __ Movmskpd(kScratchRegister, i.InputDoubleRegister(0));
    1466             :       __ testl(kScratchRegister, Immediate(1));
    1467         282 :       __ j(zero, &done_compare, Label::kNear);
    1468         282 :       __ bind(&compare_swap);
    1469         564 :       if (instr->InputAt(1)->IsFPRegister()) {
    1470         282 :         __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1471             :       } else {
    1472           0 :         __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
    1473             :       }
    1474         282 :       __ bind(&done_compare);
    1475         282 :       __ bind(ool->exit());
    1476             :       break;
    1477             :     }
    1478             :     case kSSEFloat64Min: {
    1479         365 :       Label compare_swap, done_compare;
    1480         730 :       if (instr->InputAt(1)->IsFPRegister()) {
    1481             :         __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1482             :       } else {
    1483           0 :         __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
    1484             :       }
    1485             :       auto ool =
    1486         365 :           new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
    1487         365 :       __ j(parity_even, ool->entry());
    1488         365 :       __ j(below, &done_compare, Label::kNear);
    1489         365 :       __ j(above, &compare_swap, Label::kNear);
    1490         730 :       if (instr->InputAt(1)->IsFPRegister()) {
    1491         365 :         __ Movmskpd(kScratchRegister, i.InputDoubleRegister(1));
    1492             :       } else {
    1493           0 :         __ Movsd(kScratchDoubleReg, i.InputOperand(1));
    1494             :         __ Movmskpd(kScratchRegister, kScratchDoubleReg);
    1495             :       }
    1496             :       __ testl(kScratchRegister, Immediate(1));
    1497         365 :       __ j(zero, &done_compare, Label::kNear);
    1498         365 :       __ bind(&compare_swap);
    1499         730 :       if (instr->InputAt(1)->IsFPRegister()) {
    1500         365 :         __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1501             :       } else {
    1502           0 :         __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
    1503             :       }
    1504         365 :       __ bind(&done_compare);
    1505         365 :       __ bind(ool->exit());
    1506             :       break;
    1507             :     }
    1508             :     case kSSEFloat64Abs: {
    1509             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1510           6 :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    1511           6 :       __ psrlq(kScratchDoubleReg, 1);
    1512          12 :       __ andpd(i.OutputDoubleRegister(), kScratchDoubleReg);
    1513           6 :       break;
    1514             :     }
    1515             :     case kSSEFloat64Neg: {
    1516             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1517          76 :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    1518          76 :       __ psllq(kScratchDoubleReg, 63);
    1519         152 :       __ xorpd(i.OutputDoubleRegister(), kScratchDoubleReg);
    1520          76 :       break;
    1521             :     }
    1522             :     case kSSEFloat64Sqrt:
    1523         954 :       ASSEMBLE_SSE_UNOP(Sqrtsd);
    1524             :       break;
    1525             :     case kSSEFloat64Round: {
    1526             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    1527             :       RoundingMode const mode =
    1528             :           static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
    1529             :       __ Roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
    1530             :       break;
    1531             :     }
    1532             :     case kSSEFloat64ToFloat32:
    1533       61212 :       ASSEMBLE_SSE_UNOP(Cvtsd2ss);
    1534             :       break;
    1535             :     case kSSEFloat64ToInt32:
    1536      265016 :       if (instr->InputAt(0)->IsFPRegister()) {
    1537      108459 :         __ Cvttsd2si(i.OutputRegister(), i.InputDoubleRegister(0));
    1538             :       } else {
    1539       48098 :         __ Cvttsd2si(i.OutputRegister(), i.InputOperand(0));
    1540             :       }
    1541             :       break;
    1542             :     case kSSEFloat64ToUint32: {
    1543        1790 :       if (instr->InputAt(0)->IsFPRegister()) {
    1544         895 :         __ Cvttsd2siq(i.OutputRegister(), i.InputDoubleRegister(0));
    1545             :       } else {
    1546           0 :         __ Cvttsd2siq(i.OutputRegister(), i.InputOperand(0));
    1547             :       }
    1548        1790 :       if (MiscField::decode(instr->opcode())) {
    1549        1628 :         __ AssertZeroExtended(i.OutputRegister());
    1550             :       }
    1551             :       break;
    1552             :     }
    1553             :     case kSSEFloat32ToInt64:
    1554         142 :       if (instr->InputAt(0)->IsFPRegister()) {
    1555          71 :         __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
    1556             :       } else {
    1557           0 :         __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
    1558             :       }
    1559          71 :       if (instr->OutputCount() > 1) {
    1560         132 :         __ Set(i.OutputRegister(1), 1);
    1561          66 :         Label done;
    1562          66 :         Label fail;
    1563             :         __ Move(kScratchDoubleReg, static_cast<float>(INT64_MIN));
    1564         132 :         if (instr->InputAt(0)->IsFPRegister()) {
    1565          66 :           __ Ucomiss(kScratchDoubleReg, i.InputDoubleRegister(0));
    1566             :         } else {
    1567           0 :           __ Ucomiss(kScratchDoubleReg, i.InputOperand(0));
    1568             :         }
    1569             :         // If the input is NaN, then the conversion fails.
    1570          66 :         __ j(parity_even, &fail);
    1571             :         // If the input is INT64_MIN, then the conversion succeeds.
    1572          66 :         __ j(equal, &done);
    1573         132 :         __ cmpq(i.OutputRegister(0), Immediate(1));
    1574             :         // If the conversion results in INT64_MIN, but the input was not
    1575             :         // INT64_MIN, then the conversion fails.
    1576          66 :         __ j(no_overflow, &done);
    1577          66 :         __ bind(&fail);
    1578         132 :         __ Set(i.OutputRegister(1), 0);
    1579          66 :         __ bind(&done);
    1580             :       }
    1581             :       break;
    1582             :     case kSSEFloat64ToInt64:
    1583        1606 :       if (instr->InputAt(0)->IsFPRegister()) {
    1584         802 :         __ Cvttsd2siq(i.OutputRegister(0), i.InputDoubleRegister(0));
    1585             :       } else {
    1586           2 :         __ Cvttsd2siq(i.OutputRegister(0), i.InputOperand(0));
    1587             :       }
    1588         806 :       if (instr->OutputCount() > 1) {
    1589        1378 :         __ Set(i.OutputRegister(1), 1);
    1590         694 :         Label done;
    1591         694 :         Label fail;
    1592             :         __ Move(kScratchDoubleReg, static_cast<double>(INT64_MIN));
    1593        1392 :         if (instr->InputAt(0)->IsFPRegister()) {
    1594         696 :           __ Ucomisd(kScratchDoubleReg, i.InputDoubleRegister(0));
    1595             :         } else {
    1596           0 :           __ Ucomisd(kScratchDoubleReg, i.InputOperand(0));
    1597             :         }
    1598             :         // If the input is NaN, then the conversion fails.
    1599         693 :         __ j(parity_even, &fail);
    1600             :         // If the input is INT64_MIN, then the conversion succeeds.
    1601         686 :         __ j(equal, &done);
    1602        1384 :         __ cmpq(i.OutputRegister(0), Immediate(1));
    1603             :         // If the conversion results in INT64_MIN, but the input was not
    1604             :         // INT64_MIN, then the conversion fails.
    1605         693 :         __ j(no_overflow, &done);
    1606         692 :         __ bind(&fail);
    1607        1366 :         __ Set(i.OutputRegister(1), 0);
    1608         690 :         __ bind(&done);
    1609             :       }
    1610             :       break;
    1611             :     case kSSEFloat32ToUint64: {
    1612          71 :       Label fail;
    1613         137 :       if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
    1614         142 :       if (instr->InputAt(0)->IsFPRegister()) {
    1615         142 :         __ Cvttss2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
    1616             :       } else {
    1617           0 :         __ Cvttss2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
    1618             :       }
    1619         137 :       if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
    1620          71 :       __ bind(&fail);
    1621             :       break;
    1622             :     }
    1623             :     case kSSEFloat64ToUint64: {
    1624        3888 :       Label fail;
    1625        3963 :       if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
    1626        7776 :       if (instr->InputAt(0)->IsFPRegister()) {
    1627        7776 :         __ Cvttsd2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
    1628             :       } else {
    1629           0 :         __ Cvttsd2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
    1630             :       }
    1631        3963 :       if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
    1632        3888 :       __ bind(&fail);
    1633             :       break;
    1634             :     }
    1635             :     case kSSEInt32ToFloat64:
    1636      799556 :       if (instr->InputAt(0)->IsRegister()) {
    1637      395608 :         __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
    1638             :       } else {
    1639        8340 :         __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
    1640             :       }
    1641             :       break;
    1642             :     case kSSEInt32ToFloat32:
    1643        2190 :       if (instr->InputAt(0)->IsRegister()) {
    1644        1086 :         __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
    1645             :       } else {
    1646          18 :         __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
    1647             :       }
    1648             :       break;
    1649             :     case kSSEInt64ToFloat32:
    1650          84 :       if (instr->InputAt(0)->IsRegister()) {
    1651          42 :         __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
    1652             :       } else {
    1653           0 :         __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
    1654             :       }
    1655             :       break;
    1656             :     case kSSEInt64ToFloat64:
    1657        6762 :       if (instr->InputAt(0)->IsRegister()) {
    1658        1254 :         __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
    1659             :       } else {
    1660        4254 :         __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
    1661             :       }
    1662             :       break;
    1663             :     case kSSEUint64ToFloat32:
    1664          84 :       if (instr->InputAt(0)->IsRegister()) {
    1665          42 :         __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
    1666             :       } else {
    1667           0 :         __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
    1668             :       }
    1669             :       break;
    1670             :     case kSSEUint64ToFloat64:
    1671        7262 :       if (instr->InputAt(0)->IsRegister()) {
    1672        2399 :         __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
    1673             :       } else {
    1674        2464 :         __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
    1675             :       }
    1676             :       break;
    1677             :     case kSSEUint32ToFloat64:
    1678       21594 :       if (instr->InputAt(0)->IsRegister()) {
    1679         445 :         __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
    1680             :       } else {
    1681       20704 :         __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
    1682             :       }
    1683             :       break;
    1684             :     case kSSEUint32ToFloat32:
    1685         224 :       if (instr->InputAt(0)->IsRegister()) {
    1686         112 :         __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
    1687             :       } else {
    1688           0 :         __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
    1689             :       }
    1690             :       break;
    1691             :     case kSSEFloat64ExtractLowWord32:
    1692         234 :       if (instr->InputAt(0)->IsFPStackSlot()) {
    1693           0 :         __ movl(i.OutputRegister(), i.InputOperand(0));
    1694             :       } else {
    1695             :         __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
    1696             :       }
    1697             :       break;
    1698             :     case kSSEFloat64ExtractHighWord32:
    1699      200434 :       if (instr->InputAt(0)->IsFPStackSlot()) {
    1700      126990 :         __ movl(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2));
    1701             :       } else {
    1702       36722 :         __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1);
    1703             :       }
    1704             :       break;
    1705             :     case kSSEFloat64InsertLowWord32:
    1706          10 :       if (instr->InputAt(1)->IsRegister()) {
    1707           5 :         __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 0);
    1708             :       } else {
    1709           0 :         __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
    1710             :       }
    1711             :       break;
    1712             :     case kSSEFloat64InsertHighWord32:
    1713         234 :       if (instr->InputAt(1)->IsRegister()) {
    1714         117 :         __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 1);
    1715             :       } else {
    1716           0 :         __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
    1717             :       }
    1718             :       break;
    1719             :     case kSSEFloat64LoadLowWord32:
    1720         224 :       if (instr->InputAt(0)->IsRegister()) {
    1721             :         __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
    1722             :       } else {
    1723           0 :         __ Movd(i.OutputDoubleRegister(), i.InputOperand(0));
    1724             :       }
    1725             :       break;
    1726             :     case kAVXFloat32Cmp: {
    1727             :       CpuFeatureScope avx_scope(tasm(), AVX);
    1728        5686 :       if (instr->InputAt(1)->IsFPRegister()) {
    1729        2822 :         __ vucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1730             :       } else {
    1731          42 :         __ vucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
    1732             :       }
    1733             :       break;
    1734             :     }
    1735             :     case kAVXFloat32Add:
    1736        6618 :       ASSEMBLE_AVX_BINOP(vaddss);
    1737             :       break;
    1738             :     case kAVXFloat32Sub:
    1739        9561 :       ASSEMBLE_AVX_BINOP(vsubss);
    1740             :       break;
    1741             :     case kAVXFloat32Mul:
    1742        3156 :       ASSEMBLE_AVX_BINOP(vmulss);
    1743             :       break;
    1744             :     case kAVXFloat32Div:
    1745        1248 :       ASSEMBLE_AVX_BINOP(vdivss);
    1746             :       // Don't delete this mov. It may improve performance on some CPUs,
    1747             :       // when there is a (v)mulss depending on the result.
    1748         416 :       __ Movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
    1749             :       break;
    1750             :     case kAVXFloat64Cmp: {
    1751             :       CpuFeatureScope avx_scope(tasm(), AVX);
    1752      528294 :       if (instr->InputAt(1)->IsFPRegister()) {
    1753      239669 :         __ vucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1754             :       } else {
    1755       48956 :         __ vucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
    1756             :       }
    1757             :       break;
    1758             :     }
    1759             :     case kAVXFloat64Add:
    1760      241191 :       ASSEMBLE_AVX_BINOP(vaddsd);
    1761             :       break;
    1762             :     case kAVXFloat64Sub:
    1763       47571 :       ASSEMBLE_AVX_BINOP(vsubsd);
    1764             :       break;
    1765             :     case kAVXFloat64Mul:
    1766       42759 :       ASSEMBLE_AVX_BINOP(vmulsd);
    1767             :       break;
    1768             :     case kAVXFloat64Div:
    1769       36357 :       ASSEMBLE_AVX_BINOP(vdivsd);
    1770             :       // Don't delete this mov. It may improve performance on some CPUs,
    1771             :       // when there is a (v)mulsd depending on the result.
    1772       12119 :       __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
    1773             :       break;
    1774             :     case kAVXFloat32Abs: {
    1775             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1776             :       CpuFeatureScope avx_scope(tasm(), AVX);
    1777          99 :       __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
    1778             :       __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 33);
    1779         198 :       if (instr->InputAt(0)->IsFPRegister()) {
    1780             :         __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg,
    1781          99 :                   i.InputDoubleRegister(0));
    1782             :       } else {
    1783             :         __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg,
    1784           0 :                   i.InputOperand(0));
    1785             :       }
    1786             :       break;
    1787             :     }
    1788             :     case kAVXFloat32Neg: {
    1789             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1790             :       CpuFeatureScope avx_scope(tasm(), AVX);
    1791         208 :       __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
    1792             :       __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 31);
    1793         416 :       if (instr->InputAt(0)->IsFPRegister()) {
    1794             :         __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg,
    1795         208 :                   i.InputDoubleRegister(0));
    1796             :       } else {
    1797             :         __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg,
    1798           0 :                   i.InputOperand(0));
    1799             :       }
    1800             :       break;
    1801             :     }
    1802             :     case kAVXFloat64Abs: {
    1803             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1804             :       CpuFeatureScope avx_scope(tasm(), AVX);
    1805         657 :       __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
    1806             :       __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 1);
    1807        1314 :       if (instr->InputAt(0)->IsFPRegister()) {
    1808             :         __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg,
    1809         657 :                   i.InputDoubleRegister(0));
    1810             :       } else {
    1811             :         __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg,
    1812           0 :                   i.InputOperand(0));
    1813             :       }
    1814             :       break;
    1815             :     }
    1816             :     case kAVXFloat64Neg: {
    1817             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1818             :       CpuFeatureScope avx_scope(tasm(), AVX);
    1819        9895 :       __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
    1820             :       __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 63);
    1821       19790 :       if (instr->InputAt(0)->IsFPRegister()) {
    1822             :         __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg,
    1823        9805 :                   i.InputDoubleRegister(0));
    1824             :       } else {
    1825             :         __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg,
    1826          90 :                   i.InputOperand(0));
    1827             :       }
    1828             :       break;
    1829             :     }
    1830             :     case kSSEFloat64SilenceNaN:
    1831             :       __ Xorpd(kScratchDoubleReg, kScratchDoubleReg);
    1832        5818 :       __ Subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
    1833             :       break;
    1834             :     case kX64Movsxbl:
    1835      104512 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1836      157502 :       ASSEMBLE_MOVX(movsxbl);
    1837      104512 :       __ AssertZeroExtended(i.OutputRegister());
    1838       52256 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1839       52256 :       break;
    1840             :     case kX64Movzxbl:
    1841      360668 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1842      543202 :       ASSEMBLE_MOVX(movzxbl);
    1843      360668 :       __ AssertZeroExtended(i.OutputRegister());
    1844      180334 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1845      180334 :       break;
    1846             :     case kX64Movsxbq:
    1847       27250 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1848       40885 :       ASSEMBLE_MOVX(movsxbq);
    1849       13625 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1850       13625 :       break;
    1851             :     case kX64Movzxbq:
    1852       28722 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1853       43083 :       ASSEMBLE_MOVX(movzxbq);
    1854       28722 :       __ AssertZeroExtended(i.OutputRegister());
    1855       14361 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1856       14361 :       break;
    1857             :     case kX64Movb: {
    1858      177842 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1859       88920 :       size_t index = 0;
    1860       88920 :       Operand operand = i.MemoryOperand(&index);
    1861      177842 :       if (HasImmediateInput(instr, index)) {
    1862       13792 :         __ movb(operand, Immediate(i.InputInt8(index)));
    1863             :       } else {
    1864      164050 :         __ movb(operand, i.InputRegister(index));
    1865             :       }
    1866       88921 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1867             :       break;
    1868             :     }
    1869             :     case kX64Movsxwl:
    1870       24208 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1871       36889 :       ASSEMBLE_MOVX(movsxwl);
    1872       24208 :       __ AssertZeroExtended(i.OutputRegister());
    1873       12104 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1874       12104 :       break;
    1875             :     case kX64Movzxwl:
    1876      305840 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1877      465580 :       ASSEMBLE_MOVX(movzxwl);
    1878      305840 :       __ AssertZeroExtended(i.OutputRegister());
    1879      152920 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1880      152920 :       break;
    1881             :     case kX64Movsxwq:
    1882       18066 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1883       27109 :       ASSEMBLE_MOVX(movsxwq);
    1884             :       break;
    1885             :     case kX64Movzxwq:
    1886        1344 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1887        2016 :       ASSEMBLE_MOVX(movzxwq);
    1888        1344 :       __ AssertZeroExtended(i.OutputRegister());
    1889         672 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1890         672 :       break;
    1891             :     case kX64Movw: {
    1892       25752 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1893       12876 :       size_t index = 0;
    1894       12876 :       Operand operand = i.MemoryOperand(&index);
    1895       25752 :       if (HasImmediateInput(instr, index)) {
    1896        1940 :         __ movw(operand, Immediate(i.InputInt16(index)));
    1897             :       } else {
    1898       23812 :         __ movw(operand, i.InputRegister(index));
    1899             :       }
    1900       12876 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1901             :       break;
    1902             :     }
    1903             :     case kX64Movl:
    1904     5128658 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1905     2564350 :       if (instr->HasOutput()) {
    1906      725818 :         if (instr->addressing_mode() == kMode_None) {
    1907      147654 :           if (instr->InputAt(0)->IsRegister()) {
    1908      139880 :             __ movl(i.OutputRegister(), i.InputRegister(0));
    1909             :           } else {
    1910        7772 :             __ movl(i.OutputRegister(), i.InputOperand(0));
    1911             :           }
    1912             :         } else {
    1913     1304004 :           __ movl(i.OutputRegister(), i.MemoryOperand());
    1914             :         }
    1915     1451684 :         __ AssertZeroExtended(i.OutputRegister());
    1916             :       } else {
    1917     1838532 :         size_t index = 0;
    1918     1838532 :         Operand operand = i.MemoryOperand(&index);
    1919     3677104 :         if (HasImmediateInput(instr, index)) {
    1920      525765 :           __ movl(operand, i.InputImmediate(index));
    1921             :         } else {
    1922     2625582 :           __ movl(operand, i.InputRegister(index));
    1923             :         }
    1924             :       }
    1925     2564361 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1926     2564343 :       break;
    1927             :     case kX64Movsxlq:
    1928      273374 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1929      495906 :       ASSEMBLE_MOVX(movsxlq);
    1930      136687 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1931      136687 :       break;
    1932             :     case kX64MovqDecompressTaggedSigned: {
    1933           0 :       CHECK(instr->HasOutput());
    1934             :       __ DecompressTaggedSigned(i.OutputRegister(), i.MemoryOperand(),
    1935           0 :                                 DEBUG_BOOL ? i.TempRegister(0) : no_reg);
    1936           0 :       break;
    1937             :     }
    1938             :     case kX64MovqDecompressTaggedPointer: {
    1939           0 :       CHECK(instr->HasOutput());
    1940             :       __ DecompressTaggedPointer(i.OutputRegister(), i.MemoryOperand(),
    1941           0 :                                  DEBUG_BOOL ? i.TempRegister(0) : no_reg);
    1942           0 :       break;
    1943             :     }
    1944             :     case kX64MovqDecompressAnyTagged: {
    1945           0 :       CHECK(instr->HasOutput());
    1946             :       __ DecompressAnyTagged(i.OutputRegister(), i.MemoryOperand(),
    1947             :                              i.TempRegister(0),
    1948           0 :                              DEBUG_BOOL ? i.TempRegister(1) : no_reg);
    1949           0 :       break;
    1950             :     }
    1951             :     case kX64Movq:
    1952    13577648 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1953     6788826 :       if (instr->HasOutput()) {
    1954     9190896 :         __ movq(i.OutputRegister(), i.MemoryOperand());
    1955             :       } else {
    1956     2193416 :         size_t index = 0;
    1957     2193416 :         Operand operand = i.MemoryOperand(&index);
    1958     4386836 :         if (HasImmediateInput(instr, index)) {
    1959       21161 :           __ movq(operand, i.InputImmediate(index));
    1960             :         } else {
    1961     4344514 :           __ movq(operand, i.InputRegister(index));
    1962             :         }
    1963             :       }
    1964     6788933 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1965     6788920 :       break;
    1966             :     case kX64Movss:
    1967       65914 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1968       32958 :       if (instr->HasOutput()) {
    1969       40414 :         __ movss(i.OutputDoubleRegister(), i.MemoryOperand());
    1970             :       } else {
    1971       12751 :         size_t index = 0;
    1972       12751 :         Operand operand = i.MemoryOperand(&index);
    1973       25502 :         __ movss(operand, i.InputDoubleRegister(index));
    1974             :       }
    1975             :       break;
    1976             :     case kX64Movsd: {
    1977     1252080 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1978      626073 :       if (instr->HasOutput()) {
    1979             :         const MemoryAccessMode access_mode =
    1980             :             static_cast<MemoryAccessMode>(MiscField::decode(opcode));
    1981      433649 :         if (access_mode == kMemoryAccessPoisoned) {
    1982             :           // If we have to poison the loaded value, we load into a general
    1983             :           // purpose register first, mask it with the poison, and move the
    1984             :           // value from the general purpose register into the double register.
    1985           0 :           __ movq(kScratchRegister, i.MemoryOperand());
    1986           0 :           __ andq(kScratchRegister, kSpeculationPoisonRegister);
    1987           0 :           __ Movq(i.OutputDoubleRegister(), kScratchRegister);
    1988             :         } else {
    1989      433653 :           __ Movsd(i.OutputDoubleRegister(), i.MemoryOperand());
    1990             :         }
    1991             :       } else {
    1992      192424 :         size_t index = 0;
    1993      192424 :         Operand operand = i.MemoryOperand(&index);
    1994      192425 :         __ Movsd(operand, i.InputDoubleRegister(index));
    1995             :       }
    1996             :       break;
    1997             :     }
    1998             :     case kX64Movdqu: {
    1999             :       CpuFeatureScope sse_scope(tasm(), SSSE3);
    2000       51040 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    2001       25520 :       if (instr->HasOutput()) {
    2002       34930 :         __ movdqu(i.OutputSimd128Register(), i.MemoryOperand());
    2003             :       } else {
    2004        8055 :         size_t index = 0;
    2005        8055 :         Operand operand = i.MemoryOperand(&index);
    2006       16110 :         __ movdqu(operand, i.InputSimd128Register(index));
    2007             :       }
    2008             :       break;
    2009             :     }
    2010             :     case kX64BitcastFI:
    2011        1336 :       if (instr->InputAt(0)->IsFPStackSlot()) {
    2012           0 :         __ movl(i.OutputRegister(), i.InputOperand(0));
    2013             :       } else {
    2014             :         __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
    2015             :       }
    2016             :       break;
    2017             :     case kX64BitcastDL:
    2018        1230 :       if (instr->InputAt(0)->IsFPStackSlot()) {
    2019           0 :         __ movq(i.OutputRegister(), i.InputOperand(0));
    2020             :       } else {
    2021             :         __ Movq(i.OutputRegister(), i.InputDoubleRegister(0));
    2022             :       }
    2023             :       break;
    2024             :     case kX64BitcastIF:
    2025         740 :       if (instr->InputAt(0)->IsRegister()) {
    2026             :         __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
    2027             :       } else {
    2028           0 :         __ movss(i.OutputDoubleRegister(), i.InputOperand(0));
    2029             :       }
    2030             :       break;
    2031             :     case kX64BitcastLD:
    2032         386 :       if (instr->InputAt(0)->IsRegister()) {
    2033             :         __ Movq(i.OutputDoubleRegister(), i.InputRegister(0));
    2034             :       } else {
    2035           0 :         __ Movsd(i.OutputDoubleRegister(), i.InputOperand(0));
    2036             :       }
    2037             :       break;
    2038             :     case kX64Lea32: {
    2039             :       AddressingMode mode = AddressingModeField::decode(instr->opcode());
    2040             :       // Shorten "leal" to "addl", "subl" or "shll" if the register allocation
    2041             :       // and addressing mode just happens to work out. The "addl"/"subl" forms
    2042             :       // in these cases are faster based on measurements.
    2043      404087 :       if (i.InputRegister(0) == i.OutputRegister()) {
    2044      157635 :         if (mode == kMode_MRI) {
    2045       78443 :           int32_t constant_summand = i.InputInt32(1);
    2046             :           DCHECK_NE(0, constant_summand);
    2047       78446 :           if (constant_summand > 0) {
    2048      110152 :             __ addl(i.OutputRegister(), Immediate(constant_summand));
    2049             :           } else {
    2050             :             __ subl(i.OutputRegister(),
    2051       46740 :                     Immediate(base::NegateWithWraparound(constant_summand)));
    2052             :           }
    2053       79192 :         } else if (mode == kMode_MR1) {
    2054       13542 :           if (i.InputRegister(1) == i.OutputRegister()) {
    2055        1069 :             __ shll(i.OutputRegister(), Immediate(1));
    2056             :           } else {
    2057       12473 :             __ addl(i.OutputRegister(), i.InputRegister(1));
    2058             :           }
    2059       65650 :         } else if (mode == kMode_M2) {
    2060           0 :           __ shll(i.OutputRegister(), Immediate(1));
    2061       65650 :         } else if (mode == kMode_M4) {
    2062         398 :           __ shll(i.OutputRegister(), Immediate(2));
    2063       65252 :         } else if (mode == kMode_M8) {
    2064        3300 :           __ shll(i.OutputRegister(), Immediate(3));
    2065             :         } else {
    2066      123904 :           __ leal(i.OutputRegister(), i.MemoryOperand());
    2067             :         }
    2068      279610 :       } else if (mode == kMode_MR1 &&
    2069             :                  i.InputRegister(1) == i.OutputRegister()) {
    2070       20695 :         __ addl(i.OutputRegister(), i.InputRegister(0));
    2071             :       } else {
    2072      451508 :         __ leal(i.OutputRegister(), i.MemoryOperand());
    2073             :       }
    2074      808216 :       __ AssertZeroExtended(i.OutputRegister());
    2075      404110 :       break;
    2076             :     }
    2077             :     case kX64Lea: {
    2078             :       AddressingMode mode = AddressingModeField::decode(instr->opcode());
    2079             :       // Shorten "leaq" to "addq", "subq" or "shlq" if the register allocation
    2080             :       // and addressing mode just happens to work out. The "addq"/"subq" forms
    2081             :       // in these cases are faster based on measurements.
    2082     2113927 :       if (i.InputRegister(0) == i.OutputRegister()) {
    2083      631884 :         if (mode == kMode_MRI) {
    2084      520643 :           int32_t constant_summand = i.InputInt32(1);
    2085      520643 :           if (constant_summand > 0) {
    2086      854758 :             __ addq(i.OutputRegister(), Immediate(constant_summand));
    2087       93264 :           } else if (constant_summand < 0) {
    2088      279669 :             __ subq(i.OutputRegister(), Immediate(-constant_summand));
    2089             :           }
    2090      111241 :         } else if (mode == kMode_MR1) {
    2091       52470 :           if (i.InputRegister(1) == i.OutputRegister()) {
    2092        2055 :             __ shlq(i.OutputRegister(), Immediate(1));
    2093             :           } else {
    2094       50415 :             __ addq(i.OutputRegister(), i.InputRegister(1));
    2095             :           }
    2096       58771 :         } else if (mode == kMode_M2) {
    2097           0 :           __ shlq(i.OutputRegister(), Immediate(1));
    2098       58771 :         } else if (mode == kMode_M4) {
    2099         336 :           __ shlq(i.OutputRegister(), Immediate(2));
    2100       58435 :         } else if (mode == kMode_M8) {
    2101       11679 :           __ shlq(i.OutputRegister(), Immediate(3));
    2102             :         } else {
    2103       93512 :           __ leaq(i.OutputRegister(), i.MemoryOperand());
    2104             :         }
    2105     1751160 :       } else if (mode == kMode_MR1 &&
    2106             :                  i.InputRegister(1) == i.OutputRegister()) {
    2107      193233 :         __ addq(i.OutputRegister(), i.InputRegister(0));
    2108             :       } else {
    2109     2577630 :         __ leaq(i.OutputRegister(), i.MemoryOperand());
    2110             :       }
    2111             :       break;
    2112             :     }
    2113             :     case kX64Dec32:
    2114           0 :       __ decl(i.OutputRegister());
    2115             :       break;
    2116             :     case kX64Inc32:
    2117           0 :       __ incl(i.OutputRegister());
    2118             :       break;
    2119             :     case kX64Push:
    2120     3058816 :       if (AddressingModeField::decode(instr->opcode()) != kMode_None) {
    2121       25846 :         size_t index = 0;
    2122       25846 :         Operand operand = i.MemoryOperand(&index);
    2123       25846 :         __ pushq(operand);
    2124             :         frame_access_state()->IncreaseSPDelta(1);
    2125             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    2126       51692 :                                                          kSystemPointerSize);
    2127     3032970 :       } else if (HasImmediateInput(instr, 0)) {
    2128      201411 :         __ pushq(i.InputImmediate(0));
    2129             :         frame_access_state()->IncreaseSPDelta(1);
    2130             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    2131      402822 :                                                          kSystemPointerSize);
    2132     2831559 :       } else if (instr->InputAt(0)->IsRegister()) {
    2133     2145814 :         __ pushq(i.InputRegister(0));
    2134             :         frame_access_state()->IncreaseSPDelta(1);
    2135             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    2136     4291662 :                                                          kSystemPointerSize);
    2137     1362101 :       } else if (instr->InputAt(0)->IsFloatRegister() ||
    2138             :                  instr->InputAt(0)->IsDoubleRegister()) {
    2139             :         // TODO(titzer): use another machine instruction?
    2140       17983 :         __ subq(rsp, Immediate(kDoubleSize));
    2141             :         frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize);
    2142             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    2143       35966 :                                                          kDoubleSize);
    2144       53949 :         __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
    2145      667762 :       } else if (instr->InputAt(0)->IsSimd128Register()) {
    2146             :         // TODO(titzer): use another machine instruction?
    2147         150 :         __ subq(rsp, Immediate(kSimd128Size));
    2148             :         frame_access_state()->IncreaseSPDelta(kSimd128Size /
    2149             :                                               kSystemPointerSize);
    2150             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    2151         300 :                                                          kSimd128Size);
    2152         450 :         __ Movups(Operand(rsp, 0), i.InputSimd128Register(0));
    2153      685079 :       } else if (instr->InputAt(0)->IsStackSlot() ||
    2154      680417 :                  instr->InputAt(0)->IsFloatStackSlot() ||
    2155             :                  instr->InputAt(0)->IsDoubleStackSlot()) {
    2156      667282 :         __ pushq(i.InputOperand(0));
    2157             :         frame_access_state()->IncreaseSPDelta(1);
    2158             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    2159     1334566 :                                                          kSystemPointerSize);
    2160             :       } else {
    2161             :         DCHECK(instr->InputAt(0)->IsSimd128StackSlot());
    2162         330 :         __ Movups(kScratchDoubleReg, i.InputOperand(0));
    2163             :         // TODO(titzer): use another machine instruction?
    2164         330 :         __ subq(rsp, Immediate(kSimd128Size));
    2165             :         frame_access_state()->IncreaseSPDelta(kSimd128Size /
    2166             :                                               kSystemPointerSize);
    2167             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    2168         660 :                                                          kSimd128Size);
    2169         660 :         __ Movups(Operand(rsp, 0), kScratchDoubleReg);
    2170             :       }
    2171             :       break;
    2172             :     case kX64Poke: {
    2173             :       int slot = MiscField::decode(instr->opcode());
    2174        3400 :       if (HasImmediateInput(instr, 0)) {
    2175        2270 :         __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputImmediate(0));
    2176             :       } else {
    2177        4530 :         __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputRegister(0));
    2178             :       }
    2179             :       break;
    2180             :     }
    2181             :     case kX64Peek: {
    2182        6338 :       int reverse_slot = i.InputInt32(0);
    2183             :       int offset =
    2184        6338 :           FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
    2185        6338 :       if (instr->OutputAt(0)->IsFPRegister()) {
    2186             :         LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
    2187        3160 :         if (op->representation() == MachineRepresentation::kFloat64) {
    2188        3160 :           __ Movsd(i.OutputDoubleRegister(), Operand(rbp, offset));
    2189             :         } else {
    2190             :           DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
    2191        3160 :           __ Movss(i.OutputFloatRegister(), Operand(rbp, offset));
    2192             :         }
    2193             :       } else {
    2194        9534 :         __ movq(i.OutputRegister(), Operand(rbp, offset));
    2195             :       }
    2196             :       break;
    2197             :     }
    2198             :     // TODO(gdeepti): Get rid of redundant moves for F32x4Splat/Extract below
    2199             :     case kX64F32x4Splat: {
    2200         350 :       XMMRegister dst = i.OutputSimd128Register();
    2201         700 :       if (instr->InputAt(0)->IsFPRegister()) {
    2202         350 :         __ movss(dst, i.InputDoubleRegister(0));
    2203             :       } else {
    2204           0 :         __ movss(dst, i.InputOperand(0));
    2205             :       }
    2206         350 :       __ shufps(dst, dst, 0x0);
    2207             :       break;
    2208             :     }
    2209             :     case kX64F32x4ExtractLane: {
    2210             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2211        1360 :       __ extractps(kScratchRegister, i.InputSimd128Register(0), i.InputInt8(1));
    2212        1360 :       __ movd(i.OutputDoubleRegister(), kScratchRegister);
    2213             :       break;
    2214             :     }
    2215             :     case kX64F32x4ReplaceLane: {
    2216             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2217             :       // The insertps instruction uses imm8[5:4] to indicate the lane
    2218             :       // that needs to be replaced.
    2219          80 :       byte select = i.InputInt8(1) << 4 & 0x30;
    2220         160 :       __ insertps(i.OutputSimd128Register(), i.InputDoubleRegister(2), select);
    2221             :       break;
    2222             :     }
    2223             :     case kX64F32x4SConvertI32x4: {
    2224           5 :       __ cvtdq2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2225           5 :       break;
    2226             :     }
    2227             :     case kX64F32x4UConvertI32x4: {
    2228             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2229             :       DCHECK_NE(i.OutputSimd128Register(), kScratchDoubleReg);
    2230             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2231             :       XMMRegister dst = i.OutputSimd128Register();
    2232           5 :       __ pxor(kScratchDoubleReg, kScratchDoubleReg);      // zeros
    2233           5 :       __ pblendw(kScratchDoubleReg, dst, 0x55);           // get lo 16 bits
    2234             :       __ psubd(dst, kScratchDoubleReg);                   // get hi 16 bits
    2235           5 :       __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // convert lo exactly
    2236           5 :       __ psrld(dst, 1);                  // divide by 2 to get in unsigned range
    2237           5 :       __ cvtdq2ps(dst, dst);             // convert hi exactly
    2238           5 :       __ addps(dst, dst);                // double hi, exactly
    2239           5 :       __ addps(dst, kScratchDoubleReg);  // add hi and lo, may round.
    2240             :       break;
    2241             :     }
    2242             :     case kX64F32x4Abs: {
    2243             :       XMMRegister dst = i.OutputSimd128Register();
    2244             :       XMMRegister src = i.InputSimd128Register(0);
    2245          10 :       if (dst == src) {
    2246          10 :         __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2247          10 :         __ psrld(kScratchDoubleReg, 1);
    2248          20 :         __ andps(i.OutputSimd128Register(), kScratchDoubleReg);
    2249             :       } else {
    2250           0 :         __ pcmpeqd(dst, dst);
    2251           0 :         __ psrld(dst, 1);
    2252           0 :         __ andps(dst, i.InputSimd128Register(0));
    2253             :       }
    2254             :       break;
    2255             :     }
    2256             :     case kX64F32x4Neg: {
    2257             :       XMMRegister dst = i.OutputSimd128Register();
    2258             :       XMMRegister src = i.InputSimd128Register(0);
    2259          10 :       if (dst == src) {
    2260          10 :         __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2261          10 :         __ pslld(kScratchDoubleReg, 31);
    2262          20 :         __ xorps(i.OutputSimd128Register(), kScratchDoubleReg);
    2263             :       } else {
    2264           0 :         __ pcmpeqd(dst, dst);
    2265           0 :         __ pslld(dst, 31);
    2266           0 :         __ xorps(dst, i.InputSimd128Register(0));
    2267             :       }
    2268             :       break;
    2269             :     }
    2270             :     case kX64F32x4RecipApprox: {
    2271          10 :       __ rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2272          10 :       break;
    2273             :     }
    2274             :     case kX64F32x4RecipSqrtApprox: {
    2275          10 :       __ rsqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2276          10 :       break;
    2277             :     }
    2278             :     case kX64F32x4Add: {
    2279             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2280          30 :       __ addps(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2281          30 :       break;
    2282             :     }
    2283             :     case kX64F32x4AddHoriz: {
    2284             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2285             :       CpuFeatureScope sse_scope(tasm(), SSE3);
    2286          10 :       __ haddps(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2287             :       break;
    2288             :     }
    2289             :     case kX64F32x4Sub: {
    2290             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2291          10 :       __ subps(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2292          10 :       break;
    2293             :     }
    2294             :     case kX64F32x4Mul: {
    2295             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2296          10 :       __ mulps(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2297          10 :       break;
    2298             :     }
    2299             :     case kX64F32x4Min: {
    2300             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2301          10 :       __ minps(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2302          10 :       break;
    2303             :     }
    2304             :     case kX64F32x4Max: {
    2305             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2306          10 :       __ maxps(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2307          10 :       break;
    2308             :     }
    2309             :     case kX64F32x4Eq: {
    2310             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2311          10 :       __ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x0);
    2312          10 :       break;
    2313             :     }
    2314             :     case kX64F32x4Ne: {
    2315             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2316          10 :       __ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x4);
    2317          10 :       break;
    2318             :     }
    2319             :     case kX64F32x4Lt: {
    2320             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2321          20 :       __ cmpltps(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2322             :       break;
    2323             :     }
    2324             :     case kX64F32x4Le: {
    2325             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2326          20 :       __ cmpleps(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2327             :       break;
    2328             :     }
    2329             :     case kX64I32x4Splat: {
    2330        2090 :       XMMRegister dst = i.OutputSimd128Register();
    2331        2090 :       __ movd(dst, i.InputRegister(0));
    2332        2090 :       __ pshufd(dst, dst, 0x0);
    2333             :       break;
    2334             :     }
    2335             :     case kX64I32x4ExtractLane: {
    2336             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2337       15384 :       __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
    2338             :       break;
    2339             :     }
    2340             :     case kX64I32x4ReplaceLane: {
    2341             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2342        4560 :       if (instr->InputAt(2)->IsRegister()) {
    2343             :         __ Pinsrd(i.OutputSimd128Register(), i.InputRegister(2),
    2344         840 :                   i.InputInt8(1));
    2345             :       } else {
    2346        3720 :         __ Pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
    2347             :       }
    2348             :       break;
    2349             :     }
    2350             :     case kX64I32x4SConvertF32x4: {
    2351             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2352             :       XMMRegister dst = i.OutputSimd128Register();
    2353             :       // NAN->0
    2354          10 :       __ movaps(kScratchDoubleReg, dst);
    2355             :       __ cmpeqps(kScratchDoubleReg, kScratchDoubleReg);
    2356             :       __ pand(dst, kScratchDoubleReg);
    2357             :       // Set top bit if >= 0 (but not -0.0!)
    2358             :       __ pxor(kScratchDoubleReg, dst);
    2359             :       // Convert
    2360          10 :       __ cvttps2dq(dst, dst);
    2361             :         // Set top bit if a lane that was >= 0 is now < 0 (overflow)
    2362             :       __ pand(kScratchDoubleReg, dst);
    2363          10 :       __ psrad(kScratchDoubleReg, 31);
    2364             :       // Set positive overflow lanes to 0x7FFFFFFF
    2365             :       __ pxor(dst, kScratchDoubleReg);
    2366             :       break;
    2367             :     }
    2368             :     case kX64I32x4SConvertI16x8Low: {
    2369             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2370          10 :       __ pmovsxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2371             :       break;
    2372             :     }
    2373             :     case kX64I32x4SConvertI16x8High: {
    2374             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2375             :       XMMRegister dst = i.OutputSimd128Register();
    2376          10 :       __ palignr(dst, i.InputSimd128Register(0), 8);
    2377             :       __ pmovsxwd(dst, dst);
    2378             :       break;
    2379             :     }
    2380             :     case kX64I32x4Neg: {
    2381             :       CpuFeatureScope sse_scope(tasm(), SSSE3);
    2382             :       XMMRegister dst = i.OutputSimd128Register();
    2383             :       XMMRegister src = i.InputSimd128Register(0);
    2384          10 :       if (dst == src) {
    2385          10 :         __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2386             :         __ psignd(dst, kScratchDoubleReg);
    2387             :       } else {
    2388           0 :         __ pxor(dst, dst);
    2389             :         __ psubd(dst, src);
    2390             :       }
    2391             :       break;
    2392             :     }
    2393             :     case kX64I32x4Shl: {
    2394         620 :       __ pslld(i.OutputSimd128Register(), i.InputInt8(1));
    2395         310 :       break;
    2396             :     }
    2397             :     case kX64I32x4ShrS: {
    2398         620 :       __ psrad(i.OutputSimd128Register(), i.InputInt8(1));
    2399         310 :       break;
    2400             :     }
    2401             :     case kX64I32x4Add: {
    2402          30 :       __ paddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2403             :       break;
    2404             :     }
    2405             :     case kX64I32x4AddHoriz: {
    2406             :       CpuFeatureScope sse_scope(tasm(), SSSE3);
    2407          10 :       __ phaddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2408             :       break;
    2409             :     }
    2410             :     case kX64I32x4Sub: {
    2411          10 :       __ psubd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2412             :       break;
    2413             :     }
    2414             :     case kX64I32x4Mul: {
    2415             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2416          10 :       __ pmulld(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2417             :       break;
    2418             :     }
    2419             :     case kX64I32x4MinS: {
    2420             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2421          10 :       __ pminsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2422             :       break;
    2423             :     }
    2424             :     case kX64I32x4MaxS: {
    2425             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2426          10 :       __ pmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2427             :       break;
    2428             :     }
    2429             :     case kX64I32x4Eq: {
    2430          30 :       __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2431             :       break;
    2432             :     }
    2433             :     case kX64I32x4Ne: {
    2434          35 :       __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2435             :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2436          35 :       __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
    2437             :       break;
    2438             :     }
    2439             :     case kX64I32x4GtS: {
    2440          20 :       __ pcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2441             :       break;
    2442             :     }
    2443             :     case kX64I32x4GeS: {
    2444             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2445             :       XMMRegister dst = i.OutputSimd128Register();
    2446             :       XMMRegister src = i.InputSimd128Register(1);
    2447          20 :       __ pminsd(dst, src);
    2448             :       __ pcmpeqd(dst, src);
    2449             :       break;
    2450             :     }
    2451             :     case kX64I32x4UConvertF32x4: {
    2452             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2453             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2454             :       XMMRegister dst = i.OutputSimd128Register();
    2455          10 :       XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
    2456             :       // NAN->0, negative->0
    2457          10 :       __ pxor(kScratchDoubleReg, kScratchDoubleReg);
    2458          10 :       __ maxps(dst, kScratchDoubleReg);
    2459             :       // scratch: float representation of max_signed
    2460             :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2461          10 :       __ psrld(kScratchDoubleReg, 1);                     // 0x7fffffff
    2462          10 :       __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // 0x4f000000
    2463             :       // tmp: convert (src-max_signed).
    2464             :       // Positive overflow lanes -> 0x7FFFFFFF
    2465             :       // Negative lanes -> 0
    2466          10 :       __ movaps(tmp, dst);
    2467          10 :       __ subps(tmp, kScratchDoubleReg);
    2468             :       __ cmpleps(kScratchDoubleReg, tmp);
    2469          10 :       __ cvttps2dq(tmp, tmp);
    2470             :       __ pxor(tmp, kScratchDoubleReg);
    2471             :       __ pxor(kScratchDoubleReg, kScratchDoubleReg);
    2472             :       __ pmaxsd(tmp, kScratchDoubleReg);
    2473             :       // convert. Overflow lanes above max_signed will be 0x80000000
    2474          10 :       __ cvttps2dq(dst, dst);
    2475             :       // Add (src-max_signed) for overflow lanes.
    2476             :       __ paddd(dst, tmp);
    2477             :       break;
    2478             :     }
    2479             :     case kX64I32x4UConvertI16x8Low: {
    2480             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2481          10 :       __ pmovzxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2482             :       break;
    2483             :     }
    2484             :     case kX64I32x4UConvertI16x8High: {
    2485             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2486             :       XMMRegister dst = i.OutputSimd128Register();
    2487          10 :       __ palignr(dst, i.InputSimd128Register(0), 8);
    2488             :       __ pmovzxwd(dst, dst);
    2489             :       break;
    2490             :     }
    2491             :     case kX64I32x4ShrU: {
    2492         620 :       __ psrld(i.OutputSimd128Register(), i.InputInt8(1));
    2493         310 :       break;
    2494             :     }
    2495             :     case kX64I32x4MinU: {
    2496             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2497          10 :       __ pminud(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2498             :       break;
    2499             :     }
    2500             :     case kX64I32x4MaxU: {
    2501             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2502          10 :       __ pmaxud(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2503             :       break;
    2504             :     }
    2505             :     case kX64I32x4GtU: {
    2506             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2507             :       XMMRegister dst = i.OutputSimd128Register();
    2508             :       XMMRegister src = i.InputSimd128Register(1);
    2509          20 :       __ pmaxud(dst, src);
    2510             :       __ pcmpeqd(dst, src);
    2511             :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2512             :       __ pxor(dst, kScratchDoubleReg);
    2513             :       break;
    2514             :     }
    2515             :     case kX64I32x4GeU: {
    2516             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2517             :       XMMRegister dst = i.OutputSimd128Register();
    2518             :       XMMRegister src = i.InputSimd128Register(1);
    2519          20 :       __ pminud(dst, src);
    2520             :       __ pcmpeqd(dst, src);
    2521             :       break;
    2522             :     }
    2523             :     case kX64S128Zero: {
    2524          18 :       XMMRegister dst = i.OutputSimd128Register();
    2525          18 :       __ xorps(dst, dst);
    2526             :       break;
    2527             :     }
    2528             :     case kX64I16x8Splat: {
    2529         980 :       XMMRegister dst = i.OutputSimd128Register();
    2530         980 :       __ movd(dst, i.InputRegister(0));
    2531         980 :       __ pshuflw(dst, dst, 0x0);
    2532         980 :       __ pshufd(dst, dst, 0x0);
    2533             :       break;
    2534             :     }
    2535             :     case kX64I16x8ExtractLane: {
    2536             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2537        6600 :       Register dst = i.OutputRegister();
    2538       13200 :       __ pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
    2539        6600 :       __ movsxwl(dst, dst);
    2540             :       break;
    2541             :     }
    2542             :     case kX64I16x8ReplaceLane: {
    2543             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2544         240 :       if (instr->InputAt(2)->IsRegister()) {
    2545             :         __ pinsrw(i.OutputSimd128Register(), i.InputRegister(2),
    2546         240 :                   i.InputInt8(1));
    2547             :       } else {
    2548           0 :         __ pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
    2549             :       }
    2550             :       break;
    2551             :     }
    2552             :     case kX64I16x8SConvertI8x16Low: {
    2553             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2554          10 :       __ pmovsxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2555             :       break;
    2556             :     }
    2557             :     case kX64I16x8SConvertI8x16High: {
    2558             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2559             :       XMMRegister dst = i.OutputSimd128Register();
    2560          10 :       __ palignr(dst, i.InputSimd128Register(0), 8);
    2561             :       __ pmovsxbw(dst, dst);
    2562             :       break;
    2563             :     }
    2564             :     case kX64I16x8Neg: {
    2565             :       CpuFeatureScope sse_scope(tasm(), SSSE3);
    2566             :       XMMRegister dst = i.OutputSimd128Register();
    2567             :       XMMRegister src = i.InputSimd128Register(0);
    2568          10 :       if (dst == src) {
    2569          10 :         __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2570             :         __ psignw(dst, kScratchDoubleReg);
    2571             :       } else {
    2572           0 :         __ pxor(dst, dst);
    2573             :         __ psubw(dst, src);
    2574             :       }
    2575             :       break;
    2576             :     }
    2577             :     case kX64I16x8Shl: {
    2578         300 :       __ psllw(i.OutputSimd128Register(), i.InputInt8(1));
    2579         150 :       break;
    2580             :     }
    2581             :     case kX64I16x8ShrS: {
    2582         300 :       __ psraw(i.OutputSimd128Register(), i.InputInt8(1));
    2583         150 :       break;
    2584             :     }
    2585             :     case kX64I16x8SConvertI32x4: {
    2586             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2587          10 :       __ packssdw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2588             :       break;
    2589             :     }
    2590             :     case kX64I16x8Add: {
    2591          10 :       __ paddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2592             :       break;
    2593             :     }
    2594             :     case kX64I16x8AddSaturateS: {
    2595          10 :       __ paddsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2596             :       break;
    2597             :     }
    2598             :     case kX64I16x8AddHoriz: {
    2599             :       CpuFeatureScope sse_scope(tasm(), SSSE3);
    2600          10 :       __ phaddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2601             :       break;
    2602             :     }
    2603             :     case kX64I16x8Sub: {
    2604          10 :       __ psubw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2605             :       break;
    2606             :     }
    2607             :     case kX64I16x8SubSaturateS: {
    2608          10 :       __ psubsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2609             :       break;
    2610             :     }
    2611             :     case kX64I16x8Mul: {
    2612             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2613          10 :       __ pmullw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2614             :       break;
    2615             :     }
    2616             :     case kX64I16x8MinS: {
    2617             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2618          10 :       __ pminsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2619             :       break;
    2620             :     }
    2621             :     case kX64I16x8MaxS: {
    2622             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2623          10 :       __ pmaxsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2624             :       break;
    2625             :     }
    2626             :     case kX64I16x8Eq: {
    2627          30 :       __ pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2628             :       break;
    2629             :     }
    2630             :     case kX64I16x8Ne: {
    2631          35 :       __ pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2632             :       __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
    2633          35 :       __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
    2634             :       break;
    2635             :     }
    2636             :     case kX64I16x8GtS: {
    2637          20 :       __ pcmpgtw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2638             :       break;
    2639             :     }
    2640             :     case kX64I16x8GeS: {
    2641             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2642             :       XMMRegister dst = i.OutputSimd128Register();
    2643             :       XMMRegister src = i.InputSimd128Register(1);
    2644          20 :       __ pminsw(dst, src);
    2645             :       __ pcmpeqw(dst, src);
    2646             :       break;
    2647             :     }
    2648             :     case kX64I16x8UConvertI8x16Low: {
    2649             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2650          10 :       __ pmovzxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2651             :       break;
    2652             :     }
    2653             :     case kX64I16x8UConvertI8x16High: {
    2654             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2655             :       XMMRegister dst = i.OutputSimd128Register();
    2656          10 :       __ palignr(dst, i.InputSimd128Register(0), 8);
    2657             :       __ pmovzxbw(dst, dst);
    2658             :       break;
    2659             :     }
    2660             :     case kX64I16x8ShrU: {
    2661         300 :       __ psrlw(i.OutputSimd128Register(), i.InputInt8(1));
    2662         150 :       break;
    2663             :     }
    2664             :     case kX64I16x8UConvertI32x4: {
    2665             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2666             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2667             :       XMMRegister dst = i.OutputSimd128Register();
    2668             :       // Change negative lanes to 0x7FFFFFFF
    2669          10 :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2670          10 :       __ psrld(kScratchDoubleReg, 1);
    2671             :       __ pminud(dst, kScratchDoubleReg);
    2672          10 :       __ pminud(kScratchDoubleReg, i.InputSimd128Register(1));
    2673             :       __ packusdw(dst, kScratchDoubleReg);
    2674             :       break;
    2675             :     }
    2676             :     case kX64I16x8AddSaturateU: {
    2677          10 :       __ paddusw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2678             :       break;
    2679             :     }
    2680             :     case kX64I16x8SubSaturateU: {
    2681          10 :       __ psubusw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2682             :       break;
    2683             :     }
    2684             :     case kX64I16x8MinU: {
    2685             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2686          10 :       __ pminuw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2687             :       break;
    2688             :     }
    2689             :     case kX64I16x8MaxU: {
    2690             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2691          10 :       __ pmaxuw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2692             :       break;
    2693             :     }
    2694             :     case kX64I16x8GtU: {
    2695             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2696             :       XMMRegister dst = i.OutputSimd128Register();
    2697             :       XMMRegister src = i.InputSimd128Register(1);
    2698          20 :       __ pmaxuw(dst, src);
    2699             :       __ pcmpeqw(dst, src);
    2700             :       __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
    2701             :       __ pxor(dst, kScratchDoubleReg);
    2702             :       break;
    2703             :     }
    2704             :     case kX64I16x8GeU: {
    2705             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2706             :       XMMRegister dst = i.OutputSimd128Register();
    2707             :       XMMRegister src = i.InputSimd128Register(1);
    2708          40 :       __ pminuw(dst, src);
    2709             :       __ pcmpeqw(dst, src);
    2710             :       break;
    2711             :     }
    2712             :     case kX64I8x16Splat: {
    2713             :       CpuFeatureScope sse_scope(tasm(), SSSE3);
    2714             :       XMMRegister dst = i.OutputSimd128Register();
    2715         720 :       __ movd(dst, i.InputRegister(0));
    2716         720 :       __ xorps(kScratchDoubleReg, kScratchDoubleReg);
    2717             :       __ pshufb(dst, kScratchDoubleReg);
    2718             :       break;
    2719             :     }
    2720             :     case kX64I8x16ExtractLane: {
    2721             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2722        9880 :       Register dst = i.OutputRegister();
    2723       19760 :       __ pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
    2724        9880 :       __ movsxbl(dst, dst);
    2725             :       break;
    2726             :     }
    2727             :     case kX64I8x16ReplaceLane: {
    2728             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2729         400 :       if (instr->InputAt(2)->IsRegister()) {
    2730             :         __ pinsrb(i.OutputSimd128Register(), i.InputRegister(2),
    2731         400 :                   i.InputInt8(1));
    2732             :       } else {
    2733           0 :         __ pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
    2734             :       }
    2735             :       break;
    2736             :     }
    2737             :     case kX64I8x16SConvertI16x8: {
    2738             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2739          10 :       __ packsswb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2740             :       break;
    2741             :     }
    2742             :     case kX64I8x16Neg: {
    2743             :       CpuFeatureScope sse_scope(tasm(), SSSE3);
    2744             :       XMMRegister dst = i.OutputSimd128Register();
    2745             :       XMMRegister src = i.InputSimd128Register(0);
    2746          10 :       if (dst == src) {
    2747          10 :         __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2748             :         __ psignb(dst, kScratchDoubleReg);
    2749             :       } else {
    2750           0 :         __ pxor(dst, dst);
    2751             :         __ psubb(dst, src);
    2752             :       }
    2753             :       break;
    2754             :     }
    2755             :     case kX64I8x16Shl: {
    2756             :       XMMRegister dst = i.OutputSimd128Register();
    2757             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    2758          70 :       int8_t shift = i.InputInt8(1) & 0x7;
    2759          70 :       if (shift < 4) {
    2760             :         // For small shifts, doubling is faster.
    2761          60 :         for (int i = 0; i < shift; ++i) {
    2762          60 :           __ paddb(dst, dst);
    2763             :         }
    2764             :       } else {
    2765             :         // Mask off the unwanted bits before word-shifting.
    2766          40 :         __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
    2767          40 :         __ psrlw(kScratchDoubleReg, 8 + shift);
    2768             :         __ packuswb(kScratchDoubleReg, kScratchDoubleReg);
    2769             :         __ pand(dst, kScratchDoubleReg);
    2770          40 :         __ psllw(dst, shift);
    2771             :       }
    2772             :       break;
    2773             :     }
    2774             :     case kX64I8x16ShrS: {
    2775             :       XMMRegister dst = i.OutputSimd128Register();
    2776             :       XMMRegister src = i.InputSimd128Register(0);
    2777          70 :       int8_t shift = i.InputInt8(1) & 0x7;
    2778             :       // Unpack the bytes into words, do arithmetic shifts, and repack.
    2779          70 :       __ punpckhbw(kScratchDoubleReg, src);
    2780             :       __ punpcklbw(dst, src);
    2781          70 :       __ psraw(kScratchDoubleReg, 8 + shift);
    2782          70 :       __ psraw(dst, 8 + shift);
    2783             :       __ packsswb(dst, kScratchDoubleReg);
    2784             :       break;
    2785             :     }
    2786             :     case kX64I8x16Add: {
    2787          10 :       __ paddb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2788             :       break;
    2789             :     }
    2790             :     case kX64I8x16AddSaturateS: {
    2791          10 :       __ paddsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2792             :       break;
    2793             :     }
    2794             :     case kX64I8x16Sub: {
    2795          10 :       __ psubb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2796             :       break;
    2797             :     }
    2798             :     case kX64I8x16SubSaturateS: {
    2799          10 :       __ psubsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2800             :       break;
    2801             :     }
    2802             :     case kX64I8x16Mul: {
    2803             :       XMMRegister dst = i.OutputSimd128Register();
    2804             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    2805             :       XMMRegister right = i.InputSimd128Register(1);
    2806          10 :       XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
    2807             :       // I16x8 view of I8x16
    2808             :       // left = AAaa AAaa ... AAaa AAaa
    2809             :       // right= BBbb BBbb ... BBbb BBbb
    2810             :       // t = 00AA 00AA ... 00AA 00AA
    2811             :       // s = 00BB 00BB ... 00BB 00BB
    2812          10 :       __ movaps(tmp, dst);
    2813          10 :       __ movaps(kScratchDoubleReg, right);
    2814          10 :       __ psrlw(tmp, 8);
    2815          10 :       __ psrlw(kScratchDoubleReg, 8);
    2816             :       // dst = left * 256
    2817          10 :       __ psllw(dst, 8);
    2818             :       // t = I16x8Mul(t, s)
    2819             :       //    => __PP __PP ...  __PP  __PP
    2820             :       __ pmullw(tmp, kScratchDoubleReg);
    2821             :       // dst = I16x8Mul(left * 256, right)
    2822             :       //    => pp__ pp__ ...  pp__  pp__
    2823             :       __ pmullw(dst, right);
    2824             :       // t = I16x8Shl(t, 8)
    2825             :       //    => PP00 PP00 ...  PP00  PP00
    2826          10 :       __ psllw(tmp, 8);
    2827             :       // dst = I16x8Shr(dst, 8)
    2828             :       //    => 00pp 00pp ...  00pp  00pp
    2829          10 :       __ psrlw(dst, 8);
    2830             :       // dst = I16x8Or(dst, t)
    2831             :       //    => PPpp PPpp ...  PPpp  PPpp
    2832             :       __ por(dst, tmp);
    2833             :       break;
    2834             :     }
    2835             :     case kX64I8x16MinS: {
    2836             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2837          10 :       __ pminsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2838             :       break;
    2839             :     }
    2840             :     case kX64I8x16MaxS: {
    2841             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2842          10 :       __ pmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2843             :       break;
    2844             :     }
    2845             :     case kX64I8x16Eq: {
    2846          30 :       __ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2847             :       break;
    2848             :     }
    2849             :     case kX64I8x16Ne: {
    2850          35 :       __ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2851             :       __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
    2852          35 :       __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
    2853             :       break;
    2854             :     }
    2855             :     case kX64I8x16GtS: {
    2856          20 :       __ pcmpgtb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2857             :       break;
    2858             :     }
    2859             :     case kX64I8x16GeS: {
    2860             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2861             :       XMMRegister dst = i.OutputSimd128Register();
    2862             :       XMMRegister src = i.InputSimd128Register(1);
    2863          20 :       __ pminsb(dst, src);
    2864             :       __ pcmpeqb(dst, src);
    2865             :       break;
    2866             :     }
    2867             :     case kX64I8x16UConvertI16x8: {
    2868             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2869             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2870             :       XMMRegister dst = i.OutputSimd128Register();
    2871             :       // Change negative lanes to 0x7FFF
    2872          10 :       __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
    2873          10 :       __ psrlw(kScratchDoubleReg, 1);
    2874             :       __ pminuw(dst, kScratchDoubleReg);
    2875          10 :       __ pminuw(kScratchDoubleReg, i.InputSimd128Register(1));
    2876             :       __ packuswb(dst, kScratchDoubleReg);
    2877             :       break;
    2878             :     }
    2879             :     case kX64I8x16ShrU: {
    2880             :       XMMRegister dst = i.OutputSimd128Register();
    2881             :       XMMRegister src = i.InputSimd128Register(0);
    2882          70 :       int8_t shift = i.InputInt8(1) & 0x7;
    2883             :       // Unpack the bytes into words, do logical shifts, and repack.
    2884          70 :       __ punpckhbw(kScratchDoubleReg, src);
    2885             :       __ punpcklbw(dst, src);
    2886          70 :       __ psrlw(kScratchDoubleReg, 8 + shift);
    2887          70 :       __ psrlw(dst, 8 + shift);
    2888             :       __ packuswb(dst, kScratchDoubleReg);
    2889             :       break;
    2890             :     }
    2891             :     case kX64I8x16AddSaturateU: {
    2892          10 :       __ paddusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2893             :       break;
    2894             :     }
    2895             :     case kX64I8x16SubSaturateU: {
    2896          10 :       __ psubusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2897             :       break;
    2898             :     }
    2899             :     case kX64I8x16MinU: {
    2900             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2901          10 :       __ pminub(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2902             :       break;
    2903             :     }
    2904             :     case kX64I8x16MaxU: {
    2905             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2906          10 :       __ pmaxub(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2907             :       break;
    2908             :     }
    2909             :     case kX64I8x16GtU: {
    2910             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2911             :       XMMRegister dst = i.OutputSimd128Register();
    2912             :       XMMRegister src = i.InputSimd128Register(1);
    2913          20 :       __ pmaxub(dst, src);
    2914             :       __ pcmpeqb(dst, src);
    2915             :       __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
    2916             :       __ pxor(dst, kScratchDoubleReg);
    2917             :       break;
    2918             :     }
    2919             :     case kX64I8x16GeU: {
    2920             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2921             :       XMMRegister dst = i.OutputSimd128Register();
    2922             :       XMMRegister src = i.InputSimd128Register(1);
    2923           0 :       __ pminub(dst, src);
    2924             :       __ pcmpeqb(dst, src);
    2925             :       break;
    2926             :     }
    2927             :     case kX64S128And: {
    2928          10 :       __ pand(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2929             :       break;
    2930             :     }
    2931             :     case kX64S128Or: {
    2932          10 :       __ por(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2933             :       break;
    2934             :     }
    2935             :     case kX64S128Xor: {
    2936          10 :       __ pxor(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2937             :       break;
    2938             :     }
    2939             :     case kX64S128Not: {
    2940             :       XMMRegister dst = i.OutputSimd128Register();
    2941             :       XMMRegister src = i.InputSimd128Register(0);
    2942          10 :       if (dst == src) {
    2943          10 :         __ movaps(kScratchDoubleReg, dst);
    2944             :         __ pcmpeqd(dst, dst);
    2945             :         __ pxor(dst, kScratchDoubleReg);
    2946             :       } else {
    2947           0 :         __ pcmpeqd(dst, dst);
    2948             :         __ pxor(dst, src);
    2949             :       }
    2950             : 
    2951             :       break;
    2952             :     }
    2953             :     case kX64S128Select: {
    2954             :       // Mask used here is stored in dst.
    2955          35 :       XMMRegister dst = i.OutputSimd128Register();
    2956          35 :       __ movaps(kScratchDoubleReg, i.InputSimd128Register(1));
    2957          70 :       __ xorps(kScratchDoubleReg, i.InputSimd128Register(2));
    2958          35 :       __ andps(dst, kScratchDoubleReg);
    2959          70 :       __ xorps(dst, i.InputSimd128Register(2));
    2960             :       break;
    2961             :     }
    2962             :     case kX64S8x16Shuffle: {
    2963             :       XMMRegister dst = i.OutputSimd128Register();
    2964             :       Register tmp = i.TempRegister(0);
    2965             :       // Prepare 16 byte aligned buffer for shuffle control mask
    2966        3830 :       __ movq(tmp, rsp);
    2967        3830 :       __ andq(rsp, Immediate(-16));
    2968        3830 :       if (instr->InputCount() == 5) {  // only one input operand
    2969        1700 :         uint32_t mask[4] = {};
    2970             :         DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2971       10200 :         for (int j = 4; j > 0; j--) {
    2972       13600 :           mask[j - 1] = i.InputUint32(j);
    2973             :         }
    2974             : 
    2975        1700 :         SetupShuffleMaskOnStack(tasm(), mask);
    2976        3400 :         __ pshufb(dst, Operand(rsp, 0));
    2977             :       } else {  // two input operands
    2978             :         DCHECK_EQ(6, instr->InputCount());
    2979        6390 :         ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 0);
    2980        2130 :         uint32_t mask[4] = {};
    2981       10650 :         for (int j = 5; j > 1; j--) {
    2982        8520 :           uint32_t lanes = i.InputUint32(j);
    2983       42600 :           for (int k = 0; k < 32; k += 8) {
    2984       34080 :             uint8_t lane = lanes >> k;
    2985       34080 :             mask[j - 2] |= (lane < kSimd128Size ? lane : 0x80) << k;
    2986             :           }
    2987             :         }
    2988        2130 :         SetupShuffleMaskOnStack(tasm(), mask);
    2989        4260 :         __ pshufb(kScratchDoubleReg, Operand(rsp, 0));
    2990        2130 :         uint32_t mask1[4] = {};
    2991        4260 :         if (instr->InputAt(1)->IsSimd128Register()) {
    2992        2130 :           XMMRegister src1 = i.InputSimd128Register(1);
    2993        2130 :           if (src1 != dst) __ movups(dst, src1);
    2994             :         } else {
    2995           0 :           __ movups(dst, i.InputOperand(1));
    2996             :         }
    2997        8520 :         for (int j = 5; j > 1; j--) {
    2998        8520 :           uint32_t lanes = i.InputUint32(j);
    2999       42600 :           for (int k = 0; k < 32; k += 8) {
    3000       34080 :             uint8_t lane = lanes >> k;
    3001       34080 :             mask1[j - 2] |= (lane >= kSimd128Size ? (lane & 0x0F) : 0x80) << k;
    3002             :           }
    3003             :         }
    3004        2130 :         SetupShuffleMaskOnStack(tasm(), mask1);
    3005        4260 :         __ pshufb(dst, Operand(rsp, 0));
    3006             :         __ por(dst, kScratchDoubleReg);
    3007             :       }
    3008             :       __ movq(rsp, tmp);
    3009             :       break;
    3010             :     }
    3011             :     case kX64S32x4Swizzle: {
    3012             :       DCHECK_EQ(2, instr->InputCount());
    3013        3860 :       ASSEMBLE_SIMD_IMM_INSTR(pshufd, i.OutputSimd128Register(), 0,
    3014             :                               i.InputInt8(1));
    3015             :       break;
    3016             :     }
    3017             :     case kX64S32x4Shuffle: {
    3018             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    3019             :       DCHECK_EQ(4, instr->InputCount());  // Swizzles should be handled above.
    3020             :       int8_t shuffle = i.InputInt8(2);
    3021             :       DCHECK_NE(0xe4, shuffle);  // A simple blend should be handled below.
    3022        5655 :       ASSEMBLE_SIMD_IMM_INSTR(pshufd, kScratchDoubleReg, 1, shuffle);
    3023        5685 :       ASSEMBLE_SIMD_IMM_INSTR(pshufd, i.OutputSimd128Register(), 0, shuffle);
    3024        3790 :       __ pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputInt8(3));
    3025             :       break;
    3026             :     }
    3027             :     case kX64S16x8Blend: {
    3028         280 :       ASSEMBLE_SIMD_IMM_SHUFFLE(pblendw, SSE4_1, i.InputInt8(2));
    3029         140 :       break;
    3030             :     }
    3031             :     case kX64S16x8HalfShuffle1: {
    3032         920 :       XMMRegister dst = i.OutputSimd128Register();
    3033        3680 :       ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, i.InputInt8(1));
    3034         920 :       __ pshufhw(dst, dst, i.InputInt8(2));
    3035             :       break;
    3036             :     }
    3037             :     case kX64S16x8HalfShuffle2: {
    3038             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    3039         610 :       XMMRegister dst = i.OutputSimd128Register();
    3040        2440 :       ASSEMBLE_SIMD_IMM_INSTR(pshuflw, kScratchDoubleReg, 1, i.InputInt8(2));
    3041         610 :       __ pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputInt8(3));
    3042        2440 :       ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, i.InputInt8(2));
    3043         610 :       __ pshufhw(dst, dst, i.InputInt8(3));
    3044         610 :       __ pblendw(dst, kScratchDoubleReg, i.InputInt8(4));
    3045             :       break;
    3046             :     }
    3047             :     case kX64S8x16Alignr: {
    3048        1200 :       ASSEMBLE_SIMD_IMM_SHUFFLE(palignr, SSSE3, i.InputInt8(2));
    3049         600 :       break;
    3050             :     }
    3051             :     case kX64S16x8Dup: {
    3052         475 :       XMMRegister dst = i.OutputSimd128Register();
    3053         475 :       int8_t lane = i.InputInt8(1) & 0x7;
    3054         475 :       int8_t lane4 = lane & 0x3;
    3055         475 :       int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
    3056         475 :       if (lane < 4) {
    3057        1425 :         ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, half_dup);
    3058         475 :         __ pshufd(dst, dst, 0);
    3059             :       } else {
    3060           0 :         ASSEMBLE_SIMD_IMM_INSTR(pshufhw, dst, 0, half_dup);
    3061           0 :         __ pshufd(dst, dst, 0xaa);
    3062             :       }
    3063             :       break;
    3064             :     }
    3065             :     case kX64S8x16Dup: {
    3066             :       XMMRegister dst = i.OutputSimd128Register();
    3067         610 :       int8_t lane = i.InputInt8(1) & 0xf;
    3068             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3069         610 :       if (lane < 8) {
    3070         570 :         __ punpcklbw(dst, dst);
    3071             :       } else {
    3072          40 :         __ punpckhbw(dst, dst);
    3073             :       }
    3074         610 :       lane &= 0x7;
    3075         610 :       int8_t lane4 = lane & 0x3;
    3076         610 :       int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
    3077         610 :       if (lane < 4) {
    3078         570 :         __ pshuflw(dst, dst, half_dup);
    3079         570 :         __ pshufd(dst, dst, 0);
    3080             :       } else {
    3081          40 :         __ pshufhw(dst, dst, half_dup);
    3082          40 :         __ pshufd(dst, dst, 0xaa);
    3083             :       }
    3084             :       break;
    3085             :     }
    3086             :     case kX64S64x2UnpackHigh:
    3087           0 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhqdq);
    3088             :       break;
    3089             :     case kX64S32x4UnpackHigh:
    3090        2060 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhdq);
    3091             :       break;
    3092             :     case kX64S16x8UnpackHigh:
    3093        2420 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhwd);
    3094             :       break;
    3095             :     case kX64S8x16UnpackHigh:
    3096        1340 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhbw);
    3097             :       break;
    3098             :     case kX64S64x2UnpackLow:
    3099         160 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklqdq);
    3100             :       break;
    3101             :     case kX64S32x4UnpackLow:
    3102        1180 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckldq);
    3103             :       break;
    3104             :     case kX64S16x8UnpackLow:
    3105        1180 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklwd);
    3106             :       break;
    3107             :     case kX64S8x16UnpackLow:
    3108        1660 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklbw);
    3109             :       break;
    3110             :     case kX64S16x8UnzipHigh: {
    3111             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    3112             :       XMMRegister dst = i.OutputSimd128Register();
    3113             :       XMMRegister src2 = dst;
    3114             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3115         265 :       if (instr->InputCount() == 2) {
    3116         735 :         ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
    3117         245 :         __ psrld(kScratchDoubleReg, 16);
    3118             :         src2 = kScratchDoubleReg;
    3119             :       }
    3120         265 :       __ psrld(dst, 16);
    3121             :       __ packusdw(dst, src2);
    3122             :       break;
    3123             :     }
    3124             :     case kX64S16x8UnzipLow: {
    3125             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    3126             :       XMMRegister dst = i.OutputSimd128Register();
    3127             :       XMMRegister src2 = dst;
    3128             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3129         355 :       __ pxor(kScratchDoubleReg, kScratchDoubleReg);
    3130         355 :       if (instr->InputCount() == 2) {
    3131        1005 :         ASSEMBLE_SIMD_IMM_INSTR(pblendw, kScratchDoubleReg, 1, 0x55);
    3132             :         src2 = kScratchDoubleReg;
    3133             :       }
    3134         355 :       __ pblendw(dst, kScratchDoubleReg, 0xaa);
    3135             :       __ packusdw(dst, src2);
    3136             :       break;
    3137             :     }
    3138             :     case kX64S8x16UnzipHigh: {
    3139             :       XMMRegister dst = i.OutputSimd128Register();
    3140             :       XMMRegister src2 = dst;
    3141             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3142         370 :       if (instr->InputCount() == 2) {
    3143        1035 :         ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
    3144         350 :         __ psrlw(kScratchDoubleReg, 8);
    3145             :         src2 = kScratchDoubleReg;
    3146             :       }
    3147         370 :       __ psrlw(dst, 8);
    3148             :       __ packuswb(dst, src2);
    3149             :       break;
    3150             :     }
    3151             :     case kX64S8x16UnzipLow: {
    3152             :       XMMRegister dst = i.OutputSimd128Register();
    3153             :       XMMRegister src2 = dst;
    3154             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3155         395 :       if (instr->InputCount() == 2) {
    3156        1065 :         ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
    3157         355 :         __ psllw(kScratchDoubleReg, 8);
    3158         355 :         __ psrlw(kScratchDoubleReg, 8);
    3159             :         src2 = kScratchDoubleReg;
    3160             :       }
    3161         395 :       __ psllw(dst, 8);
    3162         395 :       __ psrlw(dst, 8);
    3163             :       __ packuswb(dst, src2);
    3164             :       break;
    3165             :     }
    3166             :     case kX64S8x16TransposeLow: {
    3167             :       XMMRegister dst = i.OutputSimd128Register();
    3168             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3169         220 :       __ psllw(dst, 8);
    3170         220 :       if (instr->InputCount() == 1) {
    3171          20 :         __ movups(kScratchDoubleReg, dst);
    3172             :       } else {
    3173             :         DCHECK_EQ(2, instr->InputCount());
    3174         600 :         ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
    3175         200 :         __ psllw(kScratchDoubleReg, 8);
    3176             :       }
    3177         220 :       __ psrlw(dst, 8);
    3178             :       __ por(dst, kScratchDoubleReg);
    3179             :       break;
    3180             :     }
    3181             :     case kX64S8x16TransposeHigh: {
    3182             :       XMMRegister dst = i.OutputSimd128Register();
    3183             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3184         475 :       __ psrlw(dst, 8);
    3185         475 :       if (instr->InputCount() == 1) {
    3186          20 :         __ movups(kScratchDoubleReg, dst);
    3187             :       } else {
    3188             :         DCHECK_EQ(2, instr->InputCount());
    3189        1365 :         ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
    3190         455 :         __ psrlw(kScratchDoubleReg, 8);
    3191             :       }
    3192         475 :       __ psllw(kScratchDoubleReg, 8);
    3193             :       __ por(dst, kScratchDoubleReg);
    3194             :       break;
    3195             :     }
    3196             :     case kX64S8x8Reverse:
    3197             :     case kX64S8x4Reverse:
    3198             :     case kX64S8x2Reverse: {
    3199             :       DCHECK_EQ(1, instr->InputCount());
    3200             :       XMMRegister dst = i.OutputSimd128Register();
    3201             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3202        1050 :       if (arch_opcode != kX64S8x2Reverse) {
    3203             :         // First shuffle words into position.
    3204         605 :         int8_t shuffle_mask = arch_opcode == kX64S8x4Reverse ? 0xB1 : 0x1B;
    3205         605 :         __ pshuflw(dst, dst, shuffle_mask);
    3206         605 :         __ pshufhw(dst, dst, shuffle_mask);
    3207             :       }
    3208        1050 :       __ movaps(kScratchDoubleReg, dst);
    3209        1050 :       __ psrlw(kScratchDoubleReg, 8);
    3210        1050 :       __ psllw(dst, 8);
    3211             :       __ por(dst, kScratchDoubleReg);
    3212             :       break;
    3213             :     }
    3214             :     case kX64S1x4AnyTrue:
    3215             :     case kX64S1x8AnyTrue:
    3216             :     case kX64S1x16AnyTrue: {
    3217             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    3218             :       Register dst = i.OutputRegister();
    3219             :       XMMRegister src = i.InputSimd128Register(0);
    3220             :       Register tmp = i.TempRegister(0);
    3221         135 :       __ xorq(tmp, tmp);
    3222             :       __ movq(dst, Immediate(1));
    3223             :       __ ptest(src, src);
    3224         135 :       __ cmovq(zero, dst, tmp);
    3225             :       break;
    3226             :     }
    3227             :     case kX64S1x4AllTrue:
    3228             :     case kX64S1x8AllTrue:
    3229             :     case kX64S1x16AllTrue: {
    3230             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    3231             :       Register dst = i.OutputRegister();
    3232             :       XMMRegister src = i.InputSimd128Register(0);
    3233             :       Register tmp = i.TempRegister(0);
    3234         135 :       __ movq(tmp, Immediate(1));
    3235             :       __ xorq(dst, dst);
    3236             :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    3237             :       __ pxor(kScratchDoubleReg, src);
    3238             :       __ ptest(kScratchDoubleReg, kScratchDoubleReg);
    3239         135 :       __ cmovq(zero, dst, tmp);
    3240             :       break;
    3241             :     }
    3242             :     case kX64StackCheck:
    3243      554385 :       __ CompareRoot(rsp, RootIndex::kStackLimit);
    3244      554404 :       break;
    3245             :     case kWord32AtomicExchangeInt8: {
    3246        1872 :       __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
    3247        1872 :       __ movsxbl(i.InputRegister(0), i.InputRegister(0));
    3248         936 :       break;
    3249             :     }
    3250             :     case kWord32AtomicExchangeUint8: {
    3251        1980 :       __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
    3252         990 :       __ movzxbl(i.InputRegister(0), i.InputRegister(0));
    3253             :       break;
    3254             :     }
    3255             :     case kWord32AtomicExchangeInt16: {
    3256        1540 :       __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
    3257        1540 :       __ movsxwl(i.InputRegister(0), i.InputRegister(0));
    3258         770 :       break;
    3259             :     }
    3260             :     case kWord32AtomicExchangeUint16: {
    3261        1316 :       __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
    3262         658 :       __ movzxwl(i.InputRegister(0), i.InputRegister(0));
    3263             :       break;
    3264             :     }
    3265             :     case kWord32AtomicExchangeWord32: {
    3266        2376 :       __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
    3267             :       break;
    3268             :     }
    3269             :     case kWord32AtomicCompareExchangeInt8: {
    3270         112 :       __ lock();
    3271         224 :       __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
    3272         112 :       __ movsxbl(rax, rax);
    3273         112 :       break;
    3274             :     }
    3275             :     case kWord32AtomicCompareExchangeUint8: {
    3276         137 :       __ lock();
    3277         274 :       __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
    3278             :       __ movzxbl(rax, rax);
    3279             :       break;
    3280             :     }
    3281             :     case kWord32AtomicCompareExchangeInt16: {
    3282         112 :       __ lock();
    3283         224 :       __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
    3284         112 :       __ movsxwl(rax, rax);
    3285         112 :       break;
    3286             :     }
    3287             :     case kWord32AtomicCompareExchangeUint16: {
    3288         137 :       __ lock();
    3289         274 :       __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
    3290             :       __ movzxwl(rax, rax);
    3291             :       break;
    3292             :     }
    3293             :     case kWord32AtomicCompareExchangeWord32: {
    3294         258 :       __ lock();
    3295         258 :       __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
    3296             :       break;
    3297             :     }
    3298             : #define ATOMIC_BINOP_CASE(op, inst)              \
    3299             :   case kWord32Atomic##op##Int8:                  \
    3300             :     ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
    3301             :     __ movsxbl(rax, rax);                        \
    3302             :     break;                                       \
    3303             :   case kWord32Atomic##op##Uint8:                 \
    3304             :     ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
    3305             :     __ movzxbl(rax, rax);                        \
    3306             :     break;                                       \
    3307             :   case kWord32Atomic##op##Int16:                 \
    3308             :     ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
    3309             :     __ movsxwl(rax, rax);                        \
    3310             :     break;                                       \
    3311             :   case kWord32Atomic##op##Uint16:                \
    3312             :     ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
    3313             :     __ movzxwl(rax, rax);                        \
    3314             :     break;                                       \
    3315             :   case kWord32Atomic##op##Word32:                \
    3316             :     ASSEMBLE_ATOMIC_BINOP(inst, movl, cmpxchgl); \
    3317             :     break;
    3318       11536 :       ATOMIC_BINOP_CASE(Add, addl)
    3319       11776 :       ATOMIC_BINOP_CASE(Sub, subl)
    3320       10542 :       ATOMIC_BINOP_CASE(And, andl)
    3321       10610 :       ATOMIC_BINOP_CASE(Or, orl)
    3322       12464 :       ATOMIC_BINOP_CASE(Xor, xorl)
    3323             : #undef ATOMIC_BINOP_CASE
    3324             :     case kX64Word64AtomicExchangeUint8: {
    3325        4124 :       __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
    3326        2062 :       __ movzxbq(i.InputRegister(0), i.InputRegister(0));
    3327             :       break;
    3328             :     }
    3329             :     case kX64Word64AtomicExchangeUint16: {
    3330        1956 :       __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
    3331         978 :       __ movzxwq(i.InputRegister(0), i.InputRegister(0));
    3332             :       break;
    3333             :     }
    3334             :     case kX64Word64AtomicExchangeUint32: {
    3335        1700 :       __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
    3336             :       break;
    3337             :     }
    3338             :     case kX64Word64AtomicExchangeUint64: {
    3339        1948 :       __ xchgq(i.InputRegister(0), i.MemoryOperand(1));
    3340             :       break;
    3341             :     }
    3342             :     case kX64Word64AtomicCompareExchangeUint8: {
    3343          25 :       __ lock();
    3344          50 :       __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
    3345             :       __ movzxbq(rax, rax);
    3346             :       break;
    3347             :     }
    3348             :     case kX64Word64AtomicCompareExchangeUint16: {
    3349          25 :       __ lock();
    3350          50 :       __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
    3351             :       __ movzxwq(rax, rax);
    3352             :       break;
    3353             :     }
    3354             :     case kX64Word64AtomicCompareExchangeUint32: {
    3355          25 :       __ lock();
    3356          25 :       __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
    3357             :       break;
    3358             :     }
    3359             :     case kX64Word64AtomicCompareExchangeUint64: {
    3360         279 :       __ lock();
    3361         279 :       __ cmpxchgq(i.MemoryOperand(2), i.InputRegister(1));
    3362             :       break;
    3363             :     }
    3364             : #define ATOMIC64_BINOP_CASE(op, inst)              \
    3365             :   case kX64Word64Atomic##op##Uint8:                \
    3366             :     ASSEMBLE_ATOMIC64_BINOP(inst, movb, cmpxchgb); \
    3367             :     __ movzxbq(rax, rax);                          \
    3368             :     break;                                         \
    3369             :   case kX64Word64Atomic##op##Uint16:               \
    3370             :     ASSEMBLE_ATOMIC64_BINOP(inst, movw, cmpxchgw); \
    3371             :     __ movzxwq(rax, rax);                          \
    3372             :     break;                                         \
    3373             :   case kX64Word64Atomic##op##Uint32:               \
    3374             :     ASSEMBLE_ATOMIC64_BINOP(inst, movl, cmpxchgl); \
    3375             :     break;                                         \
    3376             :   case kX64Word64Atomic##op##Uint64:               \
    3377             :     ASSEMBLE_ATOMIC64_BINOP(inst, movq, cmpxchgq); \
    3378             :     break;
    3379       10237 :       ATOMIC64_BINOP_CASE(Add, addq)
    3380       10952 :       ATOMIC64_BINOP_CASE(Sub, subq)
    3381       10947 :       ATOMIC64_BINOP_CASE(And, andq)
    3382       11217 :       ATOMIC64_BINOP_CASE(Or, orq)
    3383       11460 :       ATOMIC64_BINOP_CASE(Xor, xorq)
    3384             : #undef ATOMIC64_BINOP_CASE
    3385             :     case kWord32AtomicLoadInt8:
    3386             :     case kWord32AtomicLoadUint8:
    3387             :     case kWord32AtomicLoadInt16:
    3388             :     case kWord32AtomicLoadUint16:
    3389             :     case kWord32AtomicLoadWord32:
    3390             :     case kWord32AtomicStoreWord8:
    3391             :     case kWord32AtomicStoreWord16:
    3392             :     case kWord32AtomicStoreWord32:
    3393             :     case kX64Word64AtomicLoadUint8:
    3394             :     case kX64Word64AtomicLoadUint16:
    3395             :     case kX64Word64AtomicLoadUint32:
    3396             :     case kX64Word64AtomicLoadUint64:
    3397             :     case kX64Word64AtomicStoreWord8:
    3398             :     case kX64Word64AtomicStoreWord16:
    3399             :     case kX64Word64AtomicStoreWord32:
    3400             :     case kX64Word64AtomicStoreWord64:
    3401           0 :       UNREACHABLE();  // Won't be generated by instruction selector.
    3402             :       break;
    3403             :   }
    3404             :   return kSuccess;
    3405             : }  // NOLINT(readability/fn_size)
    3406             : 
    3407             : #undef ASSEMBLE_UNOP
    3408             : #undef ASSEMBLE_BINOP
    3409             : #undef ASSEMBLE_COMPARE
    3410             : #undef ASSEMBLE_MULT
    3411             : #undef ASSEMBLE_SHIFT
    3412             : #undef ASSEMBLE_MOVX
    3413             : #undef ASSEMBLE_SSE_BINOP
    3414             : #undef ASSEMBLE_SSE_UNOP
    3415             : #undef ASSEMBLE_AVX_BINOP
    3416             : #undef ASSEMBLE_IEEE754_BINOP
    3417             : #undef ASSEMBLE_IEEE754_UNOP
    3418             : #undef ASSEMBLE_ATOMIC_BINOP
    3419             : #undef ASSEMBLE_ATOMIC64_BINOP
    3420             : #undef ASSEMBLE_SIMD_INSTR
    3421             : #undef ASSEMBLE_SIMD_IMM_INSTR
    3422             : #undef ASSEMBLE_SIMD_PUNPCK_SHUFFLE
    3423             : #undef ASSEMBLE_SIMD_IMM_SHUFFLE
    3424             : 
    3425             : namespace {
    3426             : // Translates a FlagsCondition into the matching assembler Condition.
    3427     6185284 : Condition FlagsConditionToCondition(FlagsCondition condition) {
    3428     6185284 :   switch (condition) {
    3429             :     case kUnorderedEqual:  // Maps like kEqual; visible callers deal with the unordered part first.
    3430             :     case kEqual:
    3431             :       return equal;
    3432             :     case kUnorderedNotEqual:  // Maps like kNotEqual; same caveat as above.
    3433             :     case kNotEqual:
    3434     1431792 :       return not_equal;
    3435             :     case kSignedLessThan:
    3436      224293 :       return less;
    3437             :     case kSignedGreaterThanOrEqual:
    3438       56359 :       return greater_equal;
    3439             :     case kSignedLessThanOrEqual:
    3440      114447 :       return less_equal;
    3441             :     case kSignedGreaterThan:
    3442       76921 :       return greater;
    3443             :     case kUnsignedLessThan:
    3444      187857 :       return below;
    3445             :     case kUnsignedGreaterThanOrEqual:
    3446      231751 :       return above_equal;
    3447             :     case kUnsignedLessThanOrEqual:
    3448      943317 :       return below_equal;
    3449             :     case kUnsignedGreaterThan:
    3450      116487 :       return above;
    3451             :     case kOverflow:
    3452      203153 :       return overflow;
    3453             :     case kNotOverflow:
    3454        2128 :       return no_overflow;
    3455             :     default:  // Every other FlagsCondition has no mapping here.
    3456             :       break;
    3457             :   }
    3458           0 :   UNREACHABLE();  // Reached only through the default case above.
    3459             : }
    3460             : 
    3461             : }  // namespace
    3462             : 
    3463             : // Assembles branches after this instruction: jumps to true_label when the condition holds.
    3464     5189870 : void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
    3465             :   Label::Distance flabel_distance =
    3466     5189870 :       branch->fallthru ? Label::kNear : Label::kFar;
    3467     5189870 :   Label* tlabel = branch->true_label;
    3468     5189870 :   Label* flabel = branch->false_label;
    3469     5189870 :   if (branch->condition == kUnorderedEqual) {
    3470       49660 :     __ j(parity_even, flabel, flabel_distance);  // parity_even (presumably an unordered compare) => false label.
    3471     5140210 :   } else if (branch->condition == kUnorderedNotEqual) {
    3472      116253 :     __ j(parity_even, tlabel);  // parity_even => true label for kUnorderedNotEqual.
    3473             :   }
    3474     5189869 :   __ j(FlagsConditionToCondition(branch->condition), tlabel);
    3475             : 
    3476     5189893 :   if (!branch->fallthru) __ jmp(flabel, flabel_distance);  // No fallthrough: jump to the false label explicitly.
    3477     5189893 : }
    3478             : // Zeroes kSpeculationPoisonRegister via cmov when the negation of |condition| holds.
    3479           0 : void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
    3480             :                                             Instruction* instr) {
    3481             :   // TODO(jarin) Handle float comparisons (kUnordered[Not]Equal).
    3482           0 :   if (condition == kUnorderedEqual || condition == kUnorderedNotEqual) {
    3483           0 :     return;  // No poisoning code is emitted for the unordered conditions.
    3484             :   }
    3485             : 
    3486             :   condition = NegateFlagsCondition(condition);  // The cmov below fires when the original condition is false.
    3487           0 :   __ movl(kScratchRegister, Immediate(0));
    3488             :   __ cmovq(FlagsConditionToCondition(condition), kSpeculationPoisonRegister,
    3489           0 :            kScratchRegister);
    3490             : }
    3491             : // Like AssembleArchBranch, plus a counter-driven forced jump when FLAG_deopt_every_n_times > 0.
    3492      340295 : void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
    3493         299 :                                             BranchInfo* branch) {
    3494             :   Label::Distance flabel_distance =
    3495      340295 :       branch->fallthru ? Label::kNear : Label::kFar;
    3496      340295 :   Label* tlabel = branch->true_label;
    3497      340295 :   Label* flabel = branch->false_label;
    3498      340295 :   Label nodeopt;
    3499      340295 :   if (branch->condition == kUnorderedEqual) {
    3500           0 :     __ j(parity_even, flabel, flabel_distance);  // parity_even => false label for kUnorderedEqual.
    3501      340295 :   } else if (branch->condition == kUnorderedNotEqual) {
    3502        4186 :     __ j(parity_even, tlabel);  // parity_even => true label for kUnorderedNotEqual.
    3503             :   }
    3504      340295 :   __ j(FlagsConditionToCondition(branch->condition), tlabel);
    3505             : 
    3506      340294 :   if (FLAG_deopt_every_n_times > 0) {
    3507             :     ExternalReference counter =
    3508         299 :         ExternalReference::stress_deopt_count(isolate());
    3509             : 
    3510         299 :     __ pushfq();  // Save flags and rax; the counter update below uses both.
    3511         299 :     __ pushq(rax);
    3512         299 :     __ load_rax(counter);
    3513             :     __ decl(rax);
    3514         299 :     __ j(not_zero, &nodeopt);  // Counter still non-zero: skip the forced jump.
    3515             : 
    3516         299 :     __ Set(rax, FLAG_deopt_every_n_times);  // Counter reached zero: reset it to the flag value.
    3517         299 :     __ store_rax(counter);
    3518         299 :     __ popq(rax);
    3519         299 :     __ popfq();
    3520         299 :     __ jmp(tlabel);  // ...and take the true label unconditionally.
    3521             : 
    3522         299 :     __ bind(&nodeopt);
    3523         299 :     __ store_rax(counter);  // Write back the decremented counter.
    3524         299 :     __ popq(rax);
    3525         299 :     __ popfq();
    3526             :   }
    3527             : 
    3528      340294 :   if (!branch->fallthru) {
    3529           0 :     __ jmp(flabel, flabel_distance);  // No fallthrough: jump to the false label explicitly.
    3530             :   }
    3531      340294 : }
    3532             : // Emits an unconditional jump to |target| unless it is next in assembly order.
    3533     8034053 : void CodeGenerator::AssembleArchJump(RpoNumber target) {
    3534     8034053 :   if (!IsNextInAssemblyOrder(target)) __ jmp(GetLabel(target));
    3535     4948683 : }
    3536             : // Branches to the entry of an out-of-line wasm trap when |condition| holds.
    3537       47267 : void CodeGenerator::AssembleArchTrap(Instruction* instr,
    3538       47267 :                                      FlagsCondition condition) {
    3539             :   auto ool = new (zone()) WasmOutOfLineTrap(this, instr);  // Allocated via zone().
    3540       47242 :   Label* tlabel = ool->entry();
    3541       47242 :   Label end;
    3542       47242 :   if (condition == kUnorderedEqual) {
    3543           0 :     __ j(parity_even, &end);  // parity_even => skip the trap for kUnorderedEqual.
    3544       47242 :   } else if (condition == kUnorderedNotEqual) {
    3545         550 :     __ j(parity_even, tlabel);  // parity_even => trap for kUnorderedNotEqual.
    3546             :   }
    3547       47243 :   __ j(FlagsConditionToCondition(condition), tlabel);
    3548       47269 :   __ bind(&end);
    3549       47292 : }
    3550             : 
    3551             : // Assembles boolean materializations after this instruction: writes 0 or 1 for |condition|.
    3552     1215762 : void CodeGenerator::AssembleArchBoolean(Instruction* instr,
    3553             :                                         FlagsCondition condition) {
    3554             :   X64OperandConverter i(this, instr);
    3555      607881 :   Label done;
    3556             : 
    3557             :   // Materialize a full 64-bit 1 or 0 value. The result register is always the
    3558             :   // last output of the instruction.
    3559      607881 :   Label check;
    3560             :   DCHECK_NE(0u, instr->OutputCount());
    3561      607881 :   Register reg = i.OutputRegister(instr->OutputCount() - 1);
    3562      607881 :   if (condition == kUnorderedEqual) {
    3563        2875 :     __ j(parity_odd, &check, Label::kNear);  // parity_odd: decide via setcc below.
    3564             :     __ movl(reg, Immediate(0));  // Otherwise kUnorderedEqual yields 0.
    3565        2875 :     __ jmp(&done, Label::kNear);
    3566      605006 :   } else if (condition == kUnorderedNotEqual) {
    3567        3305 :     __ j(parity_odd, &check, Label::kNear);  // parity_odd: decide via setcc below.
    3568             :     __ movl(reg, Immediate(1));  // Otherwise kUnorderedNotEqual yields 1.
    3569        3305 :     __ jmp(&done, Label::kNear);
    3570             :   }
    3571      607881 :   __ bind(&check);
    3572      607880 :   __ setcc(FlagsConditionToCondition(condition), reg);
    3573             :   __ movzxbl(reg, reg);  // Zero-extend the low byte of the setcc result.
    3574      607881 :   __ bind(&done);
    3575      607881 : }
    3576             : // Collects (case value, label) pairs from the inputs and hands them to the range helper.
    3577      514816 : void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
    3578             :   X64OperandConverter i(this, instr);
    3579       39506 :   Register input = i.InputRegister(0);
    3580             :   std::vector<std::pair<int32_t, Label*>> cases;
    3581      514816 :   for (size_t index = 2; index < instr->InputCount(); index += 2) {  // Pairs start at input 2.
    3582      653705 :     cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
    3583             :   }
    3584             :   AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),  // Input 1: presumably the default target - confirm.
    3585       79014 :                                       cases.data() + cases.size());
    3586       39506 : }
    3587             : // Emits a linear compare-and-jump chain over the (case value, target block) input pairs.
    3588           0 : void CodeGenerator::AssembleArchLookupSwitch(Instruction* instr) {
    3589             :   X64OperandConverter i(this, instr);
    3590           0 :   Register input = i.InputRegister(0);
    3591           0 :   for (size_t index = 2; index < instr->InputCount(); index += 2) {  // Pairs start at input 2.
    3592           0 :     __ cmpl(input, Immediate(i.InputInt32(index + 0)));
    3593           0 :     __ j(equal, GetLabel(i.InputRpo(index + 1)));
    3594             :   }
    3595           0 :   AssembleArchJump(i.InputRpo(1));  // No case matched: go to the block named by input 1.
    3596           0 : }
    3597             : // Emits a range-checked indirect jump through a jump table built from the case labels.
    3598      225754 : void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
    3599             :   X64OperandConverter i(this, instr);
    3600         304 :   Register input = i.InputRegister(0);
    3601         304 :   int32_t const case_count = static_cast<int32_t>(instr->InputCount() - 2);  // Inputs 0 and 1 are not cases.
    3602         304 :   Label** cases = zone()->NewArray<Label*>(case_count);
    3603      225146 :   for (int32_t index = 0; index < case_count; ++index) {
    3604      449684 :     cases[index] = GetLabel(i.InputRpo(index + 2));
    3605             :   }
    3606         304 :   Label* const table = AddJumpTable(cases, case_count);
    3607         304 :   __ cmpl(input, Immediate(case_count));
    3608         608 :   __ j(above_equal, GetLabel(i.InputRpo(1)));  // Unsigned compare: out-of-range input goes to the block in input 1.
    3609         608 :   __ leaq(kScratchRegister, Operand(table));
    3610         304 :   __ jmp(Operand(kScratchRegister, input, times_8, 0));  // Table is indexed by |input| with a scale of 8.
    3611         304 : }
    3612             : 
    3613             : namespace {
    3614             : // Bytes reserved on the stack per callee-saved XMM register.
    3615             : static const int kQuadWordSize = 16;
    3616             : 
    3617             : }  // namespace
    3618             : // Reserves frame slots for the callee-saved FP and general-purpose registers.
    3619     2949471 : void CodeGenerator::FinishFrame(Frame* frame) {
    3620     5898942 :   auto call_descriptor = linkage()->GetIncomingDescriptor();
    3621             : 
    3622             :   const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
    3623     2949471 :   if (saves_fp != 0) {
    3624             :     frame->AlignSavedCalleeRegisterSlots();
    3625           0 :     if (saves_fp != 0) {  // Save callee-saved XMM registers. NOTE(review): repeats the enclosing check.
    3626             :       const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
    3627             :       frame->AllocateSavedCalleeRegisterSlots(
    3628           0 :           saves_fp_count * (kQuadWordSize / kSystemPointerSize));  // kQuadWordSize bytes per register, in pointer-sized slots.
    3629             :     }
    3630             :   }
    3631             :   const RegList saves = call_descriptor->CalleeSavedRegisters();
    3632     2949471 :   if (saves != 0) {  // Save callee-saved registers.
    3633             :     int count = 0;
    3634    18082416 :     for (int i = Register::kNumRegisters - 1; i >= 0; i--) {  // Count the register bits set in |saves|.
    3635    18082416 :       if (((1 << i) & saves)) {
    3636     5650755 :         ++count;
    3637             :       }
    3638             :     }
    3639             :     frame->AllocateSavedCalleeRegisterSlots(count);
    3640             :   }
    3641     2949471 : }
    3642             : // Emits the function prologue: frame setup, stack allocation and callee-saved register spills.
    3643    19797356 : void CodeGenerator::AssembleConstructFrame() {
    3644     7100095 :   auto call_descriptor = linkage()->GetIncomingDescriptor();
    3645     2961038 :   if (frame_access_state()->has_frame()) {
    3646     2961110 :     int pc_base = __ pc_offset();
    3647             : 
    3648     2961110 :     if (call_descriptor->IsCFunctionCall()) {
    3649     1130151 :       __ pushq(rbp);
    3650             :       __ movq(rbp, rsp);
    3651     1830959 :     } else if (call_descriptor->IsJSFunctionCall()) {
    3652      652839 :       __ Prologue();
    3653      652845 :       if (call_descriptor->PushArgumentCount()) {
    3654       38861 :         __ pushq(kJavaScriptCallArgCountRegister);
    3655             :       }
    3656             :     } else {
    3657     1178120 :       __ StubPrologue(info()->GetOutputStackFrameType());
    3658     1178044 :       if (call_descriptor->IsWasmFunctionCall()) {
    3659     1062358 :         __ pushq(kWasmInstanceRegister);
    3660      115686 :       } else if (call_descriptor->IsWasmImportWrapper()) {
    3661             :         // WASM import wrappers are passed a tuple in the place of the instance.
    3662             :         // Unpack the tuple into the instance and the target callable.
    3663             :         // This must be done here in the codegen because it cannot be expressed
    3664             :         // properly in the graph.
    3665             :         __ LoadTaggedPointerField(
    3666             :             kJSFunctionRegister,
    3667        7087 :             FieldOperand(kWasmInstanceRegister, Tuple2::kValue2Offset));
    3668             :         __ LoadTaggedPointerField(
    3669             :             kWasmInstanceRegister,
    3670        7087 :             FieldOperand(kWasmInstanceRegister, Tuple2::kValue1Offset));
    3671        7087 :         __ pushq(kWasmInstanceRegister);
    3672             :       }
    3673             :     }
    3674             : 
    3675     2961180 :     unwinding_info_writer_.MarkFrameConstructed(pc_base);
    3676             :   }
    3677     2960938 :   int shrink_slots = frame()->GetTotalFrameSlotCount() -
    3678     2960938 :                      call_descriptor->CalculateFixedFrameSize();  // Slots beyond the fixed frame part.
    3679             : 
    3680     2961105 :   if (info()->is_osr()) {
    3681             :     // TurboFan OSR-compiled functions cannot be entered directly.
    3682        4917 :     __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
    3683             : 
    3684             :     // Unoptimized code jumps directly to this entrypoint while the unoptimized
    3685             :     // frame is still on the stack. Optimized code uses OSR values directly from
    3686             :     // the unoptimized frame. Thus, all that needs to be done is to allocate the
    3687             :     // remaining stack slots.
    3688        4917 :     if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
    3689        9834 :     osr_pc_offset_ = __ pc_offset();
    3690        4917 :     shrink_slots -= static_cast<int>(osr_helper()->UnoptimizedFrameSlots());
    3691        4917 :     ResetSpeculationPoison();
    3692             :   }
    3693             : 
    3694             :   const RegList saves = call_descriptor->CalleeSavedRegisters();
    3695             :   const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
    3696             : 
    3697     2960941 :   if (shrink_slots > 0) {
    3698             :     DCHECK(frame_access_state()->has_frame());
    3699     1907061 :     if (info()->IsWasm() && shrink_slots > 128) {
    3700             :       // For WebAssembly functions with big frames we have to do the stack
    3701             :       // overflow check before we construct the frame. Otherwise we may not
    3702             :       // have enough space on the stack to call the runtime for the stack
    3703             :       // overflow.
    3704           9 :       Label done;
    3705             : 
    3706             :       // If the frame is bigger than the stack, we throw the stack overflow
    3707             :       // exception unconditionally. Thereby we can avoid the integer overflow
    3708             :       // check in the condition code.
    3709           9 :       if (shrink_slots * kSystemPointerSize < FLAG_stack_size * 1024) {
    3710             :         __ movq(kScratchRegister,
    3711             :                 FieldOperand(kWasmInstanceRegister,
    3712           9 :                              WasmInstanceObject::kRealStackLimitAddressOffset));
    3713          18 :         __ movq(kScratchRegister, Operand(kScratchRegister, 0));  // Load the limit stored at that address.
    3714           9 :         __ addq(kScratchRegister, Immediate(shrink_slots * kSystemPointerSize));
    3715           9 :         __ cmpq(rsp, kScratchRegister);
    3716           9 :         __ j(above_equal, &done);  // rsp >= limit + frame size: skip the throw.
    3717             :       }
    3718             :       __ LoadTaggedPointerField(
    3719             :           rcx, FieldOperand(kWasmInstanceRegister,
    3720           9 :                             WasmInstanceObject::kCEntryStubOffset));
    3721           9 :       __ Move(rsi, Smi::zero());
    3722           9 :       __ CallRuntimeWithCEntry(Runtime::kThrowWasmStackOverflow, rcx);
    3723             :       ReferenceMap* reference_map = new (zone()) ReferenceMap(zone());
    3724             :       RecordSafepoint(reference_map, Safepoint::kSimple, 0,
    3725           9 :                       Safepoint::kNoLazyDeopt);
    3726           9 :       __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
    3727           9 :       __ bind(&done);
    3728             :     }
    3729             : 
    3730             :     // Skip callee-saved and return slots, which are created below.
    3731     1907061 :     shrink_slots -= base::bits::CountPopulation(saves);
    3732             :     shrink_slots -= base::bits::CountPopulation(saves_fp) *
    3733     1907061 :                     (kQuadWordSize / kSystemPointerSize);
    3734     1907061 :     shrink_slots -= frame()->GetReturnSlotCount();
    3735     1907061 :     if (shrink_slots > 0) {
    3736     3372810 :       __ subq(rsp, Immediate(shrink_slots * kSystemPointerSize));
    3737             :     }
    3738             :   }
    3739             : 
    3740     2960986 :   if (saves_fp != 0) {  // Save callee-saved XMM registers.
    3741             :     const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
    3742           0 :     const int stack_size = saves_fp_count * kQuadWordSize;
    3743             :     // Adjust the stack pointer.
    3744           0 :     __ subp(rsp, Immediate(stack_size));
    3745             :     // Store the registers on the stack.
    3746             :     int slot_idx = 0;
    3747           0 :     for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
    3748           0 :       if (!((1 << i) & saves_fp)) continue;  // Register i is not in the save set.
    3749             :       __ movdqu(Operand(rsp, kQuadWordSize * slot_idx),
    3750           0 :                 XMMRegister::from_code(i));
    3751           0 :       slot_idx++;
    3752             :     }
    3753             :   }
    3754             : 
    3755     2960986 :   if (saves != 0) {  // Save callee-saved registers.
    3756    18082416 :     for (int i = Register::kNumRegisters - 1; i >= 0; i--) {  // Pushed from the highest register code down.
    3757    18082416 :       if (!((1 << i) & saves)) continue;
    3758     5650755 :       __ pushq(Register::from_code(i));
    3759             :     }
    3760             :   }
    3761             : 
    3762             :   // Allocate return slots (located after callee-saved).
    3763     2960986 :   if (frame()->GetReturnSlotCount() > 0) {
    3764        1676 :     __ subq(rsp, Immediate(frame()->GetReturnSlotCount() * kSystemPointerSize));
    3765             :   }
    3766     2960986 : }
    3767             : 
    3768     6528350 : void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
    3769    13056700 :   auto call_descriptor = linkage()->GetIncomingDescriptor();
    3770             : 
    3771             :   // Restore registers.
    3772             :   const RegList saves = call_descriptor->CalleeSavedRegisters();
    3773     3264175 :   if (saves != 0) {
    3774     1141861 :     const int returns = frame()->GetReturnSlotCount();
    3775     1141861 :     if (returns != 0) {
    3776        1640 :       __ addq(rsp, Immediate(returns * kSystemPointerSize));
    3777             :     }
    3778    18269776 :     for (int i = 0; i < Register::kNumRegisters; i++) {
    3779    18269776 :       if (!((1 << i) & saves)) continue;
    3780     5709305 :       __ popq(Register::from_code(i));
    3781             :     }
    3782             :   }
    3783             :   const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
    3784     3264175 :   if (saves_fp != 0) {
    3785             :     const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
    3786           0 :     const int stack_size = saves_fp_count * kQuadWordSize;
    3787             :     // Load the registers from the stack.
    3788             :     int slot_idx = 0;
    3789           0 :     for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
    3790           0 :       if (!((1 << i) & saves_fp)) continue;
    3791             :       __ movdqu(XMMRegister::from_code(i),
    3792           0 :                 Operand(rsp, kQuadWordSize * slot_idx));
    3793           0 :       slot_idx++;
    3794             :     }
    3795             :     // Adjust the stack pointer.
    3796           0 :     __ addp(rsp, Immediate(stack_size));
    3797             :   }
    3798             : 
    3799             :   unwinding_info_writer_.MarkBlockWillExit();
    3800             : 
    3801             :   // Might need rcx for scratch if pop_size is too big or if there is a variable
    3802             :   // pop count.
    3803             :   DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rcx.bit());
    3804             :   DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rdx.bit());
    3805     3264175 :   size_t pop_size = call_descriptor->StackParameterCount() * kSystemPointerSize;
    3806             :   X64OperandConverter g(this, nullptr);
    3807     3264175 :   if (call_descriptor->IsCFunctionCall()) {
    3808     1141861 :     AssembleDeconstructFrame();
    3809     2122314 :   } else if (frame_access_state()->has_frame()) {
    3810     4101687 :     if (pop->IsImmediate() && g.ToConstant(pop).ToInt32() == 0) {
    3811             :       // Canonicalize JSFunction return sites for now.
    3812     2025354 :       if (return_label_.is_bound()) {
    3813      309139 :         __ jmp(&return_label_);
    3814     3264292 :         return;
    3815             :       } else {
    3816     1716215 :         __ bind(&return_label_);
    3817     1716240 :         AssembleDeconstructFrame();
    3818             :       }
    3819             :     } else {
    3820       50995 :       AssembleDeconstructFrame();
    3821             :     }
    3822             :   }
    3823             : 
    3824     2955205 :   if (pop->IsImmediate()) {
    3825     5808510 :     pop_size += g.ToConstant(pop).ToInt32() * kSystemPointerSize;
    3826     2904270 :     CHECK_LT(pop_size, static_cast<size_t>(std::numeric_limits<int>::max()));
    3827     2904270 :     __ Ret(static_cast<int>(pop_size), rcx);
    3828             :   } else {
    3829             :     Register pop_reg = g.ToRegister(pop);
    3830       50965 :     Register scratch_reg = pop_reg == rcx ? rdx : rcx;
    3831       50965 :     __ popq(scratch_reg);
    3832      101930 :     __ leaq(rsp, Operand(rsp, pop_reg, times_8, static_cast<int>(pop_size)));
    3833       50965 :     __ jmp(scratch_reg);
    3834             :   }
    3835             : }
    3836             : 
    3837     2949296 : void CodeGenerator::FinishCode() { tasm()->PatchConstPool(); }
    3838             : 
    3839    34754549 : void CodeGenerator::AssembleMove(InstructionOperand* source,
    3840             :                                  InstructionOperand* destination) {
    3841             :   X64OperandConverter g(this, nullptr);
    3842             :   // Helper function to write the given constant to the dst register.
    3843    18245952 :   auto MoveConstantToRegister = [&](Register dst, Constant src) {
    3844    18245952 :     switch (src.type()) {
    3845             :       case Constant::kInt32: {
    3846     4430992 :         if (RelocInfo::IsWasmReference(src.rmode())) {
    3847           0 :           __ movq(dst, src.ToInt64(), src.rmode());
    3848             :         } else {
    3849     4430992 :           int32_t value = src.ToInt32();
    3850     4430992 :           if (value == 0) {
    3851     1056102 :             __ xorl(dst, dst);
    3852             :           } else {
    3853     3374890 :             __ movl(dst, Immediate(value));
    3854             :           }
    3855             :         }
    3856             :         break;
    3857             :       }
    3858             :       case Constant::kInt64:
    3859     2165785 :         if (RelocInfo::IsWasmReference(src.rmode())) {
    3860           0 :           __ movq(dst, src.ToInt64(), src.rmode());
    3861             :         } else {
    3862     2165785 :           __ Set(dst, src.ToInt64());
    3863             :         }
    3864             :         break;
    3865             :       case Constant::kFloat32:
    3866         920 :         __ MoveNumber(dst, src.ToFloat32());
    3867         460 :         break;
    3868             :       case Constant::kFloat64:
    3869     1288823 :         __ MoveNumber(dst, src.ToFloat64().value());
    3870     1288827 :         break;
    3871             :       case Constant::kExternalReference:
    3872     5076912 :         __ Move(dst, src.ToExternalReference());
    3873     2538490 :         break;
    3874             :       case Constant::kHeapObject: {
    3875     7819105 :         Handle<HeapObject> src_object = src.ToHeapObject();
    3876             :         RootIndex index;
    3877     7819110 :         if (IsMaterializableFromRoot(src_object, &index)) {
    3878     1623856 :           __ LoadRoot(dst, index);
    3879             :         } else {
    3880     6195252 :           __ Move(dst, src_object);
    3881             :         }
    3882             :         break;
    3883             :       }
    3884             :       case Constant::kDelayedStringConstant: {
    3885        2227 :         const StringConstantBase* src_constant = src.ToDelayedStringConstant();
    3886        2227 :         __ MoveStringConstant(dst, src_constant);
    3887        2227 :         break;
    3888             :       }
    3889             :       case Constant::kRpoNumber:
    3890           0 :         UNREACHABLE();  // TODO(dcarney): load of labels on x64.
    3891             :         break;
    3892             :     }
    3893    53000607 :   };
    3894             :   // Helper function to write the given constant to the stack.
    3895       38717 :   auto MoveConstantToSlot = [&](Operand dst, Constant src) {
    3896       38717 :     if (!RelocInfo::IsWasmReference(src.rmode())) {
    3897       38717 :       switch (src.type()) {
    3898             :         case Constant::kInt32:
    3899       19544 :           __ movq(dst, Immediate(src.ToInt32()));
    3900       19544 :           return;
    3901             :         case Constant::kInt64:
    3902       13355 :           __ Set(dst, src.ToInt64());
    3903       13355 :           return;
    3904             :         default:
    3905             :           break;
    3906             :       }
    3907             :     }
    3908        5818 :     MoveConstantToRegister(kScratchRegister, src);
    3909        5818 :     __ movq(dst, kScratchRegister);
    3910    34754549 :   };
    3911             :   // Dispatch on the source and destination operand kinds.
    3912    34754549 :   switch (MoveType::InferMove(source, destination)) {
    3913             :     case MoveType::kRegisterToRegister:
    3914     4138179 :       if (source->IsRegister()) {
    3915     4017087 :         __ movq(g.ToRegister(destination), g.ToRegister(source));
    3916             :       } else {
    3917             :         DCHECK(source->IsFPRegister());
    3918             :         __ Movapd(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
    3919             :       }
    3920             :       return;
    3921             :     case MoveType::kRegisterToStack: {
    3922     4349863 :       Operand dst = g.ToOperand(destination);
    3923     4349863 :       if (source->IsRegister()) {
    3924     4041135 :         __ movq(dst, g.ToRegister(source));
    3925             :       } else {
    3926             :         DCHECK(source->IsFPRegister());
    3927             :         XMMRegister src = g.ToDoubleRegister(source);
    3928             :         MachineRepresentation rep =
    3929             :             LocationOperand::cast(source)->representation();
    3930      308728 :         if (rep != MachineRepresentation::kSimd128) {
    3931             :           __ Movsd(dst, src);
    3932             :         } else {
    3933             :           __ Movups(dst, src);
    3934             :         }
    3935             :       }
    3936             :       return;
    3937             :     }
    3938             :     case MoveType::kStackToRegister: {
    3939     7601047 :       Operand src = g.ToOperand(source);
    3940     7601047 :       if (source->IsStackSlot()) {
    3941     7173432 :         __ movq(g.ToRegister(destination), src);
    3942             :       } else {
    3943             :         DCHECK(source->IsFPStackSlot());
    3944             :         XMMRegister dst = g.ToDoubleRegister(destination);
    3945             :         MachineRepresentation rep =
    3946             :             LocationOperand::cast(source)->representation();
    3947      427615 :         if (rep != MachineRepresentation::kSimd128) {
    3948             :           __ Movsd(dst, src);
    3949             :         } else {
    3950             :           __ Movups(dst, src);
    3951             :         }
    3952             :       }
    3953             :       return;
    3954             :     }
    3955             :     case MoveType::kStackToStack: {
    3956       47847 :       Operand src = g.ToOperand(source);
    3957       47847 :       Operand dst = g.ToOperand(destination);
    3958       47847 :       if (source->IsStackSlot()) {
    3959             :         // Spill on demand to use a temporary register for memory-to-memory
    3960             :         // moves.
    3961       26072 :         __ movq(kScratchRegister, src);
    3962             :         __ movq(dst, kScratchRegister);
    3963             :       } else {
    3964             :         MachineRepresentation rep =
    3965             :             LocationOperand::cast(source)->representation();
    3966       21775 :         if (rep != MachineRepresentation::kSimd128) {
    3967             :           __ Movsd(kScratchDoubleReg, src);
    3968             :           __ Movsd(dst, kScratchDoubleReg);
    3969             :         } else {
    3970             :           DCHECK(source->IsSimd128StackSlot());
    3971             :           __ Movups(kScratchDoubleReg, src);
    3972             :           __ Movups(dst, kScratchDoubleReg);
    3973             :         }
    3974             :       }
    3975             :       return;
    3976             :     }
    3977             :     case MoveType::kConstantToRegister: {
    3978    18570867 :       Constant src = g.ToConstant(source);
    3979    18570910 :       if (destination->IsRegister()) {
    3980    18240154 :         MoveConstantToRegister(g.ToRegister(destination), src);
    3981             :       } else {
    3982             :         DCHECK(destination->IsFPRegister());
    3983      330756 :         XMMRegister dst = g.ToDoubleRegister(destination);
    3984      330756 :         if (src.type() == Constant::kFloat32) {
    3985             :           // TODO(turbofan): Can we do better here?
    3986       15408 :           __ Move(dst, bit_cast<uint32_t>(src.ToFloat32()));
    3987             :         } else {
    3988             :           DCHECK_EQ(src.type(), Constant::kFloat64);
    3989      315348 :           __ Move(dst, src.ToFloat64().AsUint64());
    3990             :         }
    3991             :       }
    3992             :       return;
    3993             :     }
    3994             :     case MoveType::kConstantToStack: {
    3995       46842 :       Constant src = g.ToConstant(source);
    3996       46842 :       Operand dst = g.ToOperand(destination);
    3997       46842 :       if (destination->IsStackSlot()) {
    3998       38717 :         MoveConstantToSlot(dst, src);
    3999             :       } else {
    4000             :         DCHECK(destination->IsFPStackSlot());
    4001        8125 :         if (src.type() == Constant::kFloat32) {
    4002        3745 :           __ movl(dst, Immediate(bit_cast<uint32_t>(src.ToFloat32())));
    4003             :         } else {
    4004             :           DCHECK_EQ(src.type(), Constant::kFloat64);
    4005        4380 :           __ movq(kScratchRegister, src.ToFloat64().AsUint64());
    4006             :           __ movq(dst, kScratchRegister);
    4007             :         }
    4008             :       }
    4009             :       return;
    4010             :     }
    4011             :   }
    4012           0 :   UNREACHABLE();
    4013             : }
    4014             : 
    4015       88887 : void CodeGenerator::AssembleSwap(InstructionOperand* source,
    4016        4132 :                                  InstructionOperand* destination) {
    4017             :   X64OperandConverter g(this, nullptr);
    4018             :   // Dispatch on the source and destination operand kinds.  Not all
    4019             :   // combinations are possible.
    4020       88887 :   switch (MoveType::InferSwap(source, destination)) {
    4021             :     case MoveType::kRegisterToRegister: {
    4022       75804 :       if (source->IsRegister()) {
    4023             :         Register src = g.ToRegister(source);
    4024             :         Register dst = g.ToRegister(destination);
    4025       73056 :         __ movq(kScratchRegister, src);
    4026             :         __ movq(src, dst);
    4027             :         __ movq(dst, kScratchRegister);
    4028             :       } else {
    4029             :         DCHECK(source->IsFPRegister());
    4030             :         XMMRegister src = g.ToDoubleRegister(source);
    4031             :         XMMRegister dst = g.ToDoubleRegister(destination);
    4032             :         __ Movapd(kScratchDoubleReg, src);
    4033             :         __ Movapd(src, dst);
    4034             :         __ Movapd(dst, kScratchDoubleReg);
    4035             :       }
    4036             :       return;
    4037             :     }
    4038             :     case MoveType::kRegisterToStack: {
    4039        8361 :       if (source->IsRegister()) {
    4040             :         Register src = g.ToRegister(source);
    4041        2066 :         __ pushq(src);
    4042             :         frame_access_state()->IncreaseSPDelta(1);
    4043             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4044        4132 :                                                          kSystemPointerSize);
    4045             :         __ movq(src, g.ToOperand(destination));
    4046             :         frame_access_state()->IncreaseSPDelta(-1);
    4047        2066 :         __ popq(g.ToOperand(destination));
    4048             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4049        4132 :                                                          -kSystemPointerSize);
    4050             :       } else {
    4051             :         DCHECK(source->IsFPRegister());
    4052             :         XMMRegister src = g.ToDoubleRegister(source);
    4053        6295 :         Operand dst = g.ToOperand(destination);
    4054             :         MachineRepresentation rep =
    4055             :             LocationOperand::cast(source)->representation();
    4056        6295 :         if (rep != MachineRepresentation::kSimd128) {
    4057             :           __ Movsd(kScratchDoubleReg, src);
    4058             :           __ Movsd(src, dst);
    4059             :           __ Movsd(dst, kScratchDoubleReg);
    4060             :         } else {
    4061             :           __ Movups(kScratchDoubleReg, src);
    4062             :           __ Movups(src, dst);
    4063             :           __ Movups(dst, kScratchDoubleReg);
    4064             :         }
    4065             :       }
    4066             :       return;
    4067             :     }
    4068             :     case MoveType::kStackToStack: {
    4069        4722 :       Operand src = g.ToOperand(source);
    4070        4722 :       Operand dst = g.ToOperand(destination);
    4071             :       MachineRepresentation rep =
    4072             :           LocationOperand::cast(source)->representation();
    4073        4722 :       if (rep != MachineRepresentation::kSimd128) {
    4074             :         Register tmp = kScratchRegister;
    4075        3587 :         __ movq(tmp, dst);
    4076        3587 :         __ pushq(src);  // Then use stack to copy src to destination.
    4077             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4078        7174 :                                                          kSystemPointerSize);
    4079        3587 :         __ popq(dst);
    4080             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4081        7174 :                                                          -kSystemPointerSize);
    4082             :         __ movq(src, tmp);
    4083             :       } else {
    4084             :         // Without AVX, misaligned reads and writes will trap. Move using the
    4085             :         // stack, in two parts.
    4086        1135 :         __ movups(kScratchDoubleReg, dst);  // Save dst in scratch register.
    4087        1135 :         __ pushq(src);  // Then use stack to copy src to destination.
    4088             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4089        2270 :                                                          kSystemPointerSize);
    4090        1135 :         __ popq(dst);
    4091             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4092        2270 :                                                          -kSystemPointerSize);
    4093        1135 :         __ pushq(g.ToOperand(source, kSystemPointerSize));
    4094             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4095        2270 :                                                          kSystemPointerSize);
    4096        1135 :         __ popq(g.ToOperand(destination, kSystemPointerSize));
    4097             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4098        2270 :                                                          -kSystemPointerSize);
    4099        1135 :         __ movups(src, kScratchDoubleReg);
    4100             :       }
    4101             :       return;
    4102             :     }
    4103             :     default:
    4104           0 :       UNREACHABLE();
    4105             :       break;
    4106             :   }
    4107             : }
    4108             : 
    4109         304 : void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
    4110      225146 :   for (size_t index = 0; index < target_count; ++index) {
    4111      224842 :     __ dq(targets[index]);
    4112             :   }
    4113         304 : }
    4114             : 
    4115             : #undef __
    4116             : 
    4117             : }  // namespace compiler
    4118             : }  // namespace internal
    4119      183867 : }  // namespace v8

Generated by: LCOV version 1.10