LCOV - code coverage report
Current view: top level - src/compiler/backend/x64 - code-generator-x64.cc (source / functions) Hit Total Coverage
Test: app.info Lines: 1402 1585 88.5 %
Date: 2019-04-17 Functions: 41 58 70.7 %

          Line data    Source code
       1             : // Copyright 2013 the V8 project authors. All rights reserved.
       2             : // Use of this source code is governed by a BSD-style license that can be
       3             : // found in the LICENSE file.
       4             : 
       5             : #include "src/compiler/backend/code-generator.h"
       6             : 
       7             : #include <limits>
       8             : 
       9             : #include "src/base/overflowing-math.h"
      10             : #include "src/compiler/backend/code-generator-impl.h"
      11             : #include "src/compiler/backend/gap-resolver.h"
      12             : #include "src/compiler/node-matchers.h"
      13             : #include "src/compiler/osr.h"
      14             : #include "src/heap/heap-inl.h"  // crbug.com/v8/8499
      15             : #include "src/macro-assembler.h"
      16             : #include "src/objects/smi.h"
      17             : #include "src/optimized-compilation-info.h"
      18             : #include "src/wasm/wasm-code-manager.h"
      19             : #include "src/wasm/wasm-objects.h"
      20             : #include "src/x64/assembler-x64.h"
      21             : 
      22             : namespace v8 {
      23             : namespace internal {
      24             : namespace compiler {
      25             : 
      26             : #define __ tasm()->
      27             : 
      28             : // Adds X64 specific methods for decoding operands.
      29             : class X64OperandConverter : public InstructionOperandConverter {
      30             :  public:
      31             :   X64OperandConverter(CodeGenerator* gen, Instruction* instr)
      32             :       : InstructionOperandConverter(gen, instr) {}
      33             : 
      34             :   Immediate InputImmediate(size_t index) {
      35     5360832 :     return ToImmediate(instr_->InputAt(index));
      36             :   }
      37             : 
      38     1056765 :   Operand InputOperand(size_t index, int extra = 0) {
      39     2113534 :     return ToOperand(instr_->InputAt(index), extra);
      40             :   }
      41             : 
      42           0 :   Operand OutputOperand() { return ToOperand(instr_->Output()); }
      43             : 
      44     4405068 :   Immediate ToImmediate(InstructionOperand* operand) {
      45     4405068 :     Constant constant = ToConstant(operand);
      46     4405151 :     if (constant.type() == Constant::kFloat64) {
      47             :       DCHECK_EQ(0, constant.ToFloat64().AsUint64());
      48      365964 :       return Immediate(0);
      49             :     }
      50     4039187 :     if (RelocInfo::IsWasmReference(constant.rmode())) {
      51           0 :       return Immediate(constant.ToInt32(), constant.rmode());
      52             :     }
      53     4039187 :     return Immediate(constant.ToInt32());
      54             :   }
      55             : 
      56             :   Operand ToOperand(InstructionOperand* op, int extra = 0) {
      57             :     DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
      58    15800601 :     return SlotToOperand(AllocatedOperand::cast(op)->index(), extra);
      59             :   }
      60             : 
      61    15813533 :   Operand SlotToOperand(int slot_index, int extra = 0) {
      62    15813533 :     FrameOffset offset = frame_access_state()->GetFrameOffset(slot_index);
      63             :     return Operand(offset.from_stack_pointer() ? rsp : rbp,
      64    31626908 :                    offset.offset() + extra);
      65             :   }
      66             : 
      67             :   static size_t NextOffset(size_t* offset) {
      68    16421465 :     size_t i = *offset;
      69    29397426 :     (*offset)++;
      70             :     return i;
      71             :   }
      72             : 
      73             :   static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) {
      74             :     STATIC_ASSERT(0 == static_cast<int>(times_1));
      75             :     STATIC_ASSERT(1 == static_cast<int>(times_2));
      76             :     STATIC_ASSERT(2 == static_cast<int>(times_4));
      77             :     STATIC_ASSERT(3 == static_cast<int>(times_8));
      78     1311255 :     int scale = static_cast<int>(mode - one);
      79             :     DCHECK(scale >= 0 && scale < 4);
      80     1311255 :     return static_cast<ScaleFactor>(scale);
      81             :   }
      82             : 
      83    16421465 :   Operand MemoryOperand(size_t* offset) {
      84    16421465 :     AddressingMode mode = AddressingModeField::decode(instr_->opcode());
      85    16421465 :     switch (mode) {
      86             :       case kMode_MR: {
      87     2381163 :         Register base = InputRegister(NextOffset(offset));
      88             :         int32_t disp = 0;
      89     2381163 :         return Operand(base, disp);
      90             :       }
      91             :       case kMode_MRI: {
      92    11229271 :         Register base = InputRegister(NextOffset(offset));
      93             :         int32_t disp = InputInt32(NextOffset(offset));
      94    11228954 :         return Operand(base, disp);
      95             :       }
      96             :       case kMode_MR1:
      97             :       case kMode_MR2:
      98             :       case kMode_MR4:
      99             :       case kMode_MR8: {
     100      703859 :         Register base = InputRegister(NextOffset(offset));
     101      703859 :         Register index = InputRegister(NextOffset(offset));
     102             :         ScaleFactor scale = ScaleFor(kMode_MR1, mode);
     103             :         int32_t disp = 0;
     104      703859 :         return Operand(base, index, scale, disp);
     105             :       }
     106             :       case kMode_MR1I:
     107             :       case kMode_MR2I:
     108             :       case kMode_MR4I:
     109             :       case kMode_MR8I: {
     110      455182 :         Register base = InputRegister(NextOffset(offset));
     111      455182 :         Register index = InputRegister(NextOffset(offset));
     112             :         ScaleFactor scale = ScaleFor(kMode_MR1I, mode);
     113             :         int32_t disp = InputInt32(NextOffset(offset));
     114      455180 :         return Operand(base, index, scale, disp);
     115             :       }
     116             :       case kMode_M1: {
     117           0 :         Register base = InputRegister(NextOffset(offset));
     118             :         int32_t disp = 0;
     119           0 :         return Operand(base, disp);
     120             :       }
     121             :       case kMode_M2:
     122           0 :         UNREACHABLE();  // Should use kModeMR with more compact encoding instead
     123             :         return Operand(no_reg, 0);
     124             :       case kMode_M4:
     125             :       case kMode_M8: {
     126       19747 :         Register index = InputRegister(NextOffset(offset));
     127             :         ScaleFactor scale = ScaleFor(kMode_M1, mode);
     128             :         int32_t disp = 0;
     129       19747 :         return Operand(index, scale, disp);
     130             :       }
     131             :       case kMode_M1I:
     132             :       case kMode_M2I:
     133             :       case kMode_M4I:
     134             :       case kMode_M8I: {
     135      132467 :         Register index = InputRegister(NextOffset(offset));
     136             :         ScaleFactor scale = ScaleFor(kMode_M1I, mode);
     137             :         int32_t disp = InputInt32(NextOffset(offset));
     138      132467 :         return Operand(index, scale, disp);
     139             :       }
     140             :       case kMode_Root: {
     141     1499776 :         Register base = kRootRegister;
     142             :         int32_t disp = InputInt32(NextOffset(offset));
     143     1499776 :         return Operand(base, disp);
     144             :       }
     145             :       case kMode_None:
     146           0 :         UNREACHABLE();
     147             :     }
     148           0 :     UNREACHABLE();
     149             :   }
     150             : 
     151             :   Operand MemoryOperand(size_t first_input = 0) {
     152     9281912 :     return MemoryOperand(&first_input);
     153             :   }
     154             : };
     155             : 
     156             : namespace {
     157             : 
     158             : bool HasImmediateInput(Instruction* instr, size_t index) {
     159             :   return instr->InputAt(index)->IsImmediate();
     160             : }
     161             : 
     162           0 : class OutOfLineLoadFloat32NaN final : public OutOfLineCode {
     163             :  public:
     164             :   OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result)
     165         132 :       : OutOfLineCode(gen), result_(result) {}
     166             : 
     167         132 :   void Generate() final {
     168             :     __ Xorps(result_, result_);
     169             :     __ Divss(result_, result_);
     170         132 :   }
     171             : 
     172             :  private:
     173             :   XMMRegister const result_;
     174             : };
     175             : 
     176           0 : class OutOfLineLoadFloat64NaN final : public OutOfLineCode {
     177             :  public:
     178             :   OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result)
     179         591 :       : OutOfLineCode(gen), result_(result) {}
     180             : 
     181         590 :   void Generate() final {
     182             :     __ Xorpd(result_, result_);
     183             :     __ Divsd(result_, result_);
     184         591 :   }
     185             : 
     186             :  private:
     187             :   XMMRegister const result_;
     188             : };
     189             : 
     190           0 : class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
     191             :  public:
     192             :   OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
     193             :                              XMMRegister input, StubCallMode stub_mode,
     194             :                              UnwindingInfoWriter* unwinding_info_writer)
     195             :       : OutOfLineCode(gen),
     196             :         result_(result),
     197             :         input_(input),
     198             :         stub_mode_(stub_mode),
     199             :         unwinding_info_writer_(unwinding_info_writer),
     200             :         isolate_(gen->isolate()),
     201       53479 :         zone_(gen->zone()) {}
     202             : 
     203       53473 :   void Generate() final {
     204       53473 :     __ subq(rsp, Immediate(kDoubleSize));
     205       53477 :     unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
     206       53477 :                                                       kDoubleSize);
     207      106952 :     __ Movsd(MemOperand(rsp, 0), input_);
     208       53473 :     if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
     209             :       // A direct call to a wasm runtime stub defined in this module.
     210             :       // Just encode the stub index. This will be patched when the code
     211             :       // is added to the native module and copied into wasm code space.
     212        1626 :       __ near_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
     213             :     } else {
     214      103694 :       __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET);
     215             :     }
     216      106958 :     __ movl(result_, MemOperand(rsp, 0));
     217       53479 :     __ addq(rsp, Immediate(kDoubleSize));
     218       53478 :     unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
     219       53478 :                                                       -kDoubleSize);
     220       53478 :   }
     221             : 
     222             :  private:
     223             :   Register const result_;
     224             :   XMMRegister const input_;
     225             :   StubCallMode stub_mode_;
     226             :   UnwindingInfoWriter* const unwinding_info_writer_;
     227             :   Isolate* isolate_;
     228             :   Zone* zone_;
     229             : };
     230             : 
     231           0 : class OutOfLineRecordWrite final : public OutOfLineCode {
     232             :  public:
     233             :   OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand,
     234             :                        Register value, Register scratch0, Register scratch1,
     235             :                        RecordWriteMode mode, StubCallMode stub_mode)
     236             :       : OutOfLineCode(gen),
     237             :         object_(object),
     238             :         operand_(operand),
     239             :         value_(value),
     240             :         scratch0_(scratch0),
     241             :         scratch1_(scratch1),
     242             :         mode_(mode),
     243             :         stub_mode_(stub_mode),
     244      320389 :         zone_(gen->zone()) {}
     245             : 
     246      320388 :   void Generate() final {
     247      320388 :     if (mode_ > RecordWriteMode::kValueIsPointer) {
     248      253115 :       __ JumpIfSmi(value_, exit());
     249             :     }
     250             :     __ CheckPageFlag(value_, scratch0_,
     251             :                      MemoryChunk::kPointersToHereAreInterestingMask, zero,
     252      320388 :                      exit());
     253      320389 :     __ leaq(scratch1_, operand_);
     254             : 
     255             :     RememberedSetAction const remembered_set_action =
     256      320389 :         mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
     257      320389 :                                              : OMIT_REMEMBERED_SET;
     258             :     SaveFPRegsMode const save_fp_mode =
     259      320389 :         frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
     260             : 
     261      320389 :     if (mode_ == RecordWriteMode::kValueIsEphemeronKey) {
     262         112 :       __ CallEphemeronKeyBarrier(object_, scratch1_, save_fp_mode);
     263      320277 :     } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
     264             :       // A direct call to a wasm runtime stub defined in this module.
     265             :       // Just encode the stub index. This will be patched when the code
     266             :       // is added to the native module and copied into wasm code space.
     267             :       __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
     268         286 :                              save_fp_mode, wasm::WasmCode::kWasmRecordWrite);
     269             :     } else {
     270             :       __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
     271      319991 :                              save_fp_mode);
     272             :     }
     273      320389 :   }
     274             : 
     275             :  private:
     276             :   Register const object_;
     277             :   Operand const operand_;
     278             :   Register const value_;
     279             :   Register const scratch0_;
     280             :   Register const scratch1_;
     281             :   RecordWriteMode const mode_;
     282             :   StubCallMode const stub_mode_;
     283             :   Zone* zone_;
     284             : };
     285             : 
     286           0 : class WasmOutOfLineTrap : public OutOfLineCode {
     287             :  public:
     288             :   WasmOutOfLineTrap(CodeGenerator* gen, Instruction* instr)
     289      384553 :       : OutOfLineCode(gen), gen_(gen), instr_(instr) {}
     290             : 
     291      141996 :   void Generate() override {
     292      141996 :     X64OperandConverter i(gen_, instr_);
     293             :     TrapId trap_id =
     294      284064 :         static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
     295             :     GenerateWithTrapId(trap_id);
     296      142119 :   }
     297             : 
     298             :  protected:
     299             :   CodeGenerator* gen_;
     300             : 
     301      384091 :   void GenerateWithTrapId(TrapId trap_id) { GenerateCallToTrap(trap_id); }
     302             : 
     303             :  private:
     304      383923 :   void GenerateCallToTrap(TrapId trap_id) {
     305      383923 :     if (!gen_->wasm_runtime_exception_support()) {
     306             :       // We cannot test calls to the runtime in cctest/test-run-wasm.
     307             :       // Therefore we emit a call to C here instead of a call to the runtime.
     308      153836 :       __ PrepareCallCFunction(0);
     309      153836 :       __ CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(),
     310      153836 :                        0);
     311      153836 :       __ LeaveFrame(StackFrame::WASM_COMPILED);
     312      153836 :       auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
     313             :       size_t pop_size =
     314      153836 :           call_descriptor->StackParameterCount() * kSystemPointerSize;
     315             :       // Use rcx as a scratch register, we return anyways immediately.
     316      153836 :       __ Ret(static_cast<int>(pop_size), rcx);
     317             :     } else {
     318      230220 :       gen_->AssembleSourcePosition(instr_);
     319             :       // A direct call to a wasm runtime stub defined in this module.
     320             :       // Just encode the stub index. This will be patched when the code
     321             :       // is added to the native module and copied into wasm code space.
     322      230658 :       __ near_call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
     323             :       ReferenceMap* reference_map =
     324      230285 :           new (gen_->zone()) ReferenceMap(gen_->zone());
     325      230107 :       gen_->RecordSafepoint(reference_map, Safepoint::kSimple,
     326      230107 :                             Safepoint::kNoLazyDeopt);
     327      230961 :       __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
     328             :     }
     329      384192 :   }
     330             : 
     331             :   Instruction* instr_;
     332             : };
     333             : 
     334           0 : class WasmProtectedInstructionTrap final : public WasmOutOfLineTrap {
     335             :  public:
     336             :   WasmProtectedInstructionTrap(CodeGenerator* gen, int pc, Instruction* instr)
     337      241659 :       : WasmOutOfLineTrap(gen, instr), pc_(pc) {}
     338             : 
     339      241548 :   void Generate() final {
     340      241548 :     gen_->AddProtectedInstructionLanding(pc_, __ pc_offset());
     341      242023 :     GenerateWithTrapId(TrapId::kTrapMemOutOfBounds);
     342      242090 :   }
     343             : 
     344             :  private:
     345             :   int pc_;
     346             : };
     347             : 
     348    12572853 : void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen,
     349             :                          InstructionCode opcode, Instruction* instr,
     350             :                          X64OperandConverter& i, int pc) {
     351             :   const MemoryAccessMode access_mode =
     352    12572853 :       static_cast<MemoryAccessMode>(MiscField::decode(opcode));
     353    12572853 :   if (access_mode == kMemoryAccessProtected) {
     354             :     new (zone) WasmProtectedInstructionTrap(codegen, pc, instr);
     355             :   }
     356    12572054 : }
     357             : 
     358    11906460 : void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
     359             :                                    InstructionCode opcode, Instruction* instr,
     360             :                                    X64OperandConverter& i) {
     361             :   const MemoryAccessMode access_mode =
     362    11906460 :       static_cast<MemoryAccessMode>(MiscField::decode(opcode));
     363    11906460 :   if (access_mode == kMemoryAccessPoisoned) {
     364             :     Register value = i.OutputRegister();
     365           0 :     codegen->tasm()->andq(value, kSpeculationPoisonRegister);
     366             :   }
     367    11906460 : }
     368             : 
     369             : }  // namespace
     370             : 
     371             : #define ASSEMBLE_UNOP(asm_instr)         \
     372             :   do {                                   \
     373             :     if (instr->Output()->IsRegister()) { \
     374             :       __ asm_instr(i.OutputRegister());  \
     375             :     } else {                             \
     376             :       __ asm_instr(i.OutputOperand());   \
     377             :     }                                    \
     378             :   } while (false)
     379             : 
     380             : #define ASSEMBLE_BINOP(asm_instr)                                     \
     381             :   do {                                                                \
     382             :     if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
     383             :       size_t index = 1;                                               \
     384             :       Operand right = i.MemoryOperand(&index);                        \
     385             :       __ asm_instr(i.InputRegister(0), right);                        \
     386             :     } else {                                                          \
     387             :       if (HasImmediateInput(instr, 1)) {                              \
     388             :         if (instr->InputAt(0)->IsRegister()) {                        \
     389             :           __ asm_instr(i.InputRegister(0), i.InputImmediate(1));      \
     390             :         } else {                                                      \
     391             :           __ asm_instr(i.InputOperand(0), i.InputImmediate(1));       \
     392             :         }                                                             \
     393             :       } else {                                                        \
     394             :         if (instr->InputAt(1)->IsRegister()) {                        \
     395             :           __ asm_instr(i.InputRegister(0), i.InputRegister(1));       \
     396             :         } else {                                                      \
     397             :           __ asm_instr(i.InputRegister(0), i.InputOperand(1));        \
     398             :         }                                                             \
     399             :       }                                                               \
     400             :     }                                                                 \
     401             :   } while (false)
     402             : 
     403             : #define ASSEMBLE_COMPARE(asm_instr)                                   \
     404             :   do {                                                                \
     405             :     if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
     406             :       size_t index = 0;                                               \
     407             :       Operand left = i.MemoryOperand(&index);                         \
     408             :       if (HasImmediateInput(instr, index)) {                          \
     409             :         __ asm_instr(left, i.InputImmediate(index));                  \
     410             :       } else {                                                        \
     411             :         __ asm_instr(left, i.InputRegister(index));                   \
     412             :       }                                                               \
     413             :     } else {                                                          \
     414             :       if (HasImmediateInput(instr, 1)) {                              \
     415             :         if (instr->InputAt(0)->IsRegister()) {                        \
     416             :           __ asm_instr(i.InputRegister(0), i.InputImmediate(1));      \
     417             :         } else {                                                      \
     418             :           __ asm_instr(i.InputOperand(0), i.InputImmediate(1));       \
     419             :         }                                                             \
     420             :       } else {                                                        \
     421             :         if (instr->InputAt(1)->IsRegister()) {                        \
     422             :           __ asm_instr(i.InputRegister(0), i.InputRegister(1));       \
     423             :         } else {                                                      \
     424             :           __ asm_instr(i.InputRegister(0), i.InputOperand(1));        \
     425             :         }                                                             \
     426             :       }                                                               \
     427             :     }                                                                 \
     428             :   } while (false)
     429             : 
     430             : #define ASSEMBLE_MULT(asm_instr)                              \
     431             :   do {                                                        \
     432             :     if (HasImmediateInput(instr, 1)) {                        \
     433             :       if (instr->InputAt(0)->IsRegister()) {                  \
     434             :         __ asm_instr(i.OutputRegister(), i.InputRegister(0),  \
     435             :                      i.InputImmediate(1));                    \
     436             :       } else {                                                \
     437             :         __ asm_instr(i.OutputRegister(), i.InputOperand(0),   \
     438             :                      i.InputImmediate(1));                    \
     439             :       }                                                       \
     440             :     } else {                                                  \
     441             :       if (instr->InputAt(1)->IsRegister()) {                  \
     442             :         __ asm_instr(i.OutputRegister(), i.InputRegister(1)); \
     443             :       } else {                                                \
     444             :         __ asm_instr(i.OutputRegister(), i.InputOperand(1));  \
     445             :       }                                                       \
     446             :     }                                                         \
     447             :   } while (false)
     448             : 
     449             : #define ASSEMBLE_SHIFT(asm_instr, width)                                   \
     450             :   do {                                                                     \
     451             :     if (HasImmediateInput(instr, 1)) {                                     \
     452             :       if (instr->Output()->IsRegister()) {                                 \
     453             :         __ asm_instr(i.OutputRegister(), Immediate(i.InputInt##width(1))); \
     454             :       } else {                                                             \
     455             :         __ asm_instr(i.OutputOperand(), Immediate(i.InputInt##width(1)));  \
     456             :       }                                                                    \
     457             :     } else {                                                               \
     458             :       if (instr->Output()->IsRegister()) {                                 \
     459             :         __ asm_instr##_cl(i.OutputRegister());                             \
     460             :       } else {                                                             \
     461             :         __ asm_instr##_cl(i.OutputOperand());                              \
     462             :       }                                                                    \
     463             :     }                                                                      \
     464             :   } while (false)
     465             : 
     466             : #define ASSEMBLE_MOVX(asm_instr)                            \
     467             :   do {                                                      \
     468             :     if (instr->addressing_mode() != kMode_None) {           \
     469             :       __ asm_instr(i.OutputRegister(), i.MemoryOperand());  \
     470             :     } else if (instr->InputAt(0)->IsRegister()) {           \
     471             :       __ asm_instr(i.OutputRegister(), i.InputRegister(0)); \
     472             :     } else {                                                \
     473             :       __ asm_instr(i.OutputRegister(), i.InputOperand(0));  \
     474             :     }                                                       \
     475             :   } while (false)
     476             : 
     477             : #define ASSEMBLE_SSE_BINOP(asm_instr)                                   \
     478             :   do {                                                                  \
     479             :     if (instr->InputAt(1)->IsFPRegister()) {                            \
     480             :       __ asm_instr(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); \
     481             :     } else {                                                            \
     482             :       __ asm_instr(i.InputDoubleRegister(0), i.InputOperand(1));        \
     483             :     }                                                                   \
     484             :   } while (false)
     485             : 
     486             : #define ASSEMBLE_SSE_UNOP(asm_instr)                                    \
     487             :   do {                                                                  \
     488             :     if (instr->InputAt(0)->IsFPRegister()) {                            \
     489             :       __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); \
     490             :     } else {                                                            \
     491             :       __ asm_instr(i.OutputDoubleRegister(), i.InputOperand(0));        \
     492             :     }                                                                   \
     493             :   } while (false)
     494             : // Three-operand variant emitted inside a CpuFeatureScope for AVX: (output double register, input 0 double register, input 1 as register or operand).
     495             : #define ASSEMBLE_AVX_BINOP(asm_instr)                                  \
     496             :   do {                                                                 \
     497             :     CpuFeatureScope avx_scope(tasm(), AVX);                            \
     498             :     if (instr->InputAt(1)->IsFPRegister()) {                           \
     499             :       __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
     500             :                    i.InputDoubleRegister(1));                          \
     501             :     } else {                                                           \
     502             :       __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
     503             :                    i.InputOperand(1));                                 \
     504             :     }                                                                  \
     505             :   } while (false)  // relies on `instr`, `i` and tasm() in the expanding scope
     506             : // Prepares and emits a C call to ExternalReference::ieee754_<name>_function() with an argument count of 2.
     507             : #define ASSEMBLE_IEEE754_BINOP(name)                                     \
     508             :   do {                                                                   \
     509             :     __ PrepareCallCFunction(2);                                          \
     510             :     __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \
     511             :   } while (false)  // NOTE(review): no argument moves are emitted here; presumably operands are already in place - confirm
     512             : // Prepares and emits a C call to ExternalReference::ieee754_<name>_function() with an argument count of 1.
     513             : #define ASSEMBLE_IEEE754_UNOP(name)                                      \
     514             :   do {                                                                   \
     515             :     __ PrepareCallCFunction(1);                                          \
     516             :     __ CallCFunction(ExternalReference::ieee754_##name##_function(), 1); \
     517             :   } while (false)  // NOTE(review): no argument moves are emitted here; presumably the operand is already in place - confirm
     518             : // Retry loop: load i.MemoryOperand(1) into rax via mov_inst, copy rax to temp 0 (movl), apply bin_inst with input register 0, then lock + cmpxchg_inst(memory, temp 0); jump back to the load on not_equal.
     519             : #define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
     520             :   do {                                                          \
     521             :     Label binop;                                                \
     522             :     __ bind(&binop);                                            \
     523             :     __ mov_inst(rax, i.MemoryOperand(1));                       \
     524             :     __ movl(i.TempRegister(0), rax);                            \
     525             :     __ bin_inst(i.TempRegister(0), i.InputRegister(0));         \
     526             :     __ lock();                                                  \
     527             :     __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));     \
     528             :     __ j(not_equal, &binop);                                    \
     529             :   } while (false)  // rax is written unconditionally; relies on `i` in the expanding scope
     530             : // Same retry loop as ASSEMBLE_ATOMIC_BINOP, except the loaded value is copied from rax to temp 0 with movq instead of movl.
     531             : #define ASSEMBLE_ATOMIC64_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
     532             :   do {                                                            \
     533             :     Label binop;                                                  \
     534             :     __ bind(&binop);                                              \
     535             :     __ mov_inst(rax, i.MemoryOperand(1));                         \
     536             :     __ movq(i.TempRegister(0), rax);                              \
     537             :     __ bin_inst(i.TempRegister(0), i.InputRegister(0));           \
     538             :     __ lock();                                                    \
     539             :     __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));       \
     540             :     __ j(not_equal, &binop);                                      \
     541             :   } while (false)  // rax is written unconditionally; relies on `i` in the expanding scope
     542             : // Emits `opcode(dst_operand, src)` where src is input `index` as a Simd128 register if it is one, else i.InputOperand(index).
     543             : #define ASSEMBLE_SIMD_INSTR(opcode, dst_operand, index)      \
     544             :   do {                                                       \
     545             :     if (instr->InputAt(index)->IsSimd128Register()) {        \
     546             :       __ opcode(dst_operand, i.InputSimd128Register(index)); \
     547             :     } else {                                                 \
     548             :       __ opcode(dst_operand, i.InputOperand(index));         \
     549             :     }                                                        \
     550             :   } while (false)  // relies on `instr` and the operand converter `i` in the expanding scope
     551             : // Like ASSEMBLE_SIMD_INSTR, but passes `imm` as an additional trailing argument to `opcode`.
     552             : #define ASSEMBLE_SIMD_IMM_INSTR(opcode, dst_operand, index, imm)  \
     553             :   do {                                                            \
     554             :     if (instr->InputAt(index)->IsSimd128Register()) {             \
     555             :       __ opcode(dst_operand, i.InputSimd128Register(index), imm); \
     556             :     } else {                                                      \
     557             :       __ opcode(dst_operand, i.InputOperand(index), imm);         \
     558             :     }                                                             \
     559             :   } while (false)  // relies on `instr` and the operand converter `i` in the expanding scope
     560             : // DCHECKs that the output Simd128 register equals input 0, then emits `opcode` via ASSEMBLE_SIMD_INSTR using input 1 as source when the instruction has exactly two inputs, otherwise input 0.
     561             : #define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode)             \
     562             :   do {                                                   \
     563             :     XMMRegister dst = i.OutputSimd128Register();         \
     564             :     DCHECK_EQ(dst, i.InputSimd128Register(0));           \
     565             :     byte input_index = instr->InputCount() == 2 ? 1 : 0; \
     566             :     ASSEMBLE_SIMD_INSTR(opcode, dst, input_index);       \
     567             :   } while (false)  // relies on `instr` and the operand converter `i` in the expanding scope
     568             : // Opens a CpuFeatureScope for `SSELevel`, DCHECKs that the output Simd128 register equals input 0, and emits `opcode(output, input 1, imm)`.
     569             : #define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, SSELevel, imm)                  \
     570             :   do {                                                                    \
     571             :     CpuFeatureScope sse_scope(tasm(), SSELevel);                          \
     572             :     DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));      \
     573             :     __ opcode(i.OutputSimd128Register(), i.InputSimd128Register(1), imm); \
     574             :   } while (false)  // input 1 is always used in register form here (no operand fallback)
     575             : // Marks the frame as deconstructed at the current pc offset in the unwinding info, then emits movq(rsp, rbp) followed by popq(rbp).
     576     2647565 : void CodeGenerator::AssembleDeconstructFrame() {
     577     2647565 :   unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());  // recorded before the teardown instructions are emitted
     578     2647755 :   __ movq(rsp, rbp);
     579     2648486 :   __ popq(rbp);
     580     2648506 : }
     581             : // When the frame access state reports a frame, reloads rbp from MemOperand(rbp, 0); in all cases switches frame access to SP-relative.
     582      119851 : void CodeGenerator::AssemblePrepareTailCall() {
     583      119851 :   if (frame_access_state()->has_frame()) {
     584      137126 :     __ movq(rbp, MemOperand(rbp, 0));  // NOTE(review): presumably restores the caller's saved frame pointer - confirm
     585             :   }
     586             :   frame_access_state()->SetFrameAccessToSP();
     587      119851 : }
     588             : // If the frame-type slot at rbp holds the ARGUMENTS_ADAPTOR marker, loads that frame's untagged length into scratch1 and emits PrepareForTailCall; otherwise control skips to `done`.
     589        1288 : void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
     590             :                                                      Register scratch1,
     591             :                                                      Register scratch2,
     592             :                                                      Register scratch3) {
     593             :   DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));  // all four registers must be distinct
     594        1288 :   Label done;
     595             : 
     596             :   // Check if current frame is an arguments adaptor frame.
     597        2576 :   __ cmpq(Operand(rbp, CommonFrameConstants::kContextOrFrameTypeOffset),
     598        1288 :           Immediate(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
     599        1288 :   __ j(not_equal, &done, Label::kNear);  // not an adaptor frame: nothing more to emit on this path
     600             : 
     601             :   // Load arguments count from current arguments adaptor frame (note, it
     602             :   // does not include receiver).
     603        1288 :   Register caller_args_count_reg = scratch1;  // scratch1 is reused under a descriptive name
     604        2576 :   __ SmiUntag(caller_args_count_reg,
     605        1288 :               Operand(rbp, ArgumentsAdaptorFrameConstants::kLengthOffset));
     606             : 
     607             :   ParameterCount callee_args_count(args_reg);
     608             :   __ PrepareForTailCall(callee_args_count, caller_args_count_reg, scratch2,
     609        1288 :                         scratch3);  // NOTE(review): presumably this is what removes the adaptor frame - confirm in the macro assembler
     610        1288 :   __ bind(&done);
     611        1288 : }
     612             : 
     613             : namespace {
     614             : // Emits a subq/addq on rsp so that the slot count above sp becomes `new_slot_above_sp`, and mirrors the change in `state`'s SP delta. With allow_shrinkage == false, a negative delta emits nothing.
     615      271410 : void AdjustStackPointerForTailCall(Assembler* assembler,
     616             :                                    FrameAccessState* state,
     617             :                                    int new_slot_above_sp,
     618             :                                    bool allow_shrinkage = true) {
     619             :   int current_sp_offset = state->GetSPToFPSlotCount() +
     620      271410 :                           StandardFrameConstants::kFixedSlotCountAboveFp;
     621      271410 :   int stack_slot_delta = new_slot_above_sp - current_sp_offset;  // in slots; converted to bytes below
     622      271410 :   if (stack_slot_delta > 0) {
     623         792 :     assembler->subq(rsp, Immediate(stack_slot_delta * kSystemPointerSize));
     624             :     state->IncreaseSPDelta(stack_slot_delta);
     625      270618 :   } else if (allow_shrinkage && stack_slot_delta < 0) {
     626       68199 :     assembler->addq(rsp, Immediate(-stack_slot_delta * kSystemPointerSize));
     627             :     state->IncreaseSPDelta(stack_slot_delta);  // delta is negative here, so this decreases the SP delta
     628             :   }
     629      271410 : }
     630             : // Pushes the four 32-bit words of `mask` as two 64-bit values via kScratchRegister: first (mask[3] << 32 | mask[2]), then (mask[1] << 32 | mask[0]).
     631        2224 : void SetupShuffleMaskOnStack(TurboAssembler* assembler, uint32_t* mask) {
     632        2224 :   int64_t shuffle_mask = (mask[2]) | (static_cast<uint64_t>(mask[3]) << 32);
     633        2224 :   assembler->movq(kScratchRegister, shuffle_mask);
     634        2224 :   assembler->Push(kScratchRegister);
     635        2224 :   shuffle_mask = (mask[0]) | (static_cast<uint64_t>(mask[1]) << 32);
     636             :   assembler->movq(kScratchRegister, shuffle_mask);
     637        2224 :   assembler->Push(kScratchRegister);  // NOTE(review): presumably the caller is responsible for releasing these two slots - confirm
     638        2224 : }
     639             : 
     640             : }  // namespace
     641             : // Runs before a tail call's gap moves: turns eligible immediate/scalar moves into Push instructions, then adjusts rsp towards `first_unused_stack_slot` without allowing shrinkage.
     642      119863 : void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
     643             :                                               int first_unused_stack_slot) {
     644      119863 :   CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
     645             :   ZoneVector<MoveOperands*> pushes(zone());
     646      119863 :   GetPushCompatibleMoves(instr, flags, &pushes);
     647             : 
     648      132919 :   if (!pushes.empty() &&
     649       26112 :       (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
     650             :        first_unused_stack_slot)) {  // only when the last push's destination index is first_unused_stack_slot - 1
     651             :     X64OperandConverter g(this, instr);
     652       44740 :     for (auto move : pushes) {
     653             :       LocationOperand destination_location(
     654             :           LocationOperand::cast(move->destination()));
     655       31684 :       InstructionOperand source(move->source());
     656             :       AdjustStackPointerForTailCall(tasm(), frame_access_state(),
     657       31684 :                                     destination_location.index());  // position rsp for this destination before pushing
     658       31684 :       if (source.IsStackSlot()) {
     659             :         LocationOperand source_location(LocationOperand::cast(source));
     660       13068 :         __ Push(g.SlotToOperand(source_location.index()));
     661       18616 :       } else if (source.IsRegister()) {
     662             :         LocationOperand source_location(LocationOperand::cast(source));
     663       18616 :         __ Push(source_location.GetRegister());
     664           0 :       } else if (source.IsImmediate()) {
     665           0 :         __ Push(Immediate(ImmediateOperand::cast(source).inline_value()));
     666             :       } else {
     667             :         // Pushes of non-scalar data types are not supported.
     668           0 :         UNIMPLEMENTED();
     669             :       }
     670             :       frame_access_state()->IncreaseSPDelta(1);  // account for the slot just pushed
     671             :       move->Eliminate();  // handled by the Push above
     672             :     }
     673             :   }
     674             :   AdjustStackPointerForTailCall(tasm(), frame_access_state(),
     675      119863 :                                 first_unused_stack_slot, false);  // allow_shrinkage = false: only the subq branch can be emitted
     676      119863 : }
     677             : // Runs after a tail call's gap moves: adjusts rsp to `first_unused_stack_slot`, with allow_shrinkage left at its default (true). `instr` is not used.
     678      119863 : void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
     679             :                                              int first_unused_stack_slot) {
     680             :   AdjustStackPointerForTailCall(tasm(), frame_access_state(),
     681      119863 :                                 first_unused_stack_slot);
     682      119863 : }
     683             : 
     684             : // Check that {kJavaScriptCallCodeStartRegister} is correct.
     685         114 : void CodeGenerator::AssembleCodeStartRegisterCheck() {
     686         114 :   __ ComputeCodeStartAddress(rbx);  // rbx is used as the scratch register for the computed address
     687         114 :   __ cmpq(rbx, kJavaScriptCallCodeStartRegister);
     688         114 :   __ Assert(equal, AbortReason::kWrongFunctionCodeStart);  // asserts that the two values compared equal
     689         114 : }
     690             : 
     691             : // Check if the code object is marked for deoptimization. If it is, then it
     692             : // jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need
     693             : // to:
     694             : //    1. read from memory the word that contains that bit, which can be found in
     695             : //       the flags in the referenced {CodeDataContainer} object;
     696             : //    2. test kMarkedForDeoptimizationBit in those flags; and
     697             : //    3. if it is not zero then it jumps to the builtin.
     698      464073 : void CodeGenerator::BailoutIfDeoptimized() {
     699             :   int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;  // NOTE(review): header size is subtracted presumably because the register points past the header - confirm
     700      928144 :   __ LoadTaggedPointerField(rbx,
     701      464073 :                             Operand(kJavaScriptCallCodeStartRegister, offset));  // step 1: rbx <- CodeDataContainer reference
     702      464072 :   __ testl(FieldOperand(rbx, CodeDataContainer::kKindSpecificFlagsOffset),
     703             :            Immediate(1 << Code::kMarkedForDeoptimizationBit));  // step 2: test the single flag bit
     704      464071 :   __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
     705      464073 :           RelocInfo::CODE_TARGET, not_zero);  // step 3: jump is conditional on not_zero
     706      464072 : }
     707             : // Builds the poison mask in kSpeculationPoisonRegister by comparing kJavaScriptCallCodeStartRegister with the code start address computed into rbx.
     708           0 : void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
     709             :   // Set a mask which has all bits set in the normal case, but has all
     710             :   // bits cleared if we are speculatively executing the wrong PC.
     711           0 :   __ ComputeCodeStartAddress(rbx);
     712           0 :   __ xorq(kSpeculationPoisonRegister, kSpeculationPoisonRegister);  // start from an all-zero mask
     713             :   __ cmpq(kJavaScriptCallCodeStartRegister, rbx);
     714             :   __ movq(rbx, Immediate(-1));  // rbx is reused to hold the all-ones value
     715           0 :   __ cmovq(equal, kSpeculationPoisonRegister, rbx);  // mask becomes -1 only when the cmpq above found equality
     716           0 : }
     717             : // ANDs kJSFunctionRegister, kContextRegister and rsp in place with kSpeculationPoisonRegister.
     718           0 : void CodeGenerator::AssembleRegisterArgumentPoisoning() {
     719           0 :   __ andq(kJSFunctionRegister, kSpeculationPoisonRegister);
     720             :   __ andq(kContextRegister, kSpeculationPoisonRegister);
     721             :   __ andq(rsp, kSpeculationPoisonRegister);
     722           0 : }
     723             : 
     724             : // Assembles an instruction after register allocation, producing machine code.
     725    68878638 : CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     726             :     Instruction* instr) {
     727             :   X64OperandConverter i(this, instr);
     728             :   InstructionCode opcode = instr->opcode();
     729    68878638 :   ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
     730    68878638 :   switch (arch_opcode) {
     731             :     case kArchCallCodeObject: {
     732     4828062 :       if (HasImmediateInput(instr, 0)) {
     733     4451568 :         Handle<Code> code = i.InputCode(0);
     734     4451568 :         __ Call(code, RelocInfo::CODE_TARGET);
     735             :       } else {
     736      376499 :         Register reg = i.InputRegister(0);
     737             :         DCHECK_IMPLIES(
     738             :             HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
     739             :             reg == kJavaScriptCallCodeStartRegister);
     740      376499 :         __ LoadCodeObjectEntry(reg, reg);
     741      376503 :         if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
     742           0 :           __ RetpolineCall(reg);
     743             :         } else {
     744      376503 :           __ call(reg);
     745             :         }
     746             :       }
     747     4828084 :       RecordCallPosition(instr);
     748             :       frame_access_state()->ClearSPDelta();
     749             :       break;
     750             :     }
     751             :     case kArchCallBuiltinPointer: {
     752             :       DCHECK(!HasImmediateInput(instr, 0));
     753        3608 :       Register builtin_pointer = i.InputRegister(0);
     754        3608 :       __ CallBuiltinPointer(builtin_pointer);
     755        3608 :       RecordCallPosition(instr);
     756             :       frame_access_state()->ClearSPDelta();
     757             :       break;
     758             :     }
     759             :     case kArchCallWasmFunction: {
     760     1181972 :       if (HasImmediateInput(instr, 0)) {
     761      192502 :         Constant constant = i.ToConstant(instr->InputAt(0));
     762      192564 :         Address wasm_code = static_cast<Address>(constant.ToInt64());
     763      192564 :         if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
     764      192613 :           __ near_call(wasm_code, constant.rmode());
     765             :         } else {
     766           0 :           if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
     767           0 :             __ RetpolineCall(wasm_code, constant.rmode());
     768             :           } else {
     769           0 :             __ Call(wasm_code, constant.rmode());
     770             :           }
     771             :         }
     772             :       } else {
     773      989470 :         Register reg = i.InputRegister(0);
     774      989470 :         if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
     775           0 :           __ RetpolineCall(reg);
     776             :         } else {
     777      989470 :           __ call(reg);
     778             :         }
     779             :       }
     780     1182044 :       RecordCallPosition(instr);
     781             :       frame_access_state()->ClearSPDelta();
     782             :       break;
     783             :     }
     784             :     case kArchTailCallCodeObjectFromJSFunction:
     785             :     case kArchTailCallCodeObject: {
     786       36864 :       if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) {
     787             :         AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
     788             :                                          i.TempRegister(0), i.TempRegister(1),
     789        1288 :                                          i.TempRegister(2));
     790             :       }
     791       36864 :       if (HasImmediateInput(instr, 0)) {
     792       31240 :         Handle<Code> code = i.InputCode(0);
     793       31240 :         __ Jump(code, RelocInfo::CODE_TARGET);
     794             :       } else {
     795        5624 :         Register reg = i.InputRegister(0);
     796             :         DCHECK_IMPLIES(
     797             :             HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
     798             :             reg == kJavaScriptCallCodeStartRegister);
     799        5624 :         __ LoadCodeObjectEntry(reg, reg);
     800        5624 :         if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
     801           0 :           __ RetpolineJump(reg);
     802             :         } else {
     803        5624 :           __ jmp(reg);
     804             :         }
     805             :       }
     806             :       unwinding_info_writer_.MarkBlockWillExit();
     807             :       frame_access_state()->ClearSPDelta();
     808       36864 :       frame_access_state()->SetFrameAccessToDefault();
     809       36864 :       break;
     810             :     }
     811             :     case kArchTailCallWasm: {
     812         219 :       if (HasImmediateInput(instr, 0)) {
     813         129 :         Constant constant = i.ToConstant(instr->InputAt(0));
     814             :         Address wasm_code = static_cast<Address>(constant.ToInt64());
     815         129 :         if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
     816         129 :           __ near_jmp(wasm_code, constant.rmode());
     817             :         } else {
     818             :           __ Move(kScratchRegister, wasm_code, constant.rmode());
     819           0 :           __ jmp(kScratchRegister);
     820             :         }
     821             :       } else {
     822          90 :         Register reg = i.InputRegister(0);
     823          90 :         if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
     824           0 :           __ RetpolineJump(reg);
     825             :         } else {
     826          90 :           __ jmp(reg);
     827             :         }
     828             :       }
     829             :       unwinding_info_writer_.MarkBlockWillExit();
     830             :       frame_access_state()->ClearSPDelta();
     831         219 :       frame_access_state()->SetFrameAccessToDefault();
     832         219 :       break;
     833             :     }
     834             :     case kArchTailCallAddress: {
     835       82768 :       CHECK(!HasImmediateInput(instr, 0));
     836       82768 :       Register reg = i.InputRegister(0);
     837             :       DCHECK_IMPLIES(
     838             :           HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
     839             :           reg == kJavaScriptCallCodeStartRegister);
     840       82768 :       if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
     841           0 :         __ RetpolineJump(reg);
     842             :       } else {
     843       82768 :         __ jmp(reg);
     844             :       }
     845             :       unwinding_info_writer_.MarkBlockWillExit();
     846             :       frame_access_state()->ClearSPDelta();
     847       82768 :       frame_access_state()->SetFrameAccessToDefault();
     848             :       break;
     849             :     }
     850             :     case kArchCallJSFunction: {
     851             :       Register func = i.InputRegister(0);
     852       23882 :       if (FLAG_debug_code) {
     853             :         // Check the function's context matches the context argument.
     854           8 :         __ cmp_tagged(rsi, FieldOperand(func, JSFunction::kContextOffset));
     855           8 :         __ Assert(equal, AbortReason::kWrongFunctionContext);
     856             :       }
     857             :       static_assert(kJavaScriptCallCodeStartRegister == rcx, "ABI mismatch");
     858             :       __ LoadTaggedPointerField(rcx,
     859       23883 :                                 FieldOperand(func, JSFunction::kCodeOffset));
     860       23882 :       __ CallCodeObject(rcx);
     861             :       frame_access_state()->ClearSPDelta();
     862       23882 :       RecordCallPosition(instr);
     863             :       break;
     864             :     }
     865             :     case kArchPrepareCallCFunction: {
     866             :       // Frame alignment requires using FP-relative frame addressing.
     867             :       frame_access_state()->SetFrameAccessToFP();
     868       25882 :       int const num_parameters = MiscField::decode(instr->opcode());
     869       25882 :       __ PrepareCallCFunction(num_parameters);
     870       25882 :       break;
     871             :     }
     872             :     case kArchSaveCallerRegisters: {
     873             :       fp_mode_ =
     874         900 :           static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
     875             :       DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
     876             :       // kReturnRegister0 should have been saved before entering the stub.
     877         900 :       int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
     878             :       DCHECK(IsAligned(bytes, kSystemPointerSize));
     879             :       DCHECK_EQ(0, frame_access_state()->sp_delta());
     880         900 :       frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
     881             :       DCHECK(!caller_registers_saved_);
     882         900 :       caller_registers_saved_ = true;
     883         900 :       break;
     884             :     }
     885             :     case kArchRestoreCallerRegisters: {
     886             :       DCHECK(fp_mode_ ==
     887             :              static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
     888             :       DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
     889             :       // Don't overwrite the returned value.
     890        1800 :       int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
     891         900 :       frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
     892             :       DCHECK_EQ(0, frame_access_state()->sp_delta());
     893             :       DCHECK(caller_registers_saved_);
     894         900 :       caller_registers_saved_ = false;
     895         900 :       break;
     896             :     }
     897             :     case kArchPrepareTailCall:
     898      119851 :       AssemblePrepareTailCall();
     899      119851 :       break;
     900             :     case kArchCallCFunction: {
     901             :       int const num_parameters = MiscField::decode(instr->opcode());
     902       25882 :       if (HasImmediateInput(instr, 0)) {
     903       24742 :         ExternalReference ref = i.InputExternalReference(0);
     904       24742 :         __ CallCFunction(ref, num_parameters);
     905             :       } else {
     906        1140 :         Register func = i.InputRegister(0);
     907        1140 :         __ CallCFunction(func, num_parameters);
     908             :       }
     909       25882 :       frame_access_state()->SetFrameAccessToDefault();
     910             :       // Ideally, we should decrement SP delta to match the change of stack
     911             :       // pointer in CallCFunction. However, for certain architectures (e.g.
     912             :       // ARM), there may be more strict alignment requirement, causing old SP
     913             :       // to be saved on the stack. In those cases, we can not calculate the SP
     914             :       // delta statically.
     915             :       frame_access_state()->ClearSPDelta();
     916       25882 :       if (caller_registers_saved_) {
     917             :         // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
     918             :         // Here, we assume the sequence to be:
     919             :         //   kArchSaveCallerRegisters;
     920             :         //   kArchCallCFunction;
     921             :         //   kArchRestoreCallerRegisters;
     922             :         int bytes =
     923        1800 :             __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
     924         900 :         frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
     925             :       }
     926             :       // TODO(tebbi): Do we need an lfence here?
     927             :       break;
     928             :     }
     929             :     case kArchJmp:
     930     4978083 :       AssembleArchJump(i.InputRpo(0));
     931     4978141 :       break;
     932             :     case kArchBinarySearchSwitch:
     933       34384 :       AssembleArchBinarySearchSwitch(instr);
     934       34385 :       break;
     935             :     case kArchLookupSwitch:
     936           0 :       AssembleArchLookupSwitch(instr);
     937           0 :       break;
     938             :     case kArchTableSwitch:
     939         344 :       AssembleArchTableSwitch(instr);
     940         344 :       break;
     941             :     case kArchComment:
     942           4 :       __ RecordComment(reinterpret_cast<const char*>(i.InputInt64(0)));
     943           4 :       break;
     944             :     case kArchDebugAbort:
     945             :       DCHECK(i.InputRegister(0) == rdx);
     946         160 :       if (!frame_access_state()->has_frame()) {
     947             :         // We don't actually want to generate a pile of code for this, so just
     948             :         // claim there is a stack frame, without generating one.
     949          16 :         FrameScope scope(tasm(), StackFrame::NONE);
     950          16 :         __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
     951          16 :                 RelocInfo::CODE_TARGET);
     952             :       } else {
     953         144 :         __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
     954         144 :                 RelocInfo::CODE_TARGET);
     955             :       }
     956         160 :       __ int3();
     957             :       unwinding_info_writer_.MarkBlockWillExit();
     958             :       break;
     959             :     case kArchDebugBreak:
     960      256122 :       __ int3();
     961      256122 :       break;
     962             :     case kArchThrowTerminator:
     963             :       unwinding_info_writer_.MarkBlockWillExit();
     964             :       break;
     965             :     case kArchNop:
     966             :       // don't emit code for nops.
     967             :       break;
     968             :     case kArchDeoptimize: {
     969             :       int deopt_state_id =
     970       44616 :           BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
     971             :       CodeGenResult result =
     972       44616 :           AssembleDeoptimizerCall(deopt_state_id, current_source_position_);
     973       44616 :       if (result != kSuccess) return result;
     974             :       unwinding_info_writer_.MarkBlockWillExit();
     975             :       break;
     976             :     }
     977             :     case kArchRet:
     978     2999241 :       AssembleReturn(instr->InputAt(0));
     979     2999140 :       break;
     980             :     case kArchStackPointer:
     981           0 :       __ movq(i.OutputRegister(), rsp);
     982             :       break;
     983             :     case kArchFramePointer:
     984       32802 :       __ movq(i.OutputRegister(), rbp);
     985             :       break;
     986             :     case kArchParentFramePointer:
     987       58132 :       if (frame_access_state()->has_frame()) {
     988       80148 :         __ movq(i.OutputRegister(), Operand(rbp, 0));
     989             :       } else {
     990       31416 :         __ movq(i.OutputRegister(), rbp);
     991             :       }
     992             :       break;
     993             :     case kArchTruncateDoubleToI: {
     994             :       auto result = i.OutputRegister();
     995             :       auto input = i.InputDoubleRegister(0);
     996             :       auto ool = new (zone()) OutOfLineTruncateDoubleToI(
     997             :           this, result, input, DetermineStubCallMode(),
     998      106953 :           &unwinding_info_writer_);
     999             :       // We use Cvttsd2siq instead of Cvttsd2si due to performance reasons. The
    1000             :       // use of Cvttsd2siq requires the movl below to avoid sign extension.
    1001       53475 :       __ Cvttsd2siq(result, input);
    1002       53478 :       __ cmpq(result, Immediate(1));
    1003       53478 :       __ j(overflow, ool->entry());
    1004       53478 :       __ bind(ool->exit());
    1005             :       __ movl(result, result);
    1006             :       break;
    1007             :     }
    1008             :     case kArchStoreWithWriteBarrier: {
    1009             :       RecordWriteMode mode =
    1010             :           static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
    1011             :       Register object = i.InputRegister(0);
    1012      320388 :       size_t index = 0;
    1013      320388 :       Operand operand = i.MemoryOperand(&index);
    1014      320388 :       Register value = i.InputRegister(index);
    1015             :       Register scratch0 = i.TempRegister(0);
    1016             :       Register scratch1 = i.TempRegister(1);
    1017             :       auto ool = new (zone())
    1018             :           OutOfLineRecordWrite(this, object, operand, value, scratch0, scratch1,
    1019      640776 :                                mode, DetermineStubCallMode());
    1020      320388 :       __ StoreTaggedField(operand, value);
    1021             :       __ CheckPageFlag(object, scratch0,
    1022             :                        MemoryChunk::kPointersFromHereAreInterestingMask,
    1023      320388 :                        not_zero, ool->entry());
    1024      320389 :       __ bind(ool->exit());
    1025             :       break;
    1026             :     }
    1027             :     case kArchWordPoisonOnSpeculation:
    1028             :       DCHECK_EQ(i.OutputRegister(), i.InputRegister(0));
    1029           0 :       __ andq(i.InputRegister(0), kSpeculationPoisonRegister);
    1030             :       break;
    1031             :     case kLFence:
    1032           0 :       __ lfence();
    1033           0 :       break;
    1034             :     case kArchStackSlot: {
    1035             :       FrameOffset offset =
    1036      367595 :           frame_access_state()->GetFrameOffset(i.InputInt32(0));
    1037      367599 :       Register base = offset.from_stack_pointer() ? rsp : rbp;
    1038     1102820 :       __ leaq(i.OutputRegister(), Operand(base, offset.offset()));
    1039             :       break;
    1040             :     }
    1041             :     case kIeee754Float64Acos:
    1042         116 :       ASSEMBLE_IEEE754_UNOP(acos);
    1043         116 :       break;
    1044             :     case kIeee754Float64Acosh:
    1045         116 :       ASSEMBLE_IEEE754_UNOP(acosh);
    1046         116 :       break;
    1047             :     case kIeee754Float64Asin:
    1048         116 :       ASSEMBLE_IEEE754_UNOP(asin);
    1049         116 :       break;
    1050             :     case kIeee754Float64Asinh:
    1051         116 :       ASSEMBLE_IEEE754_UNOP(asinh);
    1052         116 :       break;
    1053             :     case kIeee754Float64Atan:
    1054         133 :       ASSEMBLE_IEEE754_UNOP(atan);
    1055         133 :       break;
    1056             :     case kIeee754Float64Atanh:
    1057         116 :       ASSEMBLE_IEEE754_UNOP(atanh);
    1058         116 :       break;
    1059             :     case kIeee754Float64Atan2:
    1060         129 :       ASSEMBLE_IEEE754_BINOP(atan2);
    1061         129 :       break;
    1062             :     case kIeee754Float64Cbrt:
    1063         116 :       ASSEMBLE_IEEE754_UNOP(cbrt);
    1064         116 :       break;
    1065             :     case kIeee754Float64Cos:
    1066         271 :       ASSEMBLE_IEEE754_UNOP(cos);
    1067         271 :       break;
    1068             :     case kIeee754Float64Cosh:
    1069         123 :       ASSEMBLE_IEEE754_UNOP(cosh);
    1070         123 :       break;
    1071             :     case kIeee754Float64Exp:
    1072         147 :       ASSEMBLE_IEEE754_UNOP(exp);
    1073         147 :       break;
    1074             :     case kIeee754Float64Expm1:
    1075         123 :       ASSEMBLE_IEEE754_UNOP(expm1);
    1076         123 :       break;
    1077             :     case kIeee754Float64Log:
    1078         252 :       ASSEMBLE_IEEE754_UNOP(log);
    1079         252 :       break;
    1080             :     case kIeee754Float64Log1p:
    1081         116 :       ASSEMBLE_IEEE754_UNOP(log1p);
    1082         116 :       break;
    1083             :     case kIeee754Float64Log2:
    1084         116 :       ASSEMBLE_IEEE754_UNOP(log2);
    1085         116 :       break;
    1086             :     case kIeee754Float64Log10:
    1087         116 :       ASSEMBLE_IEEE754_UNOP(log10);
    1088         116 :       break;
    1089             :     case kIeee754Float64Pow:
    1090         334 :       ASSEMBLE_IEEE754_BINOP(pow);
    1091         334 :       break;
    1092             :     case kIeee754Float64Sin:
    1093         268 :       ASSEMBLE_IEEE754_UNOP(sin);
    1094         268 :       break;
    1095             :     case kIeee754Float64Sinh:
    1096         123 :       ASSEMBLE_IEEE754_UNOP(sinh);
    1097         123 :       break;
    1098             :     case kIeee754Float64Tan:
    1099         168 :       ASSEMBLE_IEEE754_UNOP(tan);
    1100         168 :       break;
    1101             :     case kIeee754Float64Tanh:
    1102         123 :       ASSEMBLE_IEEE754_UNOP(tanh);
    1103         123 :       break;
    1104             :     case kX64Add32:
    1105      292303 :       ASSEMBLE_BINOP(addl);
    1106             :       break;
    1107             :     case kX64Add:
    1108      306896 :       ASSEMBLE_BINOP(addq);
    1109             :       break;
    1110             :     case kX64Sub32:
    1111      182980 :       ASSEMBLE_BINOP(subl);
    1112             :       break;
    1113             :     case kX64Sub:
    1114      221018 :       ASSEMBLE_BINOP(subq);
    1115             :       break;
    1116             :     case kX64And32:
    1117      720342 :       ASSEMBLE_BINOP(andl);
    1118             :       break;
    1119             :     case kX64And:
    1120      932921 :       ASSEMBLE_BINOP(andq);
    1121             :       break;
    1122             :     case kX64Cmp8:
    1123       36197 :       ASSEMBLE_COMPARE(cmpb);
    1124             :       break;
    1125             :     case kX64Cmp16:
    1126     1297156 :       ASSEMBLE_COMPARE(cmpw);
    1127             :       break;
    1128             :     case kX64Cmp32:
    1129     4747476 :       ASSEMBLE_COMPARE(cmpl);
    1130             :       break;
    1131             :     case kX64Cmp:
    1132     8594033 :       ASSEMBLE_COMPARE(cmpq);
    1133             :       break;
    1134             :     case kX64Test8:
    1135      320461 :       ASSEMBLE_COMPARE(testb);
    1136             :       break;
    1137             :     case kX64Test16:
    1138       91592 :       ASSEMBLE_COMPARE(testw);
    1139             :       break;
    1140             :     case kX64Test32:
    1141      476559 :       ASSEMBLE_COMPARE(testl);
    1142             :       break;
    1143             :     case kX64Test:
    1144     2674568 :       ASSEMBLE_COMPARE(testq);
    1145             :       break;
    1146             :     case kX64Imul32:
    1147      157444 :       ASSEMBLE_MULT(imull);
    1148             :       break;
    1149             :     case kX64Imul:
    1150       62116 :       ASSEMBLE_MULT(imulq);
    1151             :       break;
    1152             :     case kX64ImulHigh32:
    1153        4195 :       if (instr->InputAt(1)->IsRegister()) {
    1154        4195 :         __ imull(i.InputRegister(1));
    1155             :       } else {
    1156           0 :         __ imull(i.InputOperand(1));
    1157             :       }
    1158             :       break;
    1159             :     case kX64UmulHigh32:
    1160        1642 :       if (instr->InputAt(1)->IsRegister()) {
    1161        1642 :         __ mull(i.InputRegister(1));
    1162             :       } else {
    1163           0 :         __ mull(i.InputOperand(1));
    1164             :       }
    1165             :       break;
    1166             :     case kX64Idiv32:
    1167       31993 :       __ cdq();
    1168             :       __ idivl(i.InputRegister(1));
    1169             :       break;
    1170             :     case kX64Idiv:
    1171        2732 :       __ cqo();
    1172             :       __ idivq(i.InputRegister(1));
    1173             :       break;
    1174             :     case kX64Udiv32:
    1175       29117 :       __ xorl(rdx, rdx);
    1176             :       __ divl(i.InputRegister(1));
    1177             :       break;
    1178             :     case kX64Udiv:
    1179        1768 :       __ xorq(rdx, rdx);
    1180             :       __ divq(i.InputRegister(1));
    1181             :       break;
    1182             :     case kX64Not:
    1183          88 :       ASSEMBLE_UNOP(notq);
    1184             :       break;
    1185             :     case kX64Not32:
    1186        5556 :       ASSEMBLE_UNOP(notl);
    1187             :       break;
    1188             :     case kX64Neg:
    1189       17454 :       ASSEMBLE_UNOP(negq);
    1190             :       break;
    1191             :     case kX64Neg32:
    1192       12838 :       ASSEMBLE_UNOP(negl);
    1193             :       break;
    1194             :     case kX64Or32:
    1195      317969 :       ASSEMBLE_BINOP(orl);
    1196             :       break;
    1197             :     case kX64Or:
    1198      374900 :       ASSEMBLE_BINOP(orq);
    1199             :       break;
    1200             :     case kX64Xor32:
    1201       79853 :       ASSEMBLE_BINOP(xorl);
    1202             :       break;
    1203             :     case kX64Xor:
    1204        3272 :       ASSEMBLE_BINOP(xorq);
    1205             :       break;
    1206             :     case kX64Shl32:
    1207      118015 :       ASSEMBLE_SHIFT(shll, 5);
    1208             :       break;
    1209             :     case kX64Shl:
    1210     1430416 :       ASSEMBLE_SHIFT(shlq, 6);
    1211             :       break;
    1212             :     case kX64Shr32:
    1213      393990 :       ASSEMBLE_SHIFT(shrl, 5);
    1214             :       break;
    1215             :     case kX64Shr:
    1216     1435726 :       ASSEMBLE_SHIFT(shrq, 6);
    1217             :       break;
    1218             :     case kX64Sar32:
    1219       98792 :       ASSEMBLE_SHIFT(sarl, 5);
    1220             :       break;
    1221             :     case kX64Sar:
    1222      714344 :       ASSEMBLE_SHIFT(sarq, 6);
    1223             :       break;
    1224             :     case kX64Ror32:
    1225      110783 :       ASSEMBLE_SHIFT(rorl, 5);
    1226             :       break;
    1227             :     case kX64Ror:
    1228         338 :       ASSEMBLE_SHIFT(rorq, 6);
    1229             :       break;
    1230             :     case kX64Lzcnt:
    1231          34 :       if (instr->InputAt(0)->IsRegister()) {
    1232          34 :         __ Lzcntq(i.OutputRegister(), i.InputRegister(0));
    1233             :       } else {
    1234           0 :         __ Lzcntq(i.OutputRegister(), i.InputOperand(0));
    1235             :       }
    1236             :       break;
    1237             :     case kX64Lzcnt32:
    1238         664 :       if (instr->InputAt(0)->IsRegister()) {
    1239         624 :         __ Lzcntl(i.OutputRegister(), i.InputRegister(0));
    1240             :       } else {
    1241          80 :         __ Lzcntl(i.OutputRegister(), i.InputOperand(0));
    1242             :       }
    1243             :       break;
    1244             :     case kX64Tzcnt:
    1245          34 :       if (instr->InputAt(0)->IsRegister()) {
    1246          34 :         __ Tzcntq(i.OutputRegister(), i.InputRegister(0));
    1247             :       } else {
    1248           0 :         __ Tzcntq(i.OutputRegister(), i.InputOperand(0));
    1249             :       }
    1250             :       break;
    1251             :     case kX64Tzcnt32:
    1252         332 :       if (instr->InputAt(0)->IsRegister()) {
    1253         332 :         __ Tzcntl(i.OutputRegister(), i.InputRegister(0));
    1254             :       } else {
    1255           0 :         __ Tzcntl(i.OutputRegister(), i.InputOperand(0));
    1256             :       }
    1257             :       break;
    1258             :     case kX64Popcnt:
    1259          42 :       if (instr->InputAt(0)->IsRegister()) {
    1260          42 :         __ Popcntq(i.OutputRegister(), i.InputRegister(0));
    1261             :       } else {
    1262           0 :         __ Popcntq(i.OutputRegister(), i.InputOperand(0));
    1263             :       }
    1264             :       break;
    1265             :     case kX64Popcnt32:
    1266          80 :       if (instr->InputAt(0)->IsRegister()) {
    1267          80 :         __ Popcntl(i.OutputRegister(), i.InputRegister(0));
    1268             :       } else {
    1269           0 :         __ Popcntl(i.OutputRegister(), i.InputOperand(0));
    1270             :       }
    1271             :       break;
    1272             :     case kX64Bswap:
    1273          12 :       __ bswapq(i.OutputRegister());
    1274          12 :       break;
    1275             :     case kX64Bswap32:
    1276          44 :       __ bswapl(i.OutputRegister());
    1277          44 :       break;
    1278             :     case kSSEFloat32Cmp:
    1279           0 :       ASSEMBLE_SSE_BINOP(Ucomiss);
    1280             :       break;
    1281             :     case kSSEFloat32Add:
    1282           0 :       ASSEMBLE_SSE_BINOP(addss);
    1283             :       break;
    1284             :     case kSSEFloat32Sub:
    1285           0 :       ASSEMBLE_SSE_BINOP(subss);
    1286             :       break;
    1287             :     case kSSEFloat32Mul:
    1288           0 :       ASSEMBLE_SSE_BINOP(mulss);
    1289             :       break;
    1290             :     case kSSEFloat32Div:
    1291           0 :       ASSEMBLE_SSE_BINOP(divss);
    1292             :       // Don't delete this mov. It may improve performance on some CPUs,
    1293             :       // when there is a (v)mulss depending on the result.
    1294           0 :       __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
    1295           0 :       break;
    1296             :     case kSSEFloat32Abs: {
    1297             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1298           0 :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    1299           0 :       __ psrlq(kScratchDoubleReg, 33);
    1300           0 :       __ andps(i.OutputDoubleRegister(), kScratchDoubleReg);
    1301           0 :       break;
    1302             :     }
    1303             :     case kSSEFloat32Neg: {
    1304             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1305           0 :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    1306           0 :       __ psllq(kScratchDoubleReg, 31);
    1307           0 :       __ xorps(i.OutputDoubleRegister(), kScratchDoubleReg);
    1308           0 :       break;
    1309             :     }
    1310             :     case kSSEFloat32Sqrt:
    1311         364 :       ASSEMBLE_SSE_UNOP(sqrtss);
    1312             :       break;
    1313             :     case kSSEFloat32ToFloat64:
    1314       40904 :       ASSEMBLE_SSE_UNOP(Cvtss2sd);
    1315             :       break;
    1316             :     case kSSEFloat32Round: {
    1317             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    1318             :       RoundingMode const mode =
    1319             :           static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
    1320             :       __ Roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
    1321             :       break;
    1322             :     }
    1323             :     case kSSEFloat32ToInt32:
    1324         348 :       if (instr->InputAt(0)->IsFPRegister()) {
    1325         348 :         __ Cvttss2si(i.OutputRegister(), i.InputDoubleRegister(0));
    1326             :       } else {
    1327           0 :         __ Cvttss2si(i.OutputRegister(), i.InputOperand(0));
    1328             :       }
    1329             :       break;
    1330             :     case kSSEFloat32ToUint32: {
    1331          56 :       if (instr->InputAt(0)->IsFPRegister()) {
    1332          56 :         __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
    1333             :       } else {
    1334           0 :         __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
    1335             :       }
    1336             :       break;
    1337             :     }
    1338             :     case kSSEFloat64Cmp:
    1339        1840 :       ASSEMBLE_SSE_BINOP(Ucomisd);
    1340             :       break;
    1341             :     case kSSEFloat64Add:
    1342         632 :       ASSEMBLE_SSE_BINOP(addsd);
    1343             :       break;
    1344             :     case kSSEFloat64Sub:
    1345         468 :       ASSEMBLE_SSE_BINOP(subsd);
    1346             :       break;
    1347             :     case kSSEFloat64Mul:
    1348          56 :       ASSEMBLE_SSE_BINOP(mulsd);
    1349             :       break;
    1350             :     case kSSEFloat64Div:
    1351          52 :       ASSEMBLE_SSE_BINOP(divsd);
    1352             :       // Don't delete this mov. It may improve performance on some CPUs,
    1353             :       // when there is a (v)mulsd depending on the result.
    1354             :       __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
    1355             :       break;
    1356             :     case kSSEFloat64Mod: {
    1357        1576 :       __ subq(rsp, Immediate(kDoubleSize));
    1358        1576 :       unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    1359        1576 :                                                        kDoubleSize);
    1360             :       // Move values to st(0) and st(1).
    1361        3152 :       __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(1));
    1362        1576 :       __ fld_d(Operand(rsp, 0));
    1363        3152 :       __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
    1364        1576 :       __ fld_d(Operand(rsp, 0));
    1365             :       // Loop while fprem isn't done.
    1366        1576 :       Label mod_loop;
    1367        1576 :       __ bind(&mod_loop);
    1368             :       // This instruction traps on all kinds of inputs, but we are assuming the
    1369             :       // floating point control word is set to ignore them all.
    1370        1576 :       __ fprem();
    1371             :       // The following 2 instructions implicitly use rax.
    1372        1576 :       __ fnstsw_ax();
    1373        1576 :       if (CpuFeatures::IsSupported(SAHF)) {
    1374             :         CpuFeatureScope sahf_scope(tasm(), SAHF);
    1375        1544 :         __ sahf();
    1376             :       } else {
    1377             :         __ shrl(rax, Immediate(8));
    1378             :         __ andl(rax, Immediate(0xFF));
    1379          32 :         __ pushq(rax);
    1380             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    1381          32 :                                                          kSystemPointerSize);
    1382          32 :         __ popfq();
    1383             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    1384          32 :                                                          -kSystemPointerSize);
    1385             :       }
    1386        1576 :       __ j(parity_even, &mod_loop);
    1387             :       // Move output to stack and clean up.
    1388        1576 :       __ fstp(1);
    1389        1576 :       __ fstp_d(Operand(rsp, 0));
    1390        3152 :       __ Movsd(i.OutputDoubleRegister(), Operand(rsp, 0));
    1391             :       __ addq(rsp, Immediate(kDoubleSize));
    1392             :       unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    1393        1576 :                                                        -kDoubleSize);
    1394             :       break;
    1395             :     }
    1396             :     case kSSEFloat32Max: {
    1397          66 :       Label compare_swap, done_compare;
    1398          66 :       if (instr->InputAt(1)->IsFPRegister()) {
    1399             :         __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1400             :       } else {
    1401           0 :         __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
    1402             :       }
    1403             :       auto ool =
    1404             :           new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
    1405          66 :       __ j(parity_even, ool->entry());
    1406          66 :       __ j(above, &done_compare, Label::kNear);
    1407          66 :       __ j(below, &compare_swap, Label::kNear);
    1408             :       __ Movmskps(kScratchRegister, i.InputDoubleRegister(0));
    1409             :       __ testl(kScratchRegister, Immediate(1));
    1410          66 :       __ j(zero, &done_compare, Label::kNear);
    1411          66 :       __ bind(&compare_swap);
    1412          66 :       if (instr->InputAt(1)->IsFPRegister()) {
    1413             :         __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1414             :       } else {
    1415           0 :         __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
    1416             :       }
    1417          66 :       __ bind(&done_compare);
    1418          66 :       __ bind(ool->exit());
    1419             :       break;
    1420             :     }
    1421             :     case kSSEFloat32Min: {
    1422          66 :       Label compare_swap, done_compare;
    1423          66 :       if (instr->InputAt(1)->IsFPRegister()) {
    1424             :         __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1425             :       } else {
    1426           0 :         __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
    1427             :       }
    1428             :       auto ool =
    1429             :           new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
    1430          66 :       __ j(parity_even, ool->entry());
    1431          66 :       __ j(below, &done_compare, Label::kNear);
    1432          66 :       __ j(above, &compare_swap, Label::kNear);
    1433          66 :       if (instr->InputAt(1)->IsFPRegister()) {
    1434             :         __ Movmskps(kScratchRegister, i.InputDoubleRegister(1));
    1435             :       } else {
    1436           0 :         __ Movss(kScratchDoubleReg, i.InputOperand(1));
    1437             :         __ Movmskps(kScratchRegister, kScratchDoubleReg);
    1438             :       }
    1439             :       __ testl(kScratchRegister, Immediate(1));
    1440          66 :       __ j(zero, &done_compare, Label::kNear);
    1441          66 :       __ bind(&compare_swap);
    1442          66 :       if (instr->InputAt(1)->IsFPRegister()) {
    1443             :         __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1444             :       } else {
    1445           0 :         __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
    1446             :       }
    1447          66 :       __ bind(&done_compare);
    1448          66 :       __ bind(ool->exit());
    1449             :       break;
    1450             :     }
    1451             :     case kSSEFloat64Max: {
    1452         252 :       Label compare_swap, done_compare;
    1453         252 :       if (instr->InputAt(1)->IsFPRegister()) {
    1454             :         __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1455             :       } else {
    1456           0 :         __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
    1457             :       }
    1458             :       auto ool =
    1459             :           new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
    1460         252 :       __ j(parity_even, ool->entry());
    1461         252 :       __ j(above, &done_compare, Label::kNear);
    1462         252 :       __ j(below, &compare_swap, Label::kNear);
    1463             :       __ Movmskpd(kScratchRegister, i.InputDoubleRegister(0));
    1464             :       __ testl(kScratchRegister, Immediate(1));
    1465         252 :       __ j(zero, &done_compare, Label::kNear);
    1466         252 :       __ bind(&compare_swap);
    1467         252 :       if (instr->InputAt(1)->IsFPRegister()) {
    1468             :         __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1469             :       } else {
    1470           0 :         __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
    1471             :       }
    1472         252 :       __ bind(&done_compare);
    1473         252 :       __ bind(ool->exit());
    1474             :       break;
    1475             :     }
    1476             :     case kSSEFloat64Min: {
    1477         339 :       Label compare_swap, done_compare;
    1478         339 :       if (instr->InputAt(1)->IsFPRegister()) {
    1479             :         __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1480             :       } else {
    1481           0 :         __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
    1482             :       }
    1483             :       auto ool =
    1484             :           new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
    1485         339 :       __ j(parity_even, ool->entry());
    1486         339 :       __ j(below, &done_compare, Label::kNear);
    1487         339 :       __ j(above, &compare_swap, Label::kNear);
    1488         339 :       if (instr->InputAt(1)->IsFPRegister()) {
    1489             :         __ Movmskpd(kScratchRegister, i.InputDoubleRegister(1));
    1490             :       } else {
    1491           0 :         __ Movsd(kScratchDoubleReg, i.InputOperand(1));
    1492             :         __ Movmskpd(kScratchRegister, kScratchDoubleReg);
    1493             :       }
    1494             :       __ testl(kScratchRegister, Immediate(1));
    1495         339 :       __ j(zero, &done_compare, Label::kNear);
    1496         339 :       __ bind(&compare_swap);
    1497         339 :       if (instr->InputAt(1)->IsFPRegister()) {
    1498             :         __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1499             :       } else {
    1500           0 :         __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
    1501             :       }
    1502         339 :       __ bind(&done_compare);
    1503         339 :       __ bind(ool->exit());
    1504             :       break;
    1505             :     }
    1506             :     case kSSEFloat64Abs: {
    1507             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1508           6 :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    1509           6 :       __ psrlq(kScratchDoubleReg, 1);
    1510           6 :       __ andpd(i.OutputDoubleRegister(), kScratchDoubleReg);
    1511           6 :       break;
    1512             :     }
    1513             :     case kSSEFloat64Neg: {
    1514             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1515          76 :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    1516          76 :       __ psllq(kScratchDoubleReg, 63);
    1517          76 :       __ xorpd(i.OutputDoubleRegister(), kScratchDoubleReg);
    1518          76 :       break;
    1519             :     }
    1520             :     case kSSEFloat64Sqrt:
    1521         415 :       ASSEMBLE_SSE_UNOP(Sqrtsd);
    1522             :       break;
    1523             :     case kSSEFloat64Round: {
    1524             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    1525             :       RoundingMode const mode =
    1526             :           static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
    1527             :       __ Roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
    1528             :       break;
    1529             :     }
    1530             :     case kSSEFloat64ToFloat32:
    1531       36016 :       ASSEMBLE_SSE_UNOP(Cvtsd2ss);
    1532             :       break;
    1533             :     case kSSEFloat64ToInt32:
    1534      128088 :       if (instr->InputAt(0)->IsFPRegister()) {
    1535      107823 :         __ Cvttsd2si(i.OutputRegister(), i.InputDoubleRegister(0));
    1536             :       } else {
    1537       40530 :         __ Cvttsd2si(i.OutputRegister(), i.InputOperand(0));
    1538             :       }
    1539             :       break;
    1540             :     case kSSEFloat64ToUint32: {
    1541         702 :       if (instr->InputAt(0)->IsFPRegister()) {
    1542         702 :         __ Cvttsd2siq(i.OutputRegister(), i.InputDoubleRegister(0));
    1543             :       } else {
    1544           0 :         __ Cvttsd2siq(i.OutputRegister(), i.InputOperand(0));
    1545             :       }
    1546        1404 :       if (MiscField::decode(instr->opcode())) {
    1547         642 :         __ AssertZeroExtended(i.OutputRegister());
    1548             :       }
    1549             :       break;
    1550             :     }
    1551             :     case kSSEFloat32ToInt64:
    1552          52 :       if (instr->InputAt(0)->IsFPRegister()) {
    1553          52 :         __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
    1554             :       } else {
    1555           0 :         __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
    1556             :       }
    1557          52 :       if (instr->OutputCount() > 1) {
    1558          48 :         __ Set(i.OutputRegister(1), 1);
    1559          48 :         Label done;
    1560          48 :         Label fail;
    1561             :         __ Move(kScratchDoubleReg, static_cast<float>(INT64_MIN));
    1562          48 :         if (instr->InputAt(0)->IsFPRegister()) {
    1563             :           __ Ucomiss(kScratchDoubleReg, i.InputDoubleRegister(0));
    1564             :         } else {
    1565           0 :           __ Ucomiss(kScratchDoubleReg, i.InputOperand(0));
    1566             :         }
    1567             :         // If the input is NaN, then the conversion fails.
    1568          48 :         __ j(parity_even, &fail);
    1569             :         // If the input is INT64_MIN, then the conversion succeeds.
    1570          48 :         __ j(equal, &done);
    1571             :         __ cmpq(i.OutputRegister(0), Immediate(1));
    1572             :         // If the conversion results in INT64_MIN, but the input was not
    1573             :         // INT64_MIN, then the conversion fails.
    1574          48 :         __ j(no_overflow, &done);
    1575          48 :         __ bind(&fail);
    1576          48 :         __ Set(i.OutputRegister(1), 0);
    1577          48 :         __ bind(&done);
    1578             :       }
    1579             :       break;
    1580             :     case kSSEFloat64ToInt64:
    1581        1142 :       if (instr->InputAt(0)->IsFPRegister()) {
    1582        1141 :         __ Cvttsd2siq(i.OutputRegister(0), i.InputDoubleRegister(0));
    1583             :       } else {
    1584           2 :         __ Cvttsd2siq(i.OutputRegister(0), i.InputOperand(0));
    1585             :       }
    1586        1149 :       if (instr->OutputCount() > 1) {
    1587        1018 :         __ Set(i.OutputRegister(1), 1);
    1588        1019 :         Label done;
    1589        1019 :         Label fail;
    1590             :         __ Move(kScratchDoubleReg, static_cast<double>(INT64_MIN));
    1591        1015 :         if (instr->InputAt(0)->IsFPRegister()) {
    1592             :           __ Ucomisd(kScratchDoubleReg, i.InputDoubleRegister(0));
    1593             :         } else {
    1594           0 :           __ Ucomisd(kScratchDoubleReg, i.InputOperand(0));
    1595             :         }
    1596             :         // If the input is NaN, then the conversion fails.
    1597        1017 :         __ j(parity_even, &fail);
    1598             :         // If the input is INT64_MIN, then the conversion succeeds.
    1599        1019 :         __ j(equal, &done);
    1600             :         __ cmpq(i.OutputRegister(0), Immediate(1));
    1601             :         // If the conversion results in INT64_MIN, but the input was not
    1602             :         // INT64_MIN, then the conversion fails.
    1603        1019 :         __ j(no_overflow, &done);
    1604        1016 :         __ bind(&fail);
    1605        1018 :         __ Set(i.OutputRegister(1), 0);
    1606        1019 :         __ bind(&done);
    1607             :       }
    1608             :       break;
    1609             :     case kSSEFloat32ToUint64: {
    1610          52 :       Label fail;
    1611         100 :       if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
    1612          52 :       if (instr->InputAt(0)->IsFPRegister()) {
    1613          52 :         __ Cvttss2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
    1614             :       } else {
    1615           0 :         __ Cvttss2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
    1616             :       }
    1617         100 :       if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
    1618          52 :       __ bind(&fail);
    1619             :       break;
    1620             :     }
    1621             :     case kSSEFloat64ToUint64: {
    1622        2972 :       Label fail;
    1623        3028 :       if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
    1624        2972 :       if (instr->InputAt(0)->IsFPRegister()) {
    1625        2972 :         __ Cvttsd2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
    1626             :       } else {
    1627           0 :         __ Cvttsd2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
    1628             :       }
    1629        3028 :       if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
    1630        2972 :       __ bind(&fail);
    1631             :       break;
    1632             :     }
    1633             :     case kSSEInt32ToFloat64:
    1634      378584 :       if (instr->InputAt(0)->IsRegister()) {
    1635      374742 :         __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
    1636             :       } else {
    1637        7684 :         __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
    1638             :       }
    1639             :       break;
    1640             :     case kSSEInt32ToFloat32:
    1641         984 :       if (instr->InputAt(0)->IsRegister()) {
    1642         976 :         __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
    1643             :       } else {
    1644          16 :         __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
    1645             :       }
    1646             :       break;
    1647             :     case kSSEInt64ToFloat32:
    1648          48 :       if (instr->InputAt(0)->IsRegister()) {
    1649          48 :         __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
    1650             :       } else {
    1651           0 :         __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
    1652             :       }
    1653             :       break;
    1654             :     case kSSEInt64ToFloat64:
    1655        3489 :       if (instr->InputAt(0)->IsRegister()) {
    1656        1489 :         __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
    1657             :       } else {
    1658        4000 :         __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
    1659             :       }
    1660             :       break;
    1661             :     case kSSEUint64ToFloat32:
    1662          31 :       if (instr->InputAt(0)->IsRegister()) {
    1663          31 :         __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
    1664             :       } else {
    1665           0 :         __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
    1666             :       }
    1667             :       break;
    1668             :     case kSSEUint64ToFloat64:
    1669        3608 :       if (instr->InputAt(0)->IsRegister()) {
    1670        2376 :         __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
    1671             :       } else {
    1672        2464 :         __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
    1673             :       }
    1674             :       break;
    1675             :     case kSSEUint32ToFloat64:
    1676       11332 :       if (instr->InputAt(0)->IsRegister()) {
    1677         391 :         __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
    1678             :       } else {
    1679       21882 :         __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
    1680             :       }
    1681             :       break;
    1682             :     case kSSEUint32ToFloat32:
    1683          88 :       if (instr->InputAt(0)->IsRegister()) {
    1684          88 :         __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
    1685             :       } else {
    1686           0 :         __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
    1687             :       }
    1688             :       break;
    1689             :     case kSSEFloat64ExtractLowWord32:
    1690         116 :       if (instr->InputAt(0)->IsFPStackSlot()) {
    1691           0 :         __ movl(i.OutputRegister(), i.InputOperand(0));
    1692             :       } else {
    1693             :         __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
    1694             :       }
    1695             :       break;
    1696             :     case kSSEFloat64ExtractHighWord32:
    1697      101385 :       if (instr->InputAt(0)->IsFPStackSlot()) {
    1698      120244 :         __ movl(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2));
    1699             :       } else {
    1700       41263 :         __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1);
    1701             :       }
    1702             :       break;
    1703             :     case kSSEFloat64InsertLowWord32:
    1704           4 :       if (instr->InputAt(1)->IsRegister()) {
    1705           4 :         __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 0);
    1706             :       } else {
    1707           0 :         __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
    1708             :       }
    1709             :       break;
    1710             :     case kSSEFloat64InsertHighWord32:
    1711         116 :       if (instr->InputAt(1)->IsRegister()) {
    1712         116 :         __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 1);
    1713             :       } else {
    1714           0 :         __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
    1715             :       }
    1716             :       break;
    1717             :     case kSSEFloat64LoadLowWord32:
    1718         112 :       if (instr->InputAt(0)->IsRegister()) {
    1719             :         __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
    1720             :       } else {
    1721           0 :         __ Movd(i.OutputDoubleRegister(), i.InputOperand(0));
    1722             :       }
    1723             :       break;
    1724             :     case kAVXFloat32Cmp: {
    1725             :       CpuFeatureScope avx_scope(tasm(), AVX);
    1726        1547 :       if (instr->InputAt(1)->IsFPRegister()) {
    1727        1523 :         __ vucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1728             :       } else {
    1729          48 :         __ vucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
    1730             :       }
    1731             :       break;
    1732             :     }
    1733             :     case kAVXFloat32Add:
    1734        3618 :       ASSEMBLE_AVX_BINOP(vaddss);
    1735             :       break;
    1736             :     case kAVXFloat32Sub:
    1737        5242 :       ASSEMBLE_AVX_BINOP(vsubss);
    1738             :       break;
    1739             :     case kAVXFloat32Mul:
    1740        1780 :       ASSEMBLE_AVX_BINOP(vmulss);
    1741             :       break;
    1742             :     case kAVXFloat32Div:
    1743         770 :       ASSEMBLE_AVX_BINOP(vdivss);
    1744             :       // Don't delete this mov. It may improve performance on some CPUs,
    1745             :       // when there is a (v)mulss depending on the result.
    1746             :       __ Movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
    1747             :       break;
    1748             :     case kAVXFloat64Cmp: {
    1749             :       CpuFeatureScope avx_scope(tasm(), AVX);
    1750      261574 :       if (instr->InputAt(1)->IsFPRegister()) {
    1751      240948 :         __ vucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1752             :       } else {
    1753       41252 :         __ vucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
    1754             :       }
    1755             :       break;
    1756             :     }
    1757             :     case kAVXFloat64Add:
    1758      158512 :       ASSEMBLE_AVX_BINOP(vaddsd);
    1759             :       break;
    1760             :     case kAVXFloat64Sub:
    1761       31168 :       ASSEMBLE_AVX_BINOP(vsubsd);
    1762             :       break;
    1763             :     case kAVXFloat64Mul:
    1764       23750 :       ASSEMBLE_AVX_BINOP(vmulsd);
    1765             :       break;
    1766             :     case kAVXFloat64Div:
    1767       23990 :       ASSEMBLE_AVX_BINOP(vdivsd);
    1768             :       // Don't delete this mov. It may improve performance on some CPUs,
    1769             :       // when there is a (v)mulsd depending on the result.
    1770             :       __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
    1771             :       break;
    1772             :     case kAVXFloat32Abs: {
    1773             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1774             :       CpuFeatureScope avx_scope(tasm(), AVX);
    1775          66 :       __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
    1776             :       __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 33);
    1777          66 :       if (instr->InputAt(0)->IsFPRegister()) {
    1778             :         __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg,
    1779             :                   i.InputDoubleRegister(0));
    1780             :       } else {
    1781           0 :         __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg,
    1782             :                   i.InputOperand(0));
    1783             :       }
    1784             :       break;
    1785             :     }
    1786             :     case kAVXFloat32Neg: {
    1787             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1788             :       CpuFeatureScope avx_scope(tasm(), AVX);
    1789         176 :       __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
    1790             :       __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 31);
    1791         176 :       if (instr->InputAt(0)->IsFPRegister()) {
    1792             :         __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg,
    1793             :                   i.InputDoubleRegister(0));
    1794             :       } else {
    1795           0 :         __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg,
    1796             :                   i.InputOperand(0));
    1797             :       }
    1798             :       break;
    1799             :     }
    1800             :     case kAVXFloat64Abs: {
    1801             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1802             :       CpuFeatureScope avx_scope(tasm(), AVX);
    1803         621 :       __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
    1804             :       __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 1);
    1805         621 :       if (instr->InputAt(0)->IsFPRegister()) {
    1806             :         __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg,
    1807             :                   i.InputDoubleRegister(0));
    1808             :       } else {
    1809           0 :         __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg,
    1810             :                   i.InputOperand(0));
    1811             :       }
    1812             :       break;
    1813             :     }
    1814             :     case kAVXFloat64Neg: {
    1815             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1816             :       CpuFeatureScope avx_scope(tasm(), AVX);
    1817        9650 :       __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
    1818             :       __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 63);
    1819        9649 :       if (instr->InputAt(0)->IsFPRegister()) {
    1820             :         __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg,
    1821             :                   i.InputDoubleRegister(0));
    1822             :       } else {
    1823          83 :         __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg,
    1824             :                   i.InputOperand(0));
    1825             :       }
    1826             :       break;
    1827             :     }
    1828             :     case kSSEFloat64SilenceNaN:
    1829             :       __ Xorpd(kScratchDoubleReg, kScratchDoubleReg);
    1830             :       __ Subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
    1831             :       break;
    1832             :     case kX64Movsxbl:
    1833       44834 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1834       90287 :       ASSEMBLE_MOVX(movsxbl);
    1835       44834 :       __ AssertZeroExtended(i.OutputRegister());
    1836       44834 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1837       44834 :       break;
    1838             :     case kX64Movzxbl:
    1839      176740 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1840      354629 :       ASSEMBLE_MOVX(movzxbl);
    1841      176741 :       __ AssertZeroExtended(i.OutputRegister());
    1842      176741 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1843      176741 :       break;
    1844             :     case kX64Movsxbq:
    1845       13515 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1846       27042 :       ASSEMBLE_MOVX(movsxbq);
    1847       13515 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1848       13515 :       break;
    1849             :     case kX64Movzxbq:
    1850       14020 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1851       28040 :       ASSEMBLE_MOVX(movzxbq);
    1852       14020 :       __ AssertZeroExtended(i.OutputRegister());
    1853       14020 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1854       14020 :       break;
    1855             :     case kX64Movb: {
    1856       79996 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1857       79996 :       size_t index = 0;
    1858       79996 :       Operand operand = i.MemoryOperand(&index);
    1859      159996 :       if (HasImmediateInput(instr, index)) {
    1860       13484 :         __ movb(operand, Immediate(i.InputInt8(index)));
    1861             :       } else {
    1862       73256 :         __ movb(operand, i.InputRegister(index));
    1863             :       }
    1864       79998 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1865             :       break;
    1866             :     }
    1867             :     case kX64Movsxwl:
    1868       10503 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1869       21486 :       ASSEMBLE_MOVX(movsxwl);
    1870       10503 :       __ AssertZeroExtended(i.OutputRegister());
    1871       10503 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1872       10503 :       break;
    1873             :     case kX64Movzxwl:
    1874      158231 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1875      322943 :       ASSEMBLE_MOVX(movzxwl);
    1876      158234 :       __ AssertZeroExtended(i.OutputRegister());
    1877      158234 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1878      158235 :       break;
    1879             :     case kX64Movsxwq:
    1880        9027 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1881       18058 :       ASSEMBLE_MOVX(movsxwq);
    1882             :       break;
    1883             :     case kX64Movzxwq:
    1884         672 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1885        1344 :       ASSEMBLE_MOVX(movzxwq);
    1886         672 :       __ AssertZeroExtended(i.OutputRegister());
    1887         672 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1888         672 :       break;
    1889             :     case kX64Movw: {
    1890       15370 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1891       15370 :       size_t index = 0;
    1892       15370 :       Operand operand = i.MemoryOperand(&index);
    1893       30740 :       if (HasImmediateInput(instr, index)) {
    1894        1830 :         __ movw(operand, Immediate(i.InputInt16(index)));
    1895             :       } else {
    1896       14455 :         __ movw(operand, i.InputRegister(index));
    1897             :       }
    1898       15370 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1899             :       break;
    1900             :     }
    1901             :     case kX64Movl:
    1902     2519239 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1903     2518456 :       if (instr->HasOutput()) {
    1904      969402 :         if (instr->addressing_mode() == kMode_None) {
    1905       60747 :           if (instr->InputAt(0)->IsRegister()) {
    1906       57179 :             __ movl(i.OutputRegister(), i.InputRegister(0));
    1907             :           } else {
    1908        7134 :             __ movl(i.OutputRegister(), i.InputOperand(0));
    1909             :           }
    1910             :         } else {
    1911      908666 :           __ movl(i.OutputRegister(), i.MemoryOperand());
    1912             :         }
    1913      969454 :         __ AssertZeroExtended(i.OutputRegister());
    1914             :       } else {
    1915     1549054 :         size_t index = 0;
    1916     1549054 :         Operand operand = i.MemoryOperand(&index);
    1917     3098732 :         if (HasImmediateInput(instr, index)) {
    1918      482672 :           __ movl(operand, i.InputImmediate(index));
    1919             :         } else {
    1920     1066695 :           __ movl(operand, i.InputRegister(index));
    1921             :         }
    1922             :       }
    1923     2518533 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1924     2518571 :       break;
    1925             :     case kX64Movsxlq:
    1926      230911 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1927      546998 :       ASSEMBLE_MOVX(movsxlq);
    1928      230910 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1929      230911 :       break;
    1930             :     case kX64MovqDecompressTaggedSigned: {
    1931           0 :       CHECK(instr->HasOutput());
    1932           0 :       __ DecompressTaggedSigned(i.OutputRegister(), i.MemoryOperand());
    1933           0 :       break;
    1934             :     }
    1935             :     case kX64MovqDecompressTaggedPointer: {
    1936           0 :       CHECK(instr->HasOutput());
    1937           0 :       __ DecompressTaggedPointer(i.OutputRegister(), i.MemoryOperand());
    1938           0 :       break;
    1939             :     }
    1940             :     case kX64MovqDecompressAnyTagged: {
    1941           0 :       CHECK(instr->HasOutput());
    1942           0 :       __ DecompressAnyTagged(i.OutputRegister(), i.MemoryOperand());
    1943           0 :       break;
    1944             :     }
    1945             :     case kX64MovqCompressTagged: {
    1946           0 :       CHECK(!instr->HasOutput());
    1947           0 :       size_t index = 0;
    1948           0 :       Operand operand = i.MemoryOperand(&index);
    1949           0 :       if (HasImmediateInput(instr, index)) {
    1950           0 :         __ StoreTaggedField(operand, i.InputImmediate(index));
    1951             :       } else {
    1952           0 :         __ StoreTaggedField(operand, i.InputRegister(index));
    1953             :       }
    1954             :       break;
    1955             :     }
    1956             :     case kX64DecompressSigned: {
    1957           0 :       CHECK(instr->HasOutput());
    1958           0 :       ASSEMBLE_MOVX(movsxlq);
    1959             :       break;
    1960             :     }
    1961             :     case kX64DecompressPointer: {
    1962           0 :       CHECK(instr->HasOutput());
    1963           0 :       ASSEMBLE_MOVX(movsxlq);
    1964           0 :       __ addq(i.OutputRegister(), kRootRegister);
    1965             :       break;
    1966             :     }
    1967             :     case kX64DecompressAny: {
    1968           0 :       CHECK(instr->HasOutput());
    1969           0 :       ASSEMBLE_MOVX(movsxlq);
    1970             :       // TODO(solanes): Do branchful compute?
    1971             :       // Branchlessly compute |masked_root|:
    1972             :       STATIC_ASSERT((kSmiTagSize == 1) && (kSmiTag < 32));
    1973             :       Register masked_root = kScratchRegister;
    1974           0 :       __ movl(masked_root, i.OutputRegister());
    1975             :       __ andl(masked_root, Immediate(kSmiTagMask));
    1976             :       __ negq(masked_root);
    1977             :       __ andq(masked_root, kRootRegister);
    1978             :       // Now this add operation will either leave the value unchanged if it is a
    1979             :       // smi or add the isolate root if it is a heap object.
    1980             :       __ addq(i.OutputRegister(), masked_root);
    1981             :       break;
    1982             :     }
    1983             :     // TODO(solanes): Combine into one Compress? They seem to be identical.
    1984             :     // TODO(solanes): We might get away with doing a no-op in these three cases.
    1985             :     // The movl instruction is the conservative way for the moment.
    1986             :     case kX64CompressSigned: {
    1987           0 :       ASSEMBLE_MOVX(movl);
    1988             :       break;
    1989             :     }
    1990             :     case kX64CompressPointer: {
    1991           0 :       ASSEMBLE_MOVX(movl);
    1992             :       break;
    1993             :     }
    1994             :     case kX64CompressAny: {
    1995           0 :       ASSEMBLE_MOVX(movl);
    1996             :       break;
    1997             :     }
    1998             :     case kX64Movq:
    1999     8641816 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    2000     8641547 :       if (instr->HasOutput()) {
    2001     5795988 :         __ movq(i.OutputRegister(), i.MemoryOperand());
    2002             :       } else {
    2003     2846960 :         size_t index = 0;
    2004     2846960 :         Operand operand = i.MemoryOperand(&index);
    2005     5693920 :         if (HasImmediateInput(instr, index)) {
    2006       21878 :           __ movq(operand, i.InputImmediate(index));
    2007             :         } else {
    2008     2825082 :           __ movq(operand, i.InputRegister(index));
    2009             :         }
    2010             :       }
    2011     8643840 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    2012     8643423 :       break;
    2013             :     case kX64Movss:
    2014       28867 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    2015       28869 :       if (instr->HasOutput()) {
    2016       15709 :         __ movss(i.OutputDoubleRegister(), i.MemoryOperand());
    2017             :       } else {
    2018       13158 :         size_t index = 0;
    2019       13158 :         Operand operand = i.MemoryOperand(&index);
    2020       26316 :         __ movss(operand, i.InputDoubleRegister(index));
    2021             :       }
    2022             :       break;
    2023             :     case kX64Movsd: {
    2024      620200 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    2025      620234 :       if (instr->HasOutput()) {
    2026             :         const MemoryAccessMode access_mode =
    2027             :             static_cast<MemoryAccessMode>(MiscField::decode(opcode));
    2028      426145 :         if (access_mode == kMemoryAccessPoisoned) {
    2029             :           // If we have to poison the loaded value, we load into a general
    2030             :           // purpose register first, mask it with the poison, and move the
    2031             :           // value from the general purpose register into the double register.
    2032           0 :           __ movq(kScratchRegister, i.MemoryOperand());
    2033             :           __ andq(kScratchRegister, kSpeculationPoisonRegister);
    2034             :           __ Movq(i.OutputDoubleRegister(), kScratchRegister);
    2035             :         } else {
    2036             :           __ Movsd(i.OutputDoubleRegister(), i.MemoryOperand());
    2037             :         }
    2038             :       } else {
    2039      194089 :         size_t index = 0;
    2040      194089 :         Operand operand = i.MemoryOperand(&index);
    2041      194090 :         __ Movsd(operand, i.InputDoubleRegister(index));
    2042             :       }
    2043             :       break;
    2044             :     }
    2045             :     case kX64Movdqu: {
    2046             :       CpuFeatureScope sse_scope(tasm(), SSSE3);
    2047        9860 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    2048        9860 :       if (instr->HasOutput()) {
    2049        5780 :         __ movdqu(i.OutputSimd128Register(), i.MemoryOperand());
    2050             :       } else {
    2051        4080 :         size_t index = 0;
    2052        4080 :         Operand operand = i.MemoryOperand(&index);
    2053        8160 :         __ movdqu(operand, i.InputSimd128Register(index));
    2054             :       }
    2055             :       break;
    2056             :     }
    2057             :     case kX64BitcastFI:
    2058       51102 :       if (instr->InputAt(0)->IsFPStackSlot()) {
    2059           0 :         __ movl(i.OutputRegister(), i.InputOperand(0));
    2060             :       } else {
    2061             :         __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
    2062             :       }
    2063             :       break;
    2064             :     case kX64BitcastDL:
    2065       50775 :       if (instr->InputAt(0)->IsFPStackSlot()) {
    2066           0 :         __ movq(i.OutputRegister(), i.InputOperand(0));
    2067             :       } else {
    2068             :         __ Movq(i.OutputRegister(), i.InputDoubleRegister(0));
    2069             :       }
    2070             :       break;
    2071             :     case kX64BitcastIF:
    2072         327 :       if (instr->InputAt(0)->IsRegister()) {
    2073             :         __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
    2074             :       } else {
    2075           0 :         __ movss(i.OutputDoubleRegister(), i.InputOperand(0));
    2076             :       }
    2077             :       break;
    2078             :     case kX64BitcastLD:
    2079         215 :       if (instr->InputAt(0)->IsRegister()) {
    2080             :         __ Movq(i.OutputDoubleRegister(), i.InputRegister(0));
    2081             :       } else {
    2082           0 :         __ Movsd(i.OutputDoubleRegister(), i.InputOperand(0));
    2083             :       }
    2084             :       break;
    2085             :     case kX64Lea32: {
    2086             :       AddressingMode mode = AddressingModeField::decode(instr->opcode());
    2087             :       // Shorten "leal" to "addl", "subl" or "shll" if the register allocation
    2088             :       // and addressing mode just happens to work out. The "addl"/"subl" forms
    2089             :       // in these cases are faster based on measurements.
    2090      329177 :       if (i.InputRegister(0) == i.OutputRegister()) {
    2091      151935 :         if (mode == kMode_MRI) {
    2092             :           int32_t constant_summand = i.InputInt32(1);
    2093             :           DCHECK_NE(0, constant_summand);
    2094       75452 :           if (constant_summand > 0) {
    2095       54240 :             __ addl(i.OutputRegister(), Immediate(constant_summand));
    2096             :           } else {
    2097       21212 :             __ subl(i.OutputRegister(),
    2098             :                     Immediate(base::NegateWithWraparound(constant_summand)));
    2099             :           }
    2100       76464 :         } else if (mode == kMode_MR1) {
    2101       16459 :           if (i.InputRegister(1) == i.OutputRegister()) {
    2102        4288 :             __ shll(i.OutputRegister(), Immediate(1));
    2103             :           } else {
    2104       12171 :             __ addl(i.OutputRegister(), i.InputRegister(1));
    2105             :           }
    2106       60005 :         } else if (mode == kMode_M2) {
    2107           0 :           __ shll(i.OutputRegister(), Immediate(1));
    2108       60005 :         } else if (mode == kMode_M4) {
    2109        2942 :           __ shll(i.OutputRegister(), Immediate(2));
    2110       57063 :         } else if (mode == kMode_M8) {
    2111         120 :           __ shll(i.OutputRegister(), Immediate(3));
    2112             :         } else {
    2113       56943 :           __ leal(i.OutputRegister(), i.MemoryOperand());
    2114             :         }
    2115      203608 :       } else if (mode == kMode_MR1 &&
    2116             :                  i.InputRegister(1) == i.OutputRegister()) {
    2117       17090 :         __ addl(i.OutputRegister(), i.InputRegister(0));
    2118             :       } else {
    2119      160150 :         __ leal(i.OutputRegister(), i.MemoryOperand());
    2120             :       }
    2121      329198 :       __ AssertZeroExtended(i.OutputRegister());
    2122      329170 :       break;
    2123             :     }
    2124             :     case kX64Lea: {
    2125             :       AddressingMode mode = AddressingModeField::decode(instr->opcode());
    2126             :       // Shorten "leaq" to "addq", "subq" or "shlq" if the register allocation
    2127             :       // and addressing mode just happens to work out. The "addq"/"subq" forms
    2128             :       // in these cases are faster based on measurements.
    2129     1958704 :       if (i.InputRegister(0) == i.OutputRegister()) {
    2130      540154 :         if (mode == kMode_MRI) {
    2131             :           int32_t constant_summand = i.InputInt32(1);
    2132      437427 :           if (constant_summand > 0) {
    2133      354263 :             __ addq(i.OutputRegister(), Immediate(constant_summand));
    2134       83164 :           } else if (constant_summand < 0) {
    2135      166270 :             __ subq(i.OutputRegister(), Immediate(-constant_summand));
    2136             :           }
    2137      102726 :         } else if (mode == kMode_MR1) {
    2138       50559 :           if (i.InputRegister(1) == i.OutputRegister()) {
    2139        2714 :             __ shlq(i.OutputRegister(), Immediate(1));
    2140             :           } else {
    2141       47845 :             __ addq(i.OutputRegister(), i.InputRegister(1));
    2142             :           }
    2143       52167 :         } else if (mode == kMode_M2) {
    2144           0 :           __ shlq(i.OutputRegister(), Immediate(1));
    2145       52167 :         } else if (mode == kMode_M4) {
    2146         336 :           __ shlq(i.OutputRegister(), Immediate(2));
    2147       51831 :         } else if (mode == kMode_M8) {
    2148       12792 :           __ shlq(i.OutputRegister(), Immediate(3));
    2149             :         } else {
    2150       39039 :           __ leaq(i.OutputRegister(), i.MemoryOperand());
    2151             :         }
    2152     1660047 :       } else if (mode == kMode_MR1 &&
    2153             :                  i.InputRegister(1) == i.OutputRegister()) {
    2154      169038 :         __ addq(i.OutputRegister(), i.InputRegister(0));
    2155             :       } else {
    2156     1249549 :         __ leaq(i.OutputRegister(), i.MemoryOperand());
    2157             :       }
    2158             :       break;
    2159             :     }
    2160             :     case kX64Dec32:
    2161           0 :       __ decl(i.OutputRegister());
    2162             :       break;
    2163             :     case kX64Inc32:
    2164           0 :       __ incl(i.OutputRegister());
    2165             :       break;
    2166             :     case kX64Push:
    2167     3543484 :       if (AddressingModeField::decode(instr->opcode()) != kMode_None) {
    2168       18699 :         size_t index = 0;
    2169       18699 :         Operand operand = i.MemoryOperand(&index);
    2170       18699 :         __ pushq(operand);
    2171             :         frame_access_state()->IncreaseSPDelta(1);
    2172       18699 :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    2173       18699 :                                                          kSystemPointerSize);
    2174     3524785 :       } else if (HasImmediateInput(instr, 0)) {
    2175      523831 :         __ pushq(i.InputImmediate(0));
    2176             :         frame_access_state()->IncreaseSPDelta(1);
    2177      523831 :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    2178      523831 :                                                          kSystemPointerSize);
    2179     3000954 :       } else if (instr->InputAt(0)->IsRegister()) {
    2180     2250745 :         __ pushq(i.InputRegister(0));
    2181             :         frame_access_state()->IncreaseSPDelta(1);
    2182     2250751 :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    2183     2250751 :                                                          kSystemPointerSize);
    2184     1488556 :       } else if (instr->InputAt(0)->IsFloatRegister() ||
    2185             :                  instr->InputAt(0)->IsDoubleRegister()) {
    2186             :         // TODO(titzer): use another machine instruction?
    2187       22750 :         __ subq(rsp, Immediate(kDoubleSize));
    2188             :         frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize);
    2189       22750 :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    2190       22750 :                                                          kDoubleSize);
    2191       45500 :         __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
    2192      727459 :       } else if (instr->InputAt(0)->IsSimd128Register()) {
    2193             :         // TODO(titzer): use another machine instruction?
    2194         168 :         __ subq(rsp, Immediate(kSimd128Size));
    2195             :         frame_access_state()->IncreaseSPDelta(kSimd128Size /
    2196             :                                               kSystemPointerSize);
    2197         168 :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    2198         168 :                                                          kSimd128Size);
    2199         336 :         __ Movups(Operand(rsp, 0), i.InputSimd128Register(0));
    2200      747148 :       } else if (instr->InputAt(0)->IsStackSlot() ||
    2201      740644 :                  instr->InputAt(0)->IsFloatStackSlot() ||
    2202             :                  instr->InputAt(0)->IsDoubleStackSlot()) {
    2203      727075 :         __ pushq(i.InputOperand(0));
    2204             :         frame_access_state()->IncreaseSPDelta(1);
    2205      727075 :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    2206      727075 :                                                          kSystemPointerSize);
    2207             :       } else {
    2208             :         DCHECK(instr->InputAt(0)->IsSimd128StackSlot());
    2209         216 :         __ Movups(kScratchDoubleReg, i.InputOperand(0));
    2210             :         // TODO(titzer): use another machine instruction?
    2211             :         __ subq(rsp, Immediate(kSimd128Size));
    2212             :         frame_access_state()->IncreaseSPDelta(kSimd128Size /
    2213             :                                               kSystemPointerSize);
    2214         216 :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    2215         216 :                                                          kSimd128Size);
    2216         432 :         __ Movups(Operand(rsp, 0), kScratchDoubleReg);
    2217             :       }
    2218             :       break;
    2219             :     case kX64Poke: {
    2220             :       int slot = MiscField::decode(instr->opcode());
    2221        3392 :       if (HasImmediateInput(instr, 0)) {
    2222        2264 :         __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputImmediate(0));
    2223             :       } else {
    2224        4520 :         __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputRegister(0));
    2225             :       }
    2226             :       break;
    2227             :     }
    2228             :     case kX64Peek: {
    2229             :       int reverse_slot = i.InputInt32(0);
    2230             :       int offset =
    2231        5067 :           FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
    2232        5067 :       if (instr->OutputAt(0)->IsFPRegister()) {
    2233             :         LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
    2234        2528 :         if (op->representation() == MachineRepresentation::kFloat64) {
    2235        2528 :           __ Movsd(i.OutputDoubleRegister(), Operand(rbp, offset));
    2236             :         } else {
    2237             :           DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
    2238        2528 :           __ Movss(i.OutputFloatRegister(), Operand(rbp, offset));
    2239             :         }
    2240             :       } else {
    2241        7617 :         __ movq(i.OutputRegister(), Operand(rbp, offset));
    2242             :       }
    2243             :       break;
    2244             :     }
    2245             :     // TODO(gdeepti): Get rid of redundant moves for F32x4Splat/Extract below
    2246             :     case kX64F32x4Splat: {
    2247         140 :       XMMRegister dst = i.OutputSimd128Register();
    2248         140 :       if (instr->InputAt(0)->IsFPRegister()) {
    2249         140 :         __ movss(dst, i.InputDoubleRegister(0));
    2250             :       } else {
    2251           0 :         __ movss(dst, i.InputOperand(0));
    2252             :       }
    2253         140 :       __ shufps(dst, dst, 0x0);
    2254             :       break;
    2255             :     }
    2256             :     case kX64F32x4ExtractLane: {
    2257             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2258          64 :       __ extractps(kScratchRegister, i.InputSimd128Register(0), i.InputInt8(1));
    2259          32 :       __ movd(i.OutputDoubleRegister(), kScratchRegister);
    2260             :       break;
    2261             :     }
    2262             :     case kX64F32x4ReplaceLane: {
    2263             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2264             :       // The insertps instruction uses imm8[5:4] to indicate the lane
    2265             :       // that needs to be replaced.
    2266          32 :       byte select = i.InputInt8(1) << 4 & 0x30;
    2267          32 :       if (instr->InputAt(2)->IsFPRegister()) {
    2268          64 :         __ insertps(i.OutputSimd128Register(), i.InputDoubleRegister(2),
    2269          32 :                     select);
    2270             :       } else {
    2271           0 :         __ insertps(i.OutputSimd128Register(), i.InputOperand(2), select);
    2272             :       }
    2273             :       break;
    2274             :     }
    2275             :     case kX64F32x4SConvertI32x4: {
    2276           4 :       __ cvtdq2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2277           4 :       break;
    2278             :     }
    2279             :     case kX64F32x4UConvertI32x4: {
    2280             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2281             :       DCHECK_NE(i.OutputSimd128Register(), kScratchDoubleReg);
    2282             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2283             :       XMMRegister dst = i.OutputSimd128Register();
    2284           4 :       __ pxor(kScratchDoubleReg, kScratchDoubleReg);      // zeros
    2285           4 :       __ pblendw(kScratchDoubleReg, dst, 0x55);           // get lo 16 bits
    2286             :       __ psubd(dst, kScratchDoubleReg);                   // get hi 16 bits
    2287           4 :       __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // convert lo exactly
    2288           4 :       __ psrld(dst, 1);                  // divide by 2 to get in unsigned range
    2289           4 :       __ cvtdq2ps(dst, dst);             // convert hi exactly
    2290           4 :       __ addps(dst, dst);                // double hi, exactly
    2291           4 :       __ addps(dst, kScratchDoubleReg);  // add hi and lo, may round.
    2292             :       break;
    2293             :     }
    2294             :     case kX64F32x4Abs: {
    2295             :       XMMRegister dst = i.OutputSimd128Register();
    2296             :       XMMRegister src = i.InputSimd128Register(0);
    2297           4 :       if (dst == src) {
    2298           4 :         __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2299           4 :         __ psrld(kScratchDoubleReg, 1);
    2300           4 :         __ andps(i.OutputSimd128Register(), kScratchDoubleReg);
    2301             :       } else {
    2302           0 :         __ pcmpeqd(dst, dst);
    2303           0 :         __ psrld(dst, 1);
    2304           0 :         __ andps(dst, i.InputSimd128Register(0));
    2305             :       }
    2306             :       break;
    2307             :     }
    2308             :     case kX64F32x4Neg: {
    2309             :       XMMRegister dst = i.OutputSimd128Register();
    2310             :       XMMRegister src = i.InputSimd128Register(0);
    2311           4 :       if (dst == src) {
    2312           4 :         __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2313           4 :         __ pslld(kScratchDoubleReg, 31);
    2314           4 :         __ xorps(i.OutputSimd128Register(), kScratchDoubleReg);
    2315             :       } else {
    2316           0 :         __ pcmpeqd(dst, dst);
    2317           0 :         __ pslld(dst, 31);
    2318           0 :         __ xorps(dst, i.InputSimd128Register(0));
    2319             :       }
    2320             :       break;
    2321             :     }
    2322             :     case kX64F32x4RecipApprox: {
    2323           4 :       __ rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2324           4 :       break;
    2325             :     }
    2326             :     case kX64F32x4RecipSqrtApprox: {
    2327           4 :       __ rsqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2328           4 :       break;
    2329             :     }
    2330             :     case kX64F32x4Add: {
    2331             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2332          12 :       __ addps(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2333          12 :       break;
    2334             :     }
    2335             :     case kX64F32x4AddHoriz: {
    2336             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2337             :       CpuFeatureScope sse_scope(tasm(), SSE3);
    2338           4 :       __ haddps(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2339             :       break;
    2340             :     }
    2341             :     case kX64F32x4Sub: {
    2342             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2343           4 :       __ subps(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2344           4 :       break;
    2345             :     }
    2346             :     case kX64F32x4Mul: {
    2347             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2348           4 :       __ mulps(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2349           4 :       break;
    2350             :     }
    2351             :     case kX64F32x4Min: {
    2352           4 :       XMMRegister src1 = i.InputSimd128Register(1),
    2353           4 :                   dst = i.OutputSimd128Register();
    2354             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    2355             :       // The minps instruction doesn't propagate NaNs and +0's in its first
    2356             :       // operand. Perform minps in both orders, merge the results, and adjust.
    2357           4 :       __ movaps(kScratchDoubleReg, src1);
    2358           4 :       __ minps(kScratchDoubleReg, dst);
    2359           4 :       __ minps(dst, src1);
    2360             :       // propagate -0's and NaNs, which may be non-canonical.
    2361           4 :       __ orps(kScratchDoubleReg, dst);
    2362             :       // Canonicalize NaNs by quieting and clearing the payload.
    2363           4 :       __ cmpps(dst, kScratchDoubleReg, 3);
    2364           4 :       __ orps(kScratchDoubleReg, dst);
    2365           4 :       __ psrld(dst, 10);
    2366           4 :       __ andnps(dst, kScratchDoubleReg);
    2367             :       break;
    2368             :     }
    2369             :     case kX64F32x4Max: {
    2370           4 :       XMMRegister src1 = i.InputSimd128Register(1),
    2371           4 :                   dst = i.OutputSimd128Register();
    2372             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    2373             :       // The maxps instruction doesn't propagate NaNs and +0's in its first
    2374             :       // operand. Perform maxps in both orders, merge the results, and adjust.
    2375           4 :       __ movaps(kScratchDoubleReg, src1);
    2376           4 :       __ maxps(kScratchDoubleReg, dst);
    2377           4 :       __ maxps(dst, src1);
    2378             :       // Find discrepancies.
    2379           4 :       __ xorps(dst, kScratchDoubleReg);
    2380             :       // Propagate NaNs, which may be non-canonical.
    2381           4 :       __ orps(kScratchDoubleReg, dst);
    2382             :       // Propagate sign discrepancy and (subtle) quiet NaNs.
    2383           4 :       __ subps(kScratchDoubleReg, dst);
    2384             :       // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
    2385           4 :       __ cmpps(dst, kScratchDoubleReg, 3);
    2386           4 :       __ psrld(dst, 10);
    2387           4 :       __ andnps(dst, kScratchDoubleReg);
    2388             :       break;
    2389             :     }
    2390             :     case kX64F32x4Eq: {
    2391             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2392           4 :       __ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x0);
    2393           4 :       break;
    2394             :     }
    2395             :     case kX64F32x4Ne: {
    2396             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2397           4 :       __ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x4);
    2398           4 :       break;
    2399             :     }
    2400             :     case kX64F32x4Lt: {
    2401             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2402           8 :       __ cmpltps(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2403             :       break;
    2404             :     }
    2405             :     case kX64F32x4Le: {
    2406             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2407           8 :       __ cmpleps(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2408             :       break;
    2409             :     }
    2410             :     case kX64I32x4Splat: {
    2411        1088 :       XMMRegister dst = i.OutputSimd128Register();
    2412        1088 :       if (instr->InputAt(0)->IsRegister()) {
    2413        1088 :         __ movd(dst, i.InputRegister(0));
    2414             :       } else {
    2415           0 :         __ movd(dst, i.InputOperand(0));
    2416             :       }
    2417        1088 :       __ pshufd(dst, dst, 0x0);
    2418             :       break;
    2419             :     }
    2420             :     case kX64I32x4ExtractLane: {
    2421             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2422        3816 :       __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
    2423             :       break;
    2424             :     }
    2425             :     case kX64I32x4ReplaceLane: {
    2426             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2427        1784 :       if (instr->InputAt(2)->IsRegister()) {
    2428         296 :         __ Pinsrd(i.OutputSimd128Register(), i.InputRegister(2),
    2429         296 :                   i.InputInt8(1));
    2430             :       } else {
    2431        2976 :         __ Pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
    2432             :       }
    2433             :       break;
    2434             :     }
    2435             :     case kX64I32x4SConvertF32x4: {
    2436             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2437             :       XMMRegister dst = i.OutputSimd128Register();
    2438             :       // NAN->0
    2439           4 :       __ movaps(kScratchDoubleReg, dst);
    2440             :       __ cmpeqps(kScratchDoubleReg, kScratchDoubleReg);
    2441             :       __ pand(dst, kScratchDoubleReg);
    2442             :       // Set top bit if >= 0 (but not -0.0!)
    2443             :       __ pxor(kScratchDoubleReg, dst);
    2444             :       // Convert
    2445           4 :       __ cvttps2dq(dst, dst);
    2446             :       // Set top bit if >=0 is now < 0
    2447             :       __ pand(kScratchDoubleReg, dst);
    2448           4 :       __ psrad(kScratchDoubleReg, 31);
    2449             :       // Set positive overflow lanes to 0x7FFFFFFF
    2450             :       __ pxor(dst, kScratchDoubleReg);
    2451             :       break;
    2452             :     }
    2453             :     case kX64I32x4SConvertI16x8Low: {
    2454             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2455           4 :       __ pmovsxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2456             :       break;
    2457             :     }
    2458             :     case kX64I32x4SConvertI16x8High: {
    2459             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2460             :       XMMRegister dst = i.OutputSimd128Register();
    2461           4 :       __ palignr(dst, i.InputSimd128Register(0), 8);
    2462             :       __ pmovsxwd(dst, dst);
    2463             :       break;
    2464             :     }
    2465             :     case kX64I32x4Neg: {
    2466             :       CpuFeatureScope sse_scope(tasm(), SSSE3);
    2467             :       XMMRegister dst = i.OutputSimd128Register();
    2468             :       XMMRegister src = i.InputSimd128Register(0);
    2469           4 :       if (dst == src) {
    2470           4 :         __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2471             :         __ psignd(dst, kScratchDoubleReg);
    2472             :       } else {
    2473           0 :         __ pxor(dst, dst);
    2474             :         __ psubd(dst, src);
    2475             :       }
    2476             :       break;
    2477             :     }
    2478             :     case kX64I32x4Shl: {
    2479         248 :       __ pslld(i.OutputSimd128Register(), i.InputInt8(1));
    2480         124 :       break;
    2481             :     }
    2482             :     case kX64I32x4ShrS: {
    2483         248 :       __ psrad(i.OutputSimd128Register(), i.InputInt8(1));
    2484         124 :       break;
    2485             :     }
    2486             :     case kX64I32x4Add: {
    2487          12 :       __ paddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2488             :       break;
    2489             :     }
    2490             :     case kX64I32x4AddHoriz: {
    2491             :       CpuFeatureScope sse_scope(tasm(), SSSE3);
    2492           4 :       __ phaddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2493             :       break;
    2494             :     }
    2495             :     case kX64I32x4Sub: {
    2496           4 :       __ psubd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2497             :       break;
    2498             :     }
    2499             :     case kX64I32x4Mul: {
    2500             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2501           4 :       __ pmulld(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2502             :       break;
    2503             :     }
    2504             :     case kX64I32x4MinS: {
    2505             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2506           4 :       __ pminsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2507             :       break;
    2508             :     }
    2509             :     case kX64I32x4MaxS: {
    2510             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2511           4 :       __ pmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2512             :       break;
    2513             :     }
    2514             :     case kX64I32x4Eq: {
    2515          12 :       __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2516             :       break;
    2517             :     }
    2518             :     case kX64I32x4Ne: {
    2519          16 :       __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2520             :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2521             :       __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
    2522             :       break;
    2523             :     }
    2524             :     case kX64I32x4GtS: {
    2525           8 :       __ pcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2526             :       break;
    2527             :     }
    2528             :     case kX64I32x4GeS: {
    2529             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2530             :       XMMRegister dst = i.OutputSimd128Register();
    2531             :       XMMRegister src = i.InputSimd128Register(1);
    2532           8 :       __ pminsd(dst, src);
    2533             :       __ pcmpeqd(dst, src);
    2534             :       break;
    2535             :     }
    2536             :     case kX64I32x4UConvertF32x4: {
    2537             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2538             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2539             :       XMMRegister dst = i.OutputSimd128Register();
    2540             :       XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
    2541             :       // NAN->0, negative->0
    2542           4 :       __ pxor(kScratchDoubleReg, kScratchDoubleReg);
    2543           4 :       __ maxps(dst, kScratchDoubleReg);
    2544             :       // scratch: float representation of max_signed
    2545             :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2546           4 :       __ psrld(kScratchDoubleReg, 1);                     // 0x7fffffff
    2547           4 :       __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // 0x4f000000
    2548             :       // tmp: convert (src-max_signed).
    2549             :       // Positive overflow lanes -> 0x7FFFFFFF
    2550             :       // Negative lanes -> 0
    2551           4 :       __ movaps(tmp, dst);
    2552           4 :       __ subps(tmp, kScratchDoubleReg);
    2553             :       __ cmpleps(kScratchDoubleReg, tmp);
    2554           4 :       __ cvttps2dq(tmp, tmp);
    2555             :       __ pxor(tmp, kScratchDoubleReg);
    2556             :       __ pxor(kScratchDoubleReg, kScratchDoubleReg);
    2557             :       __ pmaxsd(tmp, kScratchDoubleReg);
    2558             :       // convert. Overflow lanes above max_signed will be 0x80000000
    2559           4 :       __ cvttps2dq(dst, dst);
    2560             :       // Add (src-max_signed) for overflow lanes.
    2561             :       __ paddd(dst, tmp);
    2562             :       break;
    2563             :     }
    2564             :     case kX64I32x4UConvertI16x8Low: {
    2565             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2566           4 :       __ pmovzxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2567             :       break;
    2568             :     }
    2569             :     case kX64I32x4UConvertI16x8High: {
    2570             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2571             :       XMMRegister dst = i.OutputSimd128Register();
    2572           4 :       __ palignr(dst, i.InputSimd128Register(0), 8);
    2573             :       __ pmovzxwd(dst, dst);
    2574             :       break;
    2575             :     }
    2576             :     case kX64I32x4ShrU: {
    2577         248 :       __ psrld(i.OutputSimd128Register(), i.InputInt8(1));
    2578         124 :       break;
    2579             :     }
    2580             :     case kX64I32x4MinU: {
    2581             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2582           4 :       __ pminud(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2583             :       break;
    2584             :     }
    2585             :     case kX64I32x4MaxU: {
    2586             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2587           4 :       __ pmaxud(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2588             :       break;
    2589             :     }
    2590             :     case kX64I32x4GtU: {
    2591             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2592             :       XMMRegister dst = i.OutputSimd128Register();
    2593             :       XMMRegister src = i.InputSimd128Register(1);
    2594           8 :       __ pmaxud(dst, src);
    2595             :       __ pcmpeqd(dst, src);
    2596             :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2597             :       __ pxor(dst, kScratchDoubleReg);
    2598             :       break;
    2599             :     }
    2600             :     case kX64I32x4GeU: {
    2601             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2602             :       XMMRegister dst = i.OutputSimd128Register();
    2603             :       XMMRegister src = i.InputSimd128Register(1);
    2604           8 :       __ pminud(dst, src);
    2605             :       __ pcmpeqd(dst, src);
    2606             :       break;
    2607             :     }
    2608             :     case kX64S128Zero: {
    2609          22 :       XMMRegister dst = i.OutputSimd128Register();
    2610          22 :       __ xorps(dst, dst);
    2611             :       break;
    2612             :     }
    2613             :     case kX64I16x8Splat: {
    2614         436 :       XMMRegister dst = i.OutputSimd128Register();
    2615         436 :       if (instr->InputAt(0)->IsRegister()) {
    2616         436 :         __ movd(dst, i.InputRegister(0));
    2617             :       } else {
    2618           0 :         __ movd(dst, i.InputOperand(0));
    2619             :       }
    2620         436 :       __ pshuflw(dst, dst, 0x0);
    2621         436 :       __ pshufd(dst, dst, 0x0);
    2622             :       break;
    2623             :     }
    2624             :     case kX64I16x8ExtractLane: {
    2625             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2626          48 :       Register dst = i.OutputRegister();
    2627          96 :       __ pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
    2628          48 :       __ movsxwl(dst, dst);
    2629             :       break;
    2630             :     }
    2631             :     case kX64I16x8ReplaceLane: {
    2632             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2633          52 :       if (instr->InputAt(2)->IsRegister()) {
    2634         104 :         __ pinsrw(i.OutputSimd128Register(), i.InputRegister(2),
    2635          52 :                   i.InputInt8(1));
    2636             :       } else {
    2637           0 :         __ pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
    2638             :       }
    2639             :       break;
    2640             :     }
    2641             :     case kX64I16x8SConvertI8x16Low: {
    2642             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2643           4 :       __ pmovsxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2644             :       break;
    2645             :     }
    2646             :     case kX64I16x8SConvertI8x16High: {
    2647             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2648             :       XMMRegister dst = i.OutputSimd128Register();
    2649           4 :       __ palignr(dst, i.InputSimd128Register(0), 8);
    2650             :       __ pmovsxbw(dst, dst);
    2651             :       break;
    2652             :     }
    2653             :     case kX64I16x8Neg: {
    2654             :       CpuFeatureScope sse_scope(tasm(), SSSE3);
    2655             :       XMMRegister dst = i.OutputSimd128Register();
    2656             :       XMMRegister src = i.InputSimd128Register(0);
    2657           4 :       if (dst == src) {
    2658           4 :         __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2659             :         __ psignw(dst, kScratchDoubleReg);
    2660             :       } else {
    2661           0 :         __ pxor(dst, dst);
    2662             :         __ psubw(dst, src);
    2663             :       }
    2664             :       break;
    2665             :     }
    2666             :     case kX64I16x8Shl: {
    2667         120 :       __ psllw(i.OutputSimd128Register(), i.InputInt8(1));
    2668          60 :       break;
    2669             :     }
    2670             :     case kX64I16x8ShrS: {
    2671         120 :       __ psraw(i.OutputSimd128Register(), i.InputInt8(1));
    2672          60 :       break;
    2673             :     }
    2674             :     case kX64I16x8SConvertI32x4: {
    2675             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2676           4 :       __ packssdw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2677             :       break;
    2678             :     }
    2679             :     case kX64I16x8Add: {
    2680           4 :       __ paddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2681             :       break;
    2682             :     }
    2683             :     case kX64I16x8AddSaturateS: {
    2684           4 :       __ paddsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2685             :       break;
    2686             :     }
    2687             :     case kX64I16x8AddHoriz: {
    2688             :       CpuFeatureScope sse_scope(tasm(), SSSE3);
    2689           4 :       __ phaddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2690             :       break;
    2691             :     }
    2692             :     case kX64I16x8Sub: {
    2693           4 :       __ psubw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2694             :       break;
    2695             :     }
    2696             :     case kX64I16x8SubSaturateS: {
    2697           4 :       __ psubsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2698             :       break;
    2699             :     }
    2700             :     case kX64I16x8Mul: {
    2701             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2702           4 :       __ pmullw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2703             :       break;
    2704             :     }
    2705             :     case kX64I16x8MinS: {
    2706             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2707           4 :       __ pminsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2708             :       break;
    2709             :     }
    2710             :     case kX64I16x8MaxS: {
    2711             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2712           4 :       __ pmaxsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2713             :       break;
    2714             :     }
    2715             :     case kX64I16x8Eq: {
    2716          12 :       __ pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2717             :       break;
    2718             :     }
    2719             :     case kX64I16x8Ne: {
    2720          16 :       __ pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2721             :       __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
    2722             :       __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
    2723             :       break;
    2724             :     }
    2725             :     case kX64I16x8GtS: {
    2726           8 :       __ pcmpgtw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2727             :       break;
    2728             :     }
    2729             :     case kX64I16x8GeS: {
    2730             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2731             :       XMMRegister dst = i.OutputSimd128Register();
    2732             :       XMMRegister src = i.InputSimd128Register(1);
    2733           8 :       __ pminsw(dst, src);
    2734             :       __ pcmpeqw(dst, src);
    2735             :       break;
    2736             :     }
    2737             :     case kX64I16x8UConvertI8x16Low: {
    2738             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2739           4 :       __ pmovzxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2740             :       break;
    2741             :     }
    2742             :     case kX64I16x8UConvertI8x16High: {
    2743             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2744             :       XMMRegister dst = i.OutputSimd128Register();
    2745           4 :       __ palignr(dst, i.InputSimd128Register(0), 8);
    2746             :       __ pmovzxbw(dst, dst);
    2747             :       break;
    2748             :     }
    2749             :     case kX64I16x8ShrU: {
    2750         120 :       __ psrlw(i.OutputSimd128Register(), i.InputInt8(1));
    2751          60 :       break;
    2752             :     }
    2753             :     case kX64I16x8UConvertI32x4: {
    2754             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2755             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2756             :       XMMRegister dst = i.OutputSimd128Register();
    2757             :       // Change negative lanes to 0x7FFFFFFF
    2758           4 :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2759           4 :       __ psrld(kScratchDoubleReg, 1);
    2760             :       __ pminud(dst, kScratchDoubleReg);
    2761             :       __ pminud(kScratchDoubleReg, i.InputSimd128Register(1));
    2762             :       __ packusdw(dst, kScratchDoubleReg);
    2763             :       break;
    2764             :     }
    2765             :     case kX64I16x8AddSaturateU: {
    2766           4 :       __ paddusw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2767             :       break;
    2768             :     }
    2769             :     case kX64I16x8SubSaturateU: {
    2770           4 :       __ psubusw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2771             :       break;
    2772             :     }
    2773             :     case kX64I16x8MinU: {
    2774             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2775           4 :       __ pminuw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2776             :       break;
    2777             :     }
    2778             :     case kX64I16x8MaxU: {
    2779             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2780           4 :       __ pmaxuw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2781             :       break;
    2782             :     }
    2783             :     case kX64I16x8GtU: {
    2784             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2785             :       XMMRegister dst = i.OutputSimd128Register();
    2786             :       XMMRegister src = i.InputSimd128Register(1);
    2787          16 :       __ pmaxuw(dst, src);
    2788             :       __ pcmpeqw(dst, src);
    2789             :       __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
    2790             :       __ pxor(dst, kScratchDoubleReg);
    2791             :       break;
    2792             :     }
    2793             :     case kX64I16x8GeU: {
    2794             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2795             :       XMMRegister dst = i.OutputSimd128Register();
    2796             :       XMMRegister src = i.InputSimd128Register(1);
    2797          16 :       __ pminuw(dst, src);
    2798             :       __ pcmpeqw(dst, src);
    2799             :       break;
    2800             :     }
    2801             :     case kX64I8x16Splat: {
    2802             :       CpuFeatureScope sse_scope(tasm(), SSSE3);
    2803             :       XMMRegister dst = i.OutputSimd128Register();
    2804         320 :       if (instr->InputAt(0)->IsRegister()) {
    2805         320 :         __ movd(dst, i.InputRegister(0));
    2806             :       } else {
    2807           0 :         __ movd(dst, i.InputOperand(0));
    2808             :       }
    2809         320 :       __ xorps(kScratchDoubleReg, kScratchDoubleReg);
    2810             :       __ pshufb(dst, kScratchDoubleReg);
    2811             :       break;
    2812             :     }
    2813             :     case kX64I8x16ExtractLane: {
    2814             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2815          48 :       Register dst = i.OutputRegister();
    2816          96 :       __ pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
    2817          48 :       __ movsxbl(dst, dst);
    2818             :       break;
    2819             :     }
    2820             :     case kX64I8x16ReplaceLane: {
    2821             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2822          84 :       if (instr->InputAt(2)->IsRegister()) {
    2823         168 :         __ pinsrb(i.OutputSimd128Register(), i.InputRegister(2),
    2824          84 :                   i.InputInt8(1));
    2825             :       } else {
    2826           0 :         __ pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
    2827             :       }
    2828             :       break;
    2829             :     }
    2830             :     case kX64I8x16SConvertI16x8: {
    2831             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2832           4 :       __ packsswb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2833             :       break;
    2834             :     }
    2835             :     case kX64I8x16Neg: {
    2836             :       CpuFeatureScope sse_scope(tasm(), SSSE3);
    2837             :       XMMRegister dst = i.OutputSimd128Register();
    2838             :       XMMRegister src = i.InputSimd128Register(0);
    2839           4 :       if (dst == src) {
    2840           4 :         __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2841             :         __ psignb(dst, kScratchDoubleReg);
    2842             :       } else {
    2843           0 :         __ pxor(dst, dst);
    2844             :         __ psubb(dst, src);
    2845             :       }
    2846             :       break;
    2847             :     }
    2848             :     case kX64I8x16Shl: {
    2849             :       XMMRegister dst = i.OutputSimd128Register();
    2850             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    2851          28 :       int8_t shift = i.InputInt8(1) & 0x7;
    2852          28 :       if (shift < 4) {
    2853             :         // For small shifts, doubling is faster.
    2854          60 :         for (int i = 0; i < shift; ++i) {
    2855          24 :           __ paddb(dst, dst);
    2856             :         }
    2857             :       } else {
    2858             :         // Mask off the unwanted bits before word-shifting.
    2859          16 :         __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
    2860          16 :         __ psrlw(kScratchDoubleReg, 8 + shift);
    2861             :         __ packuswb(kScratchDoubleReg, kScratchDoubleReg);
    2862             :         __ pand(dst, kScratchDoubleReg);
    2863          16 :         __ psllw(dst, shift);
    2864             :       }
    2865             :       break;
    2866             :     }
    2867             :     case kX64I8x16ShrS: {
    2868             :       XMMRegister dst = i.OutputSimd128Register();
    2869             :       XMMRegister src = i.InputSimd128Register(0);
    2870          28 :       int8_t shift = i.InputInt8(1) & 0x7;
    2871             :       // Unpack the bytes into words, do arithmetic shifts, and repack.
    2872          28 :       __ punpckhbw(kScratchDoubleReg, src);
    2873             :       __ punpcklbw(dst, src);
    2874          28 :       __ psraw(kScratchDoubleReg, 8 + shift);
    2875          28 :       __ psraw(dst, 8 + shift);
    2876             :       __ packsswb(dst, kScratchDoubleReg);
    2877             :       break;
    2878             :     }
    2879             :     case kX64I8x16Add: {
    2880           4 :       __ paddb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2881             :       break;
    2882             :     }
    2883             :     case kX64I8x16AddSaturateS: {
    2884           4 :       __ paddsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2885             :       break;
    2886             :     }
    2887             :     case kX64I8x16Sub: {
    2888           4 :       __ psubb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2889             :       break;
    2890             :     }
    2891             :     case kX64I8x16SubSaturateS: {
    2892           4 :       __ psubsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2893             :       break;
    2894             :     }
    2895             :     case kX64I8x16Mul: {
    2896             :       XMMRegister dst = i.OutputSimd128Register();
    2897             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    2898             :       XMMRegister right = i.InputSimd128Register(1);
    2899             :       XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
    2900             :       // I16x8 view of I8x16
    2901             :       // left = AAaa AAaa ... AAaa AAaa
    2902             :       // right= BBbb BBbb ... BBbb BBbb
    2903             :       // t = 00AA 00AA ... 00AA 00AA
    2904             :       // s = 00BB 00BB ... 00BB 00BB
    2905           4 :       __ movaps(tmp, dst);
    2906           4 :       __ movaps(kScratchDoubleReg, right);
    2907           4 :       __ psrlw(tmp, 8);
    2908           4 :       __ psrlw(kScratchDoubleReg, 8);
    2909             :       // dst = left * 256
    2910           4 :       __ psllw(dst, 8);
    2911             :       // t = I16x8Mul(t, s)
    2912             :       //    => __PP __PP ...  __PP  __PP
    2913             :       __ pmullw(tmp, kScratchDoubleReg);
    2914             :       // dst = I16x8Mul(left * 256, right)
    2915             :       //    => pp__ pp__ ...  pp__  pp__
    2916             :       __ pmullw(dst, right);
    2917             :       // t = I16x8Shl(t, 8)
    2918             :       //    => PP00 PP00 ...  PP00  PP00
    2919           4 :       __ psllw(tmp, 8);
    2920             :       // dst = I16x8Shr(dst, 8)
    2921             :       //    => 00pp 00pp ...  00pp  00pp
    2922           4 :       __ psrlw(dst, 8);
    2923             :       // dst = I16x8Or(dst, t)
    2924             :       //    => PPpp PPpp ...  PPpp  PPpp
    2925             :       __ por(dst, tmp);
    2926             :       break;
    2927             :     }
    2928             :     case kX64I8x16MinS: {
    2929             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2930           4 :       __ pminsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2931             :       break;
    2932             :     }
    2933             :     case kX64I8x16MaxS: {
    2934             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2935           4 :       __ pmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2936             :       break;
    2937             :     }
    2938             :     case kX64I8x16Eq: {
    2939          12 :       __ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2940             :       break;
    2941             :     }
    2942             :     case kX64I8x16Ne: {
    2943          16 :       __ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2944             :       __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
    2945             :       __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
    2946             :       break;
    2947             :     }
    2948             :     case kX64I8x16GtS: {
    2949           8 :       __ pcmpgtb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2950             :       break;
    2951             :     }
    2952             :     case kX64I8x16GeS: {
    2953             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2954             :       XMMRegister dst = i.OutputSimd128Register();
    2955             :       XMMRegister src = i.InputSimd128Register(1);
    2956           8 :       __ pminsb(dst, src);
    2957             :       __ pcmpeqb(dst, src);
    2958             :       break;
    2959             :     }
    2960             :     case kX64I8x16UConvertI16x8: {
    2961             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2962             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2963             :       XMMRegister dst = i.OutputSimd128Register();
    2964             :       // Change negative lanes to 0x7FFF
    2965           4 :       __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
    2966           4 :       __ psrlw(kScratchDoubleReg, 1);
    2967             :       __ pminuw(dst, kScratchDoubleReg);
    2968             :       __ pminuw(kScratchDoubleReg, i.InputSimd128Register(1));
    2969             :       __ packuswb(dst, kScratchDoubleReg);
    2970             :       break;
    2971             :     }
    2972             :     case kX64I8x16ShrU: {
    2973             :       XMMRegister dst = i.OutputSimd128Register();
    2974             :       XMMRegister src = i.InputSimd128Register(0);
    2975          28 :       int8_t shift = i.InputInt8(1) & 0x7;
    2976             :       // Unpack the bytes into words, do logical shifts, and repack.
    2977          28 :       __ punpckhbw(kScratchDoubleReg, src);
    2978             :       __ punpcklbw(dst, src);
    2979          28 :       __ psrlw(kScratchDoubleReg, 8 + shift);
    2980          28 :       __ psrlw(dst, 8 + shift);
    2981             :       __ packuswb(dst, kScratchDoubleReg);
    2982             :       break;
    2983             :     }
    2984             :     case kX64I8x16AddSaturateU: {
    2985           4 :       __ paddusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2986             :       break;
    2987             :     }
    2988             :     case kX64I8x16SubSaturateU: {
    2989           4 :       __ psubusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2990             :       break;
    2991             :     }
    2992             :     case kX64I8x16MinU: {
    2993             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2994           4 :       __ pminub(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2995             :       break;
    2996             :     }
    2997             :     case kX64I8x16MaxU: {
    2998             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2999           4 :       __ pmaxub(i.OutputSimd128Register(), i.InputSimd128Register(1));
    3000             :       break;
    3001             :     }
    3002             :     case kX64I8x16GtU: {
    3003             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    3004             :       XMMRegister dst = i.OutputSimd128Register();
    3005             :       XMMRegister src = i.InputSimd128Register(1);
    3006          16 :       __ pmaxub(dst, src);
    3007             :       __ pcmpeqb(dst, src);
    3008             :       __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
    3009             :       __ pxor(dst, kScratchDoubleReg);
    3010             :       break;
    3011             :     }
    3012             :     case kX64I8x16GeU: {
    3013             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    3014             :       XMMRegister dst = i.OutputSimd128Register();
    3015             :       XMMRegister src = i.InputSimd128Register(1);
    3016          16 :       __ pminub(dst, src);
    3017             :       __ pcmpeqb(dst, src);
    3018             :       break;
    3019             :     }
    3020             :     case kX64S128And: {
    3021           4 :       __ pand(i.OutputSimd128Register(), i.InputSimd128Register(1));
    3022             :       break;
    3023             :     }
    3024             :     case kX64S128Or: {
    3025           4 :       __ por(i.OutputSimd128Register(), i.InputSimd128Register(1));
    3026             :       break;
    3027             :     }
    3028             :     case kX64S128Xor: {
    3029           4 :       __ pxor(i.OutputSimd128Register(), i.InputSimd128Register(1));
    3030             :       break;
    3031             :     }
    3032             :     case kX64S128Not: {
    3033             :       XMMRegister dst = i.OutputSimd128Register();
    3034             :       XMMRegister src = i.InputSimd128Register(0);
    3035           4 :       if (dst == src) {
    3036           4 :         __ movaps(kScratchDoubleReg, dst);
    3037             :         __ pcmpeqd(dst, dst);
    3038             :         __ pxor(dst, kScratchDoubleReg);
    3039             :       } else {
    3040           0 :         __ pcmpeqd(dst, dst);
    3041             :         __ pxor(dst, src);
    3042             :       }
    3043             : 
    3044             :       break;
    3045             :     }
    3046             :     case kX64S128Select: {
    3047             :       // Mask used here is stored in dst.
    3048          28 :       XMMRegister dst = i.OutputSimd128Register();
    3049          28 :       __ movaps(kScratchDoubleReg, i.InputSimd128Register(1));
    3050          28 :       __ xorps(kScratchDoubleReg, i.InputSimd128Register(2));
    3051          28 :       __ andps(dst, kScratchDoubleReg);
    3052          28 :       __ xorps(dst, i.InputSimd128Register(2));
    3053             :       break;
    3054             :     }
    3055             :     case kX64S8x16Shuffle: {
    3056             :       XMMRegister dst = i.OutputSimd128Register();
    3057             :       Register tmp = i.TempRegister(0);
    3058             :       // Prepare 16 byte aligned buffer for shuffle control mask
    3059        1460 :       __ movq(tmp, rsp);
    3060             :       __ andq(rsp, Immediate(-16));
    3061        1460 :       if (instr->InputCount() == 5) {  // only one input operand
    3062         696 :         uint32_t mask[4] = {};
    3063             :         DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    3064        6264 :         for (int j = 4; j > 0; j--) {
    3065        5568 :           mask[j - 1] = i.InputUint32(j);
    3066             :         }
    3067             : 
    3068         696 :         SetupShuffleMaskOnStack(tasm(), mask);
    3069        1392 :         __ pshufb(dst, Operand(rsp, 0));
    3070             :       } else {  // two input operands
    3071             :         DCHECK_EQ(6, instr->InputCount());
    3072        1528 :         ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 0);
    3073         764 :         uint32_t mask[4] = {};
    3074        6876 :         for (int j = 5; j > 1; j--) {
    3075        3056 :           uint32_t lanes = i.InputUint32(j);
    3076       27504 :           for (int k = 0; k < 32; k += 8) {
    3077       12224 :             uint8_t lane = lanes >> k;
    3078       12224 :             mask[j - 2] |= (lane < kSimd128Size ? lane : 0x80) << k;
    3079             :           }
    3080             :         }
    3081         764 :         SetupShuffleMaskOnStack(tasm(), mask);
    3082        1528 :         __ pshufb(kScratchDoubleReg, Operand(rsp, 0));
    3083         764 :         uint32_t mask1[4] = {};
    3084         764 :         if (instr->InputAt(1)->IsSimd128Register()) {
    3085             :           XMMRegister src1 = i.InputSimd128Register(1);
    3086         764 :           if (src1 != dst) __ movups(dst, src1);
    3087             :         } else {
    3088           0 :           __ movups(dst, i.InputOperand(1));
    3089             :         }
    3090        6876 :         for (int j = 5; j > 1; j--) {
    3091        3056 :           uint32_t lanes = i.InputUint32(j);
    3092       27504 :           for (int k = 0; k < 32; k += 8) {
    3093       12224 :             uint8_t lane = lanes >> k;
    3094       12224 :             mask1[j - 2] |= (lane >= kSimd128Size ? (lane & 0x0F) : 0x80) << k;
    3095             :           }
    3096             :         }
    3097         764 :         SetupShuffleMaskOnStack(tasm(), mask1);
    3098        1528 :         __ pshufb(dst, Operand(rsp, 0));
    3099             :         __ por(dst, kScratchDoubleReg);
    3100             :       }
    3101             :       __ movq(rsp, tmp);
    3102             :       break;
    3103             :     }
    3104             :     case kX64S32x4Swizzle: {
    3105             :       DCHECK_EQ(2, instr->InputCount());
    3106         840 :       ASSEMBLE_SIMD_IMM_INSTR(pshufd, i.OutputSimd128Register(), 0,
    3107             :                               i.InputInt8(1));
    3108             :       break;
    3109             :     }
    3110             :     case kX64S32x4Shuffle: {
    3111             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    3112             :       DCHECK_EQ(4, instr->InputCount());  // Swizzles should be handled above.
    3113             :       int8_t shuffle = i.InputInt8(2);
    3114             :       DCHECK_NE(0xe4, shuffle);  // A simple blend should be handled below.
    3115         928 :       ASSEMBLE_SIMD_IMM_INSTR(pshufd, kScratchDoubleReg, 1, shuffle);
    3116         928 :       ASSEMBLE_SIMD_IMM_INSTR(pshufd, i.OutputSimd128Register(), 0, shuffle);
    3117         928 :       __ pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputInt8(3));
    3118             :       break;
    3119             :     }
    3120             :     case kX64S16x8Blend: {
    3121         112 :       ASSEMBLE_SIMD_IMM_SHUFFLE(pblendw, SSE4_1, i.InputInt8(2));
    3122          56 :       break;
    3123             :     }
    3124             :     case kX64S16x8HalfShuffle1: {
    3125         212 :       XMMRegister dst = i.OutputSimd128Register();
    3126         636 :       ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, i.InputInt8(1));
    3127         212 :       __ pshufhw(dst, dst, i.InputInt8(2));
    3128             :       break;
    3129             :     }
    3130             :     case kX64S16x8HalfShuffle2: {
    3131             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    3132         192 :       XMMRegister dst = i.OutputSimd128Register();
    3133         576 :       ASSEMBLE_SIMD_IMM_INSTR(pshuflw, kScratchDoubleReg, 1, i.InputInt8(2));
    3134         192 :       __ pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputInt8(3));
    3135         576 :       ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, i.InputInt8(2));
    3136         192 :       __ pshufhw(dst, dst, i.InputInt8(3));
    3137         192 :       __ pblendw(dst, kScratchDoubleReg, i.InputInt8(4));
    3138             :       break;
    3139             :     }
    3140             :     case kX64S8x16Alignr: {
    3141         480 :       ASSEMBLE_SIMD_IMM_SHUFFLE(palignr, SSSE3, i.InputInt8(2));
    3142         240 :       break;
    3143             :     }
    3144             :     case kX64S16x8Dup: {
    3145         180 :       XMMRegister dst = i.OutputSimd128Register();
    3146         180 :       int8_t lane = i.InputInt8(1) & 0x7;
    3147         180 :       int8_t lane4 = lane & 0x3;
    3148         180 :       int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
    3149         180 :       if (lane < 4) {
    3150         296 :         ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, half_dup);
    3151         148 :         __ pshufd(dst, dst, 0);
    3152             :       } else {
    3153          64 :         ASSEMBLE_SIMD_IMM_INSTR(pshufhw, dst, 0, half_dup);
    3154          32 :         __ pshufd(dst, dst, 0xaa);
    3155             :       }
    3156             :       break;
    3157             :     }
    3158             :     case kX64S8x16Dup: {
    3159             :       XMMRegister dst = i.OutputSimd128Register();
    3160         164 :       int8_t lane = i.InputInt8(1) & 0xf;
    3161             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3162         164 :       if (lane < 8) {
    3163         156 :         __ punpcklbw(dst, dst);
    3164             :       } else {
    3165           8 :         __ punpckhbw(dst, dst);
    3166             :       }
    3167         164 :       lane &= 0x7;
    3168         164 :       int8_t lane4 = lane & 0x3;
    3169         164 :       int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
    3170         164 :       if (lane < 4) {
    3171         164 :         __ pshuflw(dst, dst, half_dup);
    3172         164 :         __ pshufd(dst, dst, 0);
    3173             :       } else {
    3174           0 :         __ pshufhw(dst, dst, half_dup);
    3175           0 :         __ pshufd(dst, dst, 0xaa);
    3176             :       }
    3177             :       break;
    3178             :     }
    3179             :     case kX64S64x2UnpackHigh:
    3180           0 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhqdq);
    3181             :       break;
    3182             :     case kX64S32x4UnpackHigh:
    3183         360 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhdq);
    3184             :       break;
    3185             :     case kX64S16x8UnpackHigh:
    3186         324 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhwd);
    3187             :       break;
    3188             :     case kX64S8x16UnpackHigh:
    3189         252 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhbw);
    3190             :       break;
    3191             :     case kX64S64x2UnpackLow:
    3192          24 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklqdq);
    3193             :       break;
    3194             :     case kX64S32x4UnpackLow:
    3195         276 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckldq);
    3196             :       break;
    3197             :     case kX64S16x8UnpackLow:
    3198         252 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklwd);
    3199             :       break;
    3200             :     case kX64S8x16UnpackLow:
    3201         264 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklbw);
    3202             :       break;
    3203             :     case kX64S16x8UnzipHigh: {
    3204             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    3205             :       XMMRegister dst = i.OutputSimd128Register();
    3206             :       XMMRegister src2 = dst;
    3207             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3208         116 :       if (instr->InputCount() == 2) {
    3209         216 :         ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
    3210         108 :         __ psrld(kScratchDoubleReg, 16);
    3211             :         src2 = kScratchDoubleReg;
    3212             :       }
    3213         116 :       __ psrld(dst, 16);
    3214             :       __ packusdw(dst, src2);
    3215             :       break;
    3216             :     }
    3217             :     case kX64S16x8UnzipLow: {
    3218             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    3219             :       XMMRegister dst = i.OutputSimd128Register();
    3220             :       XMMRegister src2 = dst;
    3221             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3222          76 :       __ pxor(kScratchDoubleReg, kScratchDoubleReg);
    3223          76 :       if (instr->InputCount() == 2) {
    3224         136 :         ASSEMBLE_SIMD_IMM_INSTR(pblendw, kScratchDoubleReg, 1, 0x55);
    3225             :         src2 = kScratchDoubleReg;
    3226             :       }
    3227          76 :       __ pblendw(dst, kScratchDoubleReg, 0xaa);
    3228             :       __ packusdw(dst, src2);
    3229             :       break;
    3230             :     }
    3231             :     case kX64S8x16UnzipHigh: {
    3232             :       XMMRegister dst = i.OutputSimd128Register();
    3233             :       XMMRegister src2 = dst;
    3234             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3235          80 :       if (instr->InputCount() == 2) {
    3236         144 :         ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
    3237          72 :         __ psrlw(kScratchDoubleReg, 8);
    3238             :         src2 = kScratchDoubleReg;
    3239             :       }
    3240          80 :       __ psrlw(dst, 8);
    3241             :       __ packuswb(dst, src2);
    3242             :       break;
    3243             :     }
    3244             :     case kX64S8x16UnzipLow: {
    3245             :       XMMRegister dst = i.OutputSimd128Register();
    3246             :       XMMRegister src2 = dst;
    3247             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3248         156 :       if (instr->InputCount() == 2) {
    3249         280 :         ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
    3250         140 :         __ psllw(kScratchDoubleReg, 8);
    3251         140 :         __ psrlw(kScratchDoubleReg, 8);
    3252             :         src2 = kScratchDoubleReg;
    3253             :       }
    3254         156 :       __ psllw(dst, 8);
    3255         156 :       __ psrlw(dst, 8);
    3256             :       __ packuswb(dst, src2);
    3257             :       break;
    3258             :     }
    3259             :     case kX64S8x16TransposeLow: {
    3260             :       XMMRegister dst = i.OutputSimd128Register();
    3261             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3262          96 :       __ psllw(dst, 8);
    3263          96 :       if (instr->InputCount() == 1) {
    3264           8 :         __ movups(kScratchDoubleReg, dst);
    3265             :       } else {
    3266             :         DCHECK_EQ(2, instr->InputCount());
    3267         176 :         ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
    3268          88 :         __ psllw(kScratchDoubleReg, 8);
    3269             :       }
    3270          96 :       __ psrlw(dst, 8);
    3271             :       __ por(dst, kScratchDoubleReg);
    3272             :       break;
    3273             :     }
    3274             :     case kX64S8x16TransposeHigh: {
    3275             :       XMMRegister dst = i.OutputSimd128Register();
    3276             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3277         132 :       __ psrlw(dst, 8);
    3278         132 :       if (instr->InputCount() == 1) {
    3279           8 :         __ movups(kScratchDoubleReg, dst);
    3280             :       } else {
    3281             :         DCHECK_EQ(2, instr->InputCount());
    3282         248 :         ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
    3283         124 :         __ psrlw(kScratchDoubleReg, 8);
    3284             :       }
    3285         132 :       __ psllw(kScratchDoubleReg, 8);
    3286             :       __ por(dst, kScratchDoubleReg);
    3287             :       break;
    3288             :     }
    3289             :     case kX64S8x8Reverse:
    3290             :     case kX64S8x4Reverse:
    3291             :     case kX64S8x2Reverse: {
    3292             :       DCHECK_EQ(1, instr->InputCount());
    3293             :       XMMRegister dst = i.OutputSimd128Register();
    3294             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3295         280 :       if (arch_opcode != kX64S8x2Reverse) {
    3296             :         // First shuffle words into position.
    3297         188 :         int8_t shuffle_mask = arch_opcode == kX64S8x4Reverse ? 0xB1 : 0x1B;
    3298         188 :         __ pshuflw(dst, dst, shuffle_mask);
    3299         188 :         __ pshufhw(dst, dst, shuffle_mask);
    3300             :       }
    3301         280 :       __ movaps(kScratchDoubleReg, dst);
    3302         280 :       __ psrlw(kScratchDoubleReg, 8);
    3303         280 :       __ psllw(dst, 8);
    3304             :       __ por(dst, kScratchDoubleReg);
    3305             :       break;
    3306             :     }
    3307             :     case kX64S1x4AnyTrue:
    3308             :     case kX64S1x8AnyTrue:
    3309             :     case kX64S1x16AnyTrue: {
    3310             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    3311             :       Register dst = i.OutputRegister();
    3312             :       XMMRegister src = i.InputSimd128Register(0);
    3313             :       Register tmp = i.TempRegister(0);
    3314          60 :       __ xorq(tmp, tmp);
    3315             :       __ movq(dst, Immediate(1));
    3316             :       __ ptest(src, src);
    3317          60 :       __ cmovq(zero, dst, tmp);
    3318             :       break;
    3319             :     }
    3320             :     case kX64S1x4AllTrue:
    3321             :     case kX64S1x8AllTrue:
    3322             :     case kX64S1x16AllTrue: {
    3323             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    3324             :       Register dst = i.OutputRegister();
    3325             :       XMMRegister src = i.InputSimd128Register(0);
    3326             :       Register tmp = i.TempRegister(0);
    3327          60 :       __ movq(tmp, Immediate(1));
    3328             :       __ xorq(dst, dst);
    3329             :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    3330             :       __ pxor(kScratchDoubleReg, src);
    3331             :       __ ptest(kScratchDoubleReg, kScratchDoubleReg);
    3332          60 :       __ cmovq(zero, dst, tmp);
    3333             :       break;
    3334             :     }
    3335             :     case kX64StackCheck:
    3336      562860 :       __ CompareRoot(rsp, RootIndex::kStackLimit);
    3337      562860 :       break;
    3338             :     case kWord32AtomicExchangeInt8: {
    3339         801 :       __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
    3340         801 :       __ movsxbl(i.InputRegister(0), i.InputRegister(0));
    3341         801 :       break;
    3342             :     }
    3343             :     case kWord32AtomicExchangeUint8: {
    3344         646 :       __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
    3345             :       __ movzxbl(i.InputRegister(0), i.InputRegister(0));
    3346             :       break;
    3347             :     }
    3348             :     case kWord32AtomicExchangeInt16: {
    3349         833 :       __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
    3350         835 :       __ movsxwl(i.InputRegister(0), i.InputRegister(0));
    3351         835 :       break;
    3352             :     }
    3353             :     case kWord32AtomicExchangeUint16: {
    3354         800 :       __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
    3355             :       __ movzxwl(i.InputRegister(0), i.InputRegister(0));
    3356             :       break;
    3357             :     }
    3358             :     case kWord32AtomicExchangeWord32: {
    3359        1276 :       __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
    3360             :       break;
    3361             :     }
    3362             :     case kWord32AtomicCompareExchangeInt8: {
    3363         112 :       __ lock();
    3364         112 :       __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
    3365         112 :       __ movsxbl(rax, rax);
    3366         112 :       break;
    3367             :     }
    3368             :     case kWord32AtomicCompareExchangeUint8: {
    3369         133 :       __ lock();
    3370         133 :       __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
    3371             :       __ movzxbl(rax, rax);
    3372             :       break;
    3373             :     }
    3374             :     case kWord32AtomicCompareExchangeInt16: {
    3375         112 :       __ lock();
    3376         112 :       __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
    3377         112 :       __ movsxwl(rax, rax);
    3378         112 :       break;
    3379             :     }
    3380             :     case kWord32AtomicCompareExchangeUint16: {
    3381         133 :       __ lock();
    3382         133 :       __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
    3383             :       __ movzxwl(rax, rax);
    3384             :       break;
    3385             :     }
    3386             :     case kWord32AtomicCompareExchangeWord32: {
    3387         264 :       __ lock();
    3388             :       __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
    3389             :       break;
    3390             :     }
    3391             : #define ATOMIC_BINOP_CASE(op, inst)              \
    3392             :   case kWord32Atomic##op##Int8:                  \
    3393             :     ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
    3394             :     __ movsxbl(rax, rax);                        \
    3395             :     break;                                       \
    3396             :   case kWord32Atomic##op##Uint8:                 \
    3397             :     ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
    3398             :     __ movzxbl(rax, rax);                        \
    3399             :     break;                                       \
    3400             :   case kWord32Atomic##op##Int16:                 \
    3401             :     ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
    3402             :     __ movsxwl(rax, rax);                        \
    3403             :     break;                                       \
    3404             :   case kWord32Atomic##op##Uint16:                \
    3405             :     ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
    3406             :     __ movzxwl(rax, rax);                        \
    3407             :     break;                                       \
    3408             :   case kWord32Atomic##op##Word32:                \
    3409             :     ASSEMBLE_ATOMIC_BINOP(inst, movl, cmpxchgl); \
    3410             :     break;
    3411        8752 :       ATOMIC_BINOP_CASE(Add, addl)
    3412        8267 :       ATOMIC_BINOP_CASE(Sub, subl)
    3413        8186 :       ATOMIC_BINOP_CASE(And, andl)
    3414        7557 :       ATOMIC_BINOP_CASE(Or, orl)
    3415        8369 :       ATOMIC_BINOP_CASE(Xor, xorl)
    3416             : #undef ATOMIC_BINOP_CASE
    3417             :     case kX64Word64AtomicExchangeUint8: {
    3418        1454 :       __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
    3419             :       __ movzxbq(i.InputRegister(0), i.InputRegister(0));
    3420             :       break;
    3421             :     }
    3422             :     case kX64Word64AtomicExchangeUint16: {
    3423        1280 :       __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
    3424             :       __ movzxwq(i.InputRegister(0), i.InputRegister(0));
    3425             :       break;
    3426             :     }
    3427             :     case kX64Word64AtomicExchangeUint32: {
    3428         714 :       __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
    3429             :       break;
    3430             :     }
    3431             :     case kX64Word64AtomicExchangeUint64: {
    3432         963 :       __ xchgq(i.InputRegister(0), i.MemoryOperand(1));
    3433             :       break;
    3434             :     }
    3435             :     case kX64Word64AtomicCompareExchangeUint8: {
    3436          21 :       __ lock();
    3437          21 :       __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
    3438             :       __ movzxbq(rax, rax);
    3439             :       break;
    3440             :     }
    3441             :     case kX64Word64AtomicCompareExchangeUint16: {
    3442          28 :       __ lock();
    3443          28 :       __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
    3444             :       __ movzxwq(rax, rax);
    3445             :       break;
    3446             :     }
    3447             :     case kX64Word64AtomicCompareExchangeUint32: {
    3448          29 :       __ lock();
    3449             :       __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
    3450             :       break;
    3451             :     }
    3452             :     case kX64Word64AtomicCompareExchangeUint64: {
    3453         276 :       __ lock();
    3454             :       __ cmpxchgq(i.MemoryOperand(2), i.InputRegister(1));
    3455             :       break;
    3456             :     }
    3457             : #define ATOMIC64_BINOP_CASE(op, inst)              \
    3458             :   case kX64Word64Atomic##op##Uint8:                \
    3459             :     ASSEMBLE_ATOMIC64_BINOP(inst, movb, cmpxchgb); \
    3460             :     __ movzxbq(rax, rax);                          \
    3461             :     break;                                         \
    3462             :   case kX64Word64Atomic##op##Uint16:               \
    3463             :     ASSEMBLE_ATOMIC64_BINOP(inst, movw, cmpxchgw); \
    3464             :     __ movzxwq(rax, rax);                          \
    3465             :     break;                                         \
    3466             :   case kX64Word64Atomic##op##Uint32:               \
    3467             :     ASSEMBLE_ATOMIC64_BINOP(inst, movl, cmpxchgl); \
    3468             :     break;                                         \
    3469             :   case kX64Word64Atomic##op##Uint64:               \
    3470             :     ASSEMBLE_ATOMIC64_BINOP(inst, movq, cmpxchgq); \
    3471             :     break;
    3472        8542 :       ATOMIC64_BINOP_CASE(Add, addq)
    3473        7063 :       ATOMIC64_BINOP_CASE(Sub, subq)
    3474        8118 :       ATOMIC64_BINOP_CASE(And, andq)
    3475        8870 :       ATOMIC64_BINOP_CASE(Or, orq)
    3476        7084 :       ATOMIC64_BINOP_CASE(Xor, xorq)
    3477             : #undef ATOMIC64_BINOP_CASE
    3478             :     case kWord32AtomicLoadInt8:
    3479             :     case kWord32AtomicLoadUint8:
    3480             :     case kWord32AtomicLoadInt16:
    3481             :     case kWord32AtomicLoadUint16:
    3482             :     case kWord32AtomicLoadWord32:
    3483             :     case kWord32AtomicStoreWord8:
    3484             :     case kWord32AtomicStoreWord16:
    3485             :     case kWord32AtomicStoreWord32:
    3486             :     case kX64Word64AtomicLoadUint8:
    3487             :     case kX64Word64AtomicLoadUint16:
    3488             :     case kX64Word64AtomicLoadUint32:
    3489             :     case kX64Word64AtomicLoadUint64:
    3490             :     case kX64Word64AtomicStoreWord8:
    3491             :     case kX64Word64AtomicStoreWord16:
    3492             :     case kX64Word64AtomicStoreWord32:
    3493             :     case kX64Word64AtomicStoreWord64:
    3494           0 :       UNREACHABLE();  // Won't be generated by instruction selector.
    3495             :       break;
    3496             :   }
    3497             :   return kSuccess;
    3498             : }  // NOLINT(readability/fn_size)
    3499             : 
    3500             : #undef ASSEMBLE_UNOP  // From here on the ASSEMBLE_* helper macros are undefined so they cannot leak into later code.
    3501             : #undef ASSEMBLE_BINOP
    3502             : #undef ASSEMBLE_COMPARE
    3503             : #undef ASSEMBLE_MULT
    3504             : #undef ASSEMBLE_SHIFT
    3505             : #undef ASSEMBLE_MOVX
    3506             : #undef ASSEMBLE_SSE_BINOP
    3507             : #undef ASSEMBLE_SSE_UNOP
    3508             : #undef ASSEMBLE_AVX_BINOP
    3509             : #undef ASSEMBLE_IEEE754_BINOP
    3510             : #undef ASSEMBLE_IEEE754_UNOP
    3511             : #undef ASSEMBLE_ATOMIC_BINOP
    3512             : #undef ASSEMBLE_ATOMIC64_BINOP
    3513             : #undef ASSEMBLE_SIMD_INSTR
    3514             : #undef ASSEMBLE_SIMD_IMM_INSTR
    3515             : #undef ASSEMBLE_SIMD_PUNPCK_SHUFFLE
    3516             : #undef ASSEMBLE_SIMD_IMM_SHUFFLE
    3517             : 
    3518             : namespace {
    3519             : 
    3520     6215662 : Condition FlagsConditionToCondition(FlagsCondition condition) {  // Maps a FlagsCondition to the Condition passed to j/setcc/cmovq below.
    3521     6215662 :   switch (condition) {
    3522             :     case kUnorderedEqual:  // Unordered variants share the ordered code; callers test the parity flag separately.
    3523             :     case kEqual:
    3524             :       return equal;
    3525             :     case kUnorderedNotEqual:
    3526             :     case kNotEqual:
    3527     1455948 :       return not_equal;
    3528             :     case kSignedLessThan:
    3529      176737 :       return less;
    3530             :     case kSignedGreaterThanOrEqual:
    3531       59103 :       return greater_equal;
    3532             :     case kSignedLessThanOrEqual:
    3533       67408 :       return less_equal;
    3534             :     case kSignedGreaterThan:
    3535       67152 :       return greater;
    3536             :     case kUnsignedLessThan:
    3537      165286 :       return below;
    3538             :     case kUnsignedGreaterThanOrEqual:
    3539      377166 :       return above_equal;
    3540             :     case kUnsignedLessThanOrEqual:
    3541      945490 :       return below_equal;
    3542             :     case kUnsignedGreaterThan:
    3543      125711 :       return above;
    3544             :     case kOverflow:
    3545      179872 :       return overflow;
    3546             :     case kNotOverflow:
    3547        1008 :       return no_overflow;
    3548             :     default:  // Any other condition has no mapping here and ends up in UNREACHABLE().
    3549             :       break;
    3550             :   }
    3551           0 :   UNREACHABLE();
    3552             : }
    3553             : 
    3554             : }  // namespace
    3555             : 
    3556             : // Assembles branches after this instruction: a conditional jump to the true label, then a jump to the false label unless branch->fallthru is set.
    3557     5366374 : void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
    3558             :   Label::Distance flabel_distance =
    3559     5366374 :       branch->fallthru ? Label::kNear : Label::kFar;  // kNear only when the false label is the fallthrough.
    3560     5366374 :   Label* tlabel = branch->true_label;
    3561     5366374 :   Label* flabel = branch->false_label;
    3562     5366374 :   if (branch->condition == kUnorderedEqual) {
    3563       57543 :     __ j(parity_even, flabel, flabel_distance);  // Parity flag set (presumably an unordered compare - confirm): go to the false label.
    3564     5308831 :   } else if (branch->condition == kUnorderedNotEqual) {
    3565      104727 :     __ j(parity_even, tlabel);  // Parity flag set: go straight to the true label.
    3566             :   }
    3567     5366372 :   __ j(FlagsConditionToCondition(branch->condition), tlabel);  // Regular conditional jump to the true label.
    3568             : 
    3569     5366397 :   if (!branch->fallthru) __ jmp(flabel, flabel_distance);  // Otherwise execution simply continues after the branch.
    3570     5366397 : }
    3571             : 
    3572           0 : void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,  // Zeroes kSpeculationPoisonRegister when the negation of `condition` holds.
    3573             :                                             Instruction* instr) {
    3574             :   // TODO(jarin) Handle float comparisons (kUnordered[Not]Equal).
    3575           0 :   if (condition == kUnorderedEqual || condition == kUnorderedNotEqual) {
    3576             :     return;  // No poisoning is emitted for the unordered conditions.
    3577             :   }
    3578             : 
    3579             :   condition = NegateFlagsCondition(condition);  // The cmovq below fires on the opposite of the incoming condition.
    3580           0 :   __ movl(kScratchRegister, Immediate(0));  // NOTE(review): movl rather than xor, presumably to leave the flags intact for cmovq - confirm.
    3581           0 :   __ cmovq(FlagsConditionToCondition(condition), kSpeculationPoisonRegister,
    3582           0 :            kScratchRegister);
    3583             : }
    3584             : 
    3585      330893 : void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,  // Same branch sequence as AssembleArchBranch, plus a counter-driven forced jump when FLAG_deopt_every_n_times > 0.
    3586             :                                             BranchInfo* branch) {
    3587             :   Label::Distance flabel_distance =
    3588      330893 :       branch->fallthru ? Label::kNear : Label::kFar;
    3589      330893 :   Label* tlabel = branch->true_label;
    3590      330893 :   Label* flabel = branch->false_label;
    3591      330893 :   Label nodeopt;
    3592      330893 :   if (branch->condition == kUnorderedEqual) {
    3593           0 :     __ j(parity_even, flabel, flabel_distance);  // Parity flag set: go to the false label.
    3594      330893 :   } else if (branch->condition == kUnorderedNotEqual) {
    3595        4138 :     __ j(parity_even, tlabel);  // Parity flag set: go to the true label.
    3596             :   }
    3597      330893 :   __ j(FlagsConditionToCondition(branch->condition), tlabel);
    3598             : 
    3599      330893 :   if (FLAG_deopt_every_n_times > 0) {
    3600             :     ExternalReference counter =
    3601         288 :         ExternalReference::stress_deopt_count(isolate());
    3602             : 
    3603         288 :     __ pushfq();  // Save the flags and rax around the counter update.
    3604         288 :     __ pushq(rax);
    3605         288 :     __ load_rax(counter);
    3606             :     __ decl(rax);
    3607         288 :     __ j(not_zero, &nodeopt);  // Counter has not reached zero: skip the forced jump.
    3608             : 
    3609         576 :     __ Set(rax, FLAG_deopt_every_n_times);  // Counter reached zero: reset it ...
    3610         288 :     __ store_rax(counter);
    3611         288 :     __ popq(rax);
    3612         288 :     __ popfq();
    3613         288 :     __ jmp(tlabel);  // ... and jump to the true label unconditionally.
    3614             : 
    3615         288 :     __ bind(&nodeopt);
    3616         288 :     __ store_rax(counter);  // Write back the decremented counter.
    3617         288 :     __ popq(rax);  // Restore rax and the flags saved above.
    3618         288 :     __ popfq();
    3619             :   }
    3620             : 
    3621      330893 :   if (!branch->fallthru) {
    3622           0 :     __ jmp(flabel, flabel_distance);
    3623             :   }
    3624      330893 : }
    3625             : 
    3626     5054591 : void CodeGenerator::AssembleArchJump(RpoNumber target) {
    3627     8283638 :   if (!IsNextInAssemblyOrder(target)) __ jmp(GetLabel(target));  // No jump is emitted when the target is next in assembly order.
    3628     5054623 : }
    3629             : 
    3630      142017 : void CodeGenerator::AssembleArchTrap(Instruction* instr,  // Emits a conditional jump to an out-of-line trap for `condition`.
    3631             :                                      FlagsCondition condition) {
    3632             :   auto ool = new (zone()) WasmOutOfLineTrap(this, instr);
    3633             :   Label* tlabel = ool->entry();  // The trap's entry label is the jump target below.
    3634      141969 :   Label end;
    3635      141969 :   if (condition == kUnorderedEqual) {
    3636           0 :     __ j(parity_even, &end);  // Parity flag set: skip the trap jump.
    3637      141969 :   } else if (condition == kUnorderedNotEqual) {
    3638         307 :     __ j(parity_even, tlabel);  // Parity flag set: take the trap.
    3639             :   }
    3640      141969 :   __ j(FlagsConditionToCondition(condition), tlabel);
    3641      142085 :   __ bind(&end);
    3642      142148 : }
    3643             : 
    3644             : // Assembles boolean materializations after this instruction.
    3645      376447 : void CodeGenerator::AssembleArchBoolean(Instruction* instr,
    3646             :                                         FlagsCondition condition) {
    3647             :   X64OperandConverter i(this, instr);
    3648      376447 :   Label done;
    3649             : 
    3650             :   // Materialize a full 64-bit 1 or 0 value. The result register is always the
    3651             :   // last output of the instruction.
    3652      376447 :   Label check;
    3653             :   DCHECK_NE(0u, instr->OutputCount());
    3654      376447 :   Register reg = i.OutputRegister(instr->OutputCount() - 1);
    3655      376447 :   if (condition == kUnorderedEqual) {
    3656        2892 :     __ j(parity_odd, &check, Label::kNear);  // Parity flag clear: use the regular setcc path.
    3657             :     __ movl(reg, Immediate(0));  // Parity flag set: the result is 0.
    3658        2892 :     __ jmp(&done, Label::kNear);
    3659      373555 :   } else if (condition == kUnorderedNotEqual) {
    3660        2498 :     __ j(parity_odd, &check, Label::kNear);  // Parity flag clear: use the regular setcc path.
    3661             :     __ movl(reg, Immediate(1));  // Parity flag set: the result is 1.
    3662        2499 :     __ jmp(&done, Label::kNear);
    3663             :   }
    3664      376447 :   __ bind(&check);
    3665      376445 :   __ setcc(FlagsConditionToCondition(condition), reg);  // Set the byte from the condition ...
    3666             :   __ movzxbl(reg, reg);  // ... then zero-extend it so reg holds exactly 0 or 1.
    3667      376449 :   __ bind(&done);
    3668      376451 : }
    3669             : 
    3670       34385 : void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {  // Collects the (value, label) cases and hands them to AssembleArchBinarySearchSwitchRange.
    3671             :   X64OperandConverter i(this, instr);
    3672       34385 :   Register input = i.InputRegister(0);
    3673             :   std::vector<std::pair<int32_t, Label*>> cases;
    3674      420657 :   for (size_t index = 2; index < instr->InputCount(); index += 2) {  // Inputs from index 2 on come in (case value, target block) pairs.
    3675      386272 :     cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
    3676             :   }
    3677             :   AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),  // Input 1 is presumably the default target, as in the other switch emitters - confirm.
    3678       34385 :                                       cases.data() + cases.size());
    3679       34385 : }
    3680             : 
    3681           0 : void CodeGenerator::AssembleArchLookupSwitch(Instruction* instr) {  // Emits a linear compare-and-jump chain over the cases.
    3682             :   X64OperandConverter i(this, instr);
    3683             :   Register input = i.InputRegister(0);
    3684           0 :   for (size_t index = 2; index < instr->InputCount(); index += 2) {  // Inputs from index 2 on come in (case value, target block) pairs.
    3685           0 :     __ cmpl(input, Immediate(i.InputInt32(index + 0)));
    3686           0 :     __ j(equal, GetLabel(i.InputRpo(index + 1)));
    3687             :   }
    3688           0 :   AssembleArchJump(i.InputRpo(1));  // No case matched: continue at the block given by input 1.
    3689           0 : }
    3690             : 
    3691         344 : void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {  // Emits a bounds check followed by an indirect jump through a jump table.
    3692             :   X64OperandConverter i(this, instr);
    3693             :   Register input = i.InputRegister(0);
    3694         344 :   int32_t const case_count = static_cast<int32_t>(instr->InputCount() - 2);  // Inputs 0 and 1 are not case targets.
    3695         344 :   Label** cases = zone()->NewArray<Label*>(case_count);
    3696      399988 :   for (int32_t index = 0; index < case_count; ++index) {
    3697      399644 :     cases[index] = GetLabel(i.InputRpo(index + 2));
    3698             :   }
    3699         344 :   Label* const table = AddJumpTable(cases, case_count);
    3700         344 :   __ cmpl(input, Immediate(case_count));
    3701         344 :   __ j(above_equal, GetLabel(i.InputRpo(1)));  // Unsigned compare: any input outside [0, case_count) goes to input 1's block.
    3702         688 :   __ leaq(kScratchRegister, Operand(table));  // kScratchRegister = address of the jump table.
    3703         344 :   __ jmp(Operand(kScratchRegister, input, times_8, 0));  // Jump through the entry at table + input * 8.
    3704         344 : }
    3705             : 
    3706             : namespace {
    3707             : 
    3708             : static const int kQuadWordSize = 16;  // Bytes reserved per callee-saved FP register when FinishFrame sizes the frame.
    3709             : 
    3710             : }  // namespace
    3711             : 
    3712     2640179 : void CodeGenerator::FinishFrame(Frame* frame) {  // Reserves frame slots for the callee-saved registers of the incoming call descriptor.
    3713             :   auto call_descriptor = linkage()->GetIncomingDescriptor();
    3714             : 
    3715             :   const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
    3716     2640179 :   if (saves_fp != 0) {
    3717             :     frame->AlignSavedCalleeRegisterSlots();
    3718           0 :     if (saves_fp != 0) {  // Save callee-saved XMM registers. NOTE(review): repeats the enclosing test, so always true here.
    3719             :       const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
    3720           0 :       frame->AllocateSavedCalleeRegisterSlots(
    3721           0 :           saves_fp_count * (kQuadWordSize / kSystemPointerSize));  // kQuadWordSize / kSystemPointerSize slots per FP register.
    3722             :     }
    3723             :   }
    3724             :   const RegList saves = call_descriptor->CalleeSavedRegisters();
    3725     2640179 :   if (saves != 0) {  // Save callee-saved registers.
    3726             :     int count = 0;
    3727    29846157 :     for (int i = Register::kNumRegisters - 1; i >= 0; i--) {  // Count the bits set in `saves`, one slot per register.
    3728    14470864 :       if (((1 << i) & saves)) {
    3729     4522145 :         ++count;
    3730             :       }
    3731             :     }
    3732             :     frame->AllocateSavedCalleeRegisterSlots(count);
    3733             :   }
    3734     2640179 : }
    3735             : 
    3736     2662395 : void CodeGenerator::AssembleConstructFrame() {  // Emits the prologue: frame setup, stack slot allocation, callee-saved register saves.
    3737             :   auto call_descriptor = linkage()->GetIncomingDescriptor();
    3738     2662395 :   if (frame_access_state()->has_frame()) {
    3739             :     int pc_base = __ pc_offset();  // Code offset where frame setup starts; handed to the unwinding info writer below.
    3740             : 
    3741     2662870 :     if (call_descriptor->IsCFunctionCall()) {
    3742      904429 :       __ pushq(rbp);  // C function calls: save rbp, then copy rsp into rbp.
    3743             :       __ movq(rbp, rsp);
    3744     1758441 :     } else if (call_descriptor->IsJSFunctionCall()) {
    3745      645321 :       __ Prologue();
    3746      645328 :       if (call_descriptor->PushArgumentCount()) {
    3747       39056 :         __ pushq(kJavaScriptCallArgCountRegister);
    3748             :       }
    3749             :     } else {
    3750     2226158 :       __ StubPrologue(info()->GetOutputStackFrameType());
    3751     1112894 :       if (call_descriptor->IsWasmFunctionCall()) {
    3752      987493 :         __ pushq(kWasmInstanceRegister);
    3753      125401 :       } else if (call_descriptor->IsWasmImportWrapper()) {
    3754             :         // WASM import wrappers are passed a tuple in the place of the instance.
    3755             :         // Unpack the tuple into the instance and the target callable.
    3756             :         // This must be done here in the codegen because it cannot be expressed
    3757             :         // properly in the graph.
    3758             :         __ LoadTaggedPointerField(
    3759             :             kJSFunctionRegister,
    3760        6859 :             FieldOperand(kWasmInstanceRegister, Tuple2::kValue2Offset));  // Loaded first: the next load overwrites kWasmInstanceRegister, which still holds the tuple here.
    3761             :         __ LoadTaggedPointerField(
    3762             :             kWasmInstanceRegister,
    3763        6859 :             FieldOperand(kWasmInstanceRegister, Tuple2::kValue1Offset));
    3764        6859 :         __ pushq(kWasmInstanceRegister);
    3765             :       }
    3766             :     }
    3767             : 
    3768     2662980 :     unwinding_info_writer_.MarkFrameConstructed(pc_base);
    3769             :   }
    3770             :   int required_slots = frame()->GetTotalFrameSlotCount() -  // Slots still to be allocated: total frame slots minus the fixed frame size.
    3771     2661666 :                        call_descriptor->CalculateFixedFrameSize();
    3772             : 
    3773     2662265 :   if (info()->is_osr()) {
    3774             :     // TurboFan OSR-compiled functions cannot be entered directly.
    3775        4647 :     __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
    3776             : 
    3777             :     // Unoptimized code jumps directly to this entrypoint while the unoptimized
    3778             :     // frame is still on the stack. Optimized code uses OSR values directly from
    3779             :     // the unoptimized frame. Thus, all that needs to be done is to allocate the
    3780             :     // remaining stack slots.
    3781        4647 :     if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
    3782        4647 :     osr_pc_offset_ = __ pc_offset();  // Remember the code offset of the OSR entry point.
    3783        4647 :     required_slots -= static_cast<int>(osr_helper()->UnoptimizedFrameSlots());  // Do not allocate again the slots the unoptimized frame already provides.
    3784        4647 :     ResetSpeculationPoison();
    3785             :   }
    3786             : 
    3787             :   const RegList saves = call_descriptor->CalleeSavedRegisters();
    3788             :   const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
    3789             : 
    3790     2662118 :   if (required_slots > 0) {
    3791             :     DCHECK(frame_access_state()->has_frame());
    3792     2048890 :     if (info()->IsWasm() && required_slots > 128) {
    3793             :       // For WebAssembly functions with big frames we have to do the stack
    3794             :       // overflow check before we construct the frame. Otherwise we may not
    3795             :       // have enough space on the stack to call the runtime for the stack
    3796             :       // overflow.
    3797           8 :       Label done;
    3798             : 
    3799             :       // If the frame is bigger than the stack, we throw the stack overflow
    3800             :       // exception unconditionally. Thereby we can avoid the integer overflow
    3801             :       // check in the condition code.
    3802           8 :       if (required_slots * kSystemPointerSize < FLAG_stack_size * 1024) {
    3803           8 :         __ movq(kScratchRegister,
    3804             :                 FieldOperand(kWasmInstanceRegister,
    3805             :                              WasmInstanceObject::kRealStackLimitAddressOffset));
    3806          16 :         __ movq(kScratchRegister, Operand(kScratchRegister, 0));  // Dereference the address loaded above to obtain the limit value.
    3807             :         __ addq(kScratchRegister,
    3808             :                 Immediate(required_slots * kSystemPointerSize));  // limit + size of the frame in bytes.
    3809             :         __ cmpq(rsp, kScratchRegister);
    3810           8 :         __ j(above_equal, &done);  // rsp >= limit + frame size: skip the overflow call.
    3811             :       }
    3812             : 
    3813           8 :       __ near_call(wasm::WasmCode::kWasmStackOverflow,
    3814           8 :                    RelocInfo::WASM_STUB_CALL);
    3815             :       ReferenceMap* reference_map = new (zone()) ReferenceMap(zone());
    3816             :       RecordSafepoint(reference_map, Safepoint::kSimple,
    3817           8 :                       Safepoint::kNoLazyDeopt);
    3818           8 :       __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
    3819           8 :       __ bind(&done);
    3820             :     }
    3821             : 
    3822             :     // Skip callee-saved and return slots, which are created below.
    3823     2048890 :     required_slots -= base::bits::CountPopulation(saves);
    3824             :     required_slots -= base::bits::CountPopulation(saves_fp) *
    3825     2048890 :                       (kQuadWordSize / kSystemPointerSize);  // Each saved XMM register occupies kQuadWordSize bytes, expressed here in pointer-sized slots.
    3826     2048890 :     required_slots -= frame()->GetReturnSlotCount();
    3827     2048890 :     if (required_slots > 0) {
    3828     1872400 :       __ subq(rsp, Immediate(required_slots * kSystemPointerSize));
    3829             :     }
    3830             :   }
    3831             : 
    3832     2662133 :   if (saves_fp != 0) {  // Save callee-saved XMM registers.
    3833             :     const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
    3834           0 :     const int stack_size = saves_fp_count * kQuadWordSize;
    3835             :     // Adjust the stack pointer.
    3836           0 :     __ subq(rsp, Immediate(stack_size));
    3837             :     // Store the registers on the stack.
    3838             :     int slot_idx = 0;
    3839           0 :     for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
    3840           0 :       if (!((1 << i) & saves_fp)) continue;  // Bit i of the mask selects the XMM register with code i.
    3841           0 :       __ movdqu(Operand(rsp, kQuadWordSize * slot_idx),
    3842           0 :                 XMMRegister::from_code(i));
    3843           0 :       slot_idx++;
    3844             :     }
    3845             :   }
    3846             : 
    3847     2662133 :   if (saves != 0) {  // Save callee-saved registers.
    3848    29846157 :     for (int i = Register::kNumRegisters - 1; i >= 0; i--) {  // Descending register-code order; AssembleReturn pops in ascending order.
    3849    14470864 :       if (!((1 << i) & saves)) continue;
    3850     4522145 :       __ pushq(Register::from_code(i));
    3851             :     }
    3852             :   }
    3853             : 
    3854             :   // Allocate return slots (located after callee-saved).
    3855     2662133 :   if (frame()->GetReturnSlotCount() > 0) {
    3856         667 :     __ subq(rsp, Immediate(frame()->GetReturnSlotCount() * kSystemPointerSize));
    3857             :   }
    3858     2662133 : }
    3859             : 
    3860     2999050 : void CodeGenerator::AssembleReturn(InstructionOperand* pop) {  // Emits the epilogue: restores callee-saved registers, deconstructs the frame and returns; |pop| is an immediate or register count of extra slots to drop.
    3861             :   auto call_descriptor = linkage()->GetIncomingDescriptor();
    3862             : 
    3863             :   // Restore registers.
    3864             :   const RegList saves = call_descriptor->CalleeSavedRegisters();
    3865     2999050 :   if (saves != 0) {
    3866             :     const int returns = frame()->GetReturnSlotCount();
    3867      913797 :     if (returns != 0) {
    3868         656 :       __ addq(rsp, Immediate(returns * kSystemPointerSize));  // Drop the return slots that sit below the saved registers.
    3869             :     }
    3870    30155301 :     for (int i = 0; i < Register::kNumRegisters; i++) {  // Ascending order, the reverse of the push order in AssembleConstructFrame.
    3871    14620752 :       if (!((1 << i) & saves)) continue;
    3872     4568985 :       __ popq(Register::from_code(i));
    3873             :     }
    3874             :   }
    3875             :   const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
    3876     2999050 :   if (saves_fp != 0) {
    3877             :     const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
    3878           0 :     const int stack_size = saves_fp_count * kQuadWordSize;
    3879             :     // Load the registers from the stack.
    3880             :     int slot_idx = 0;
    3881           0 :     for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
    3882           0 :       if (!((1 << i) & saves_fp)) continue;
    3883           0 :       __ movdqu(XMMRegister::from_code(i),
    3884           0 :                 Operand(rsp, kQuadWordSize * slot_idx));
    3885           0 :       slot_idx++;
    3886             :     }
    3887             :     // Adjust the stack pointer.
    3888           0 :     __ addq(rsp, Immediate(stack_size));
    3889             :   }
    3890             : 
    3891             :   unwinding_info_writer_.MarkBlockWillExit();
    3892             : 
    3893             :   // Might need rcx for scratch if pop_size is too big or if there is a variable
    3894             :   // pop count.
    3895             :   DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rcx.bit());
    3896             :   DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rdx.bit());
    3897     2998956 :   size_t pop_size = call_descriptor->StackParameterCount() * kSystemPointerSize;  // Bytes occupied by the stack parameters.
    3898             :   X64OperandConverter g(this, nullptr);
    3899     2998956 :   if (call_descriptor->IsCFunctionCall()) {
    3900      913797 :     AssembleDeconstructFrame();
    3901     2085159 :   } else if (frame_access_state()->has_frame()) {
    3902     4043031 :     if (pop->IsImmediate() && g.ToConstant(pop).ToInt32() == 0) {
    3903             :       // Canonicalize JSFunction return sites for now.
    3904     2001715 :       if (return_label_.is_bound()) {
    3905      369527 :         __ jmp(&return_label_);  // A return sequence was already emitted at return_label_; jump to it instead of emitting another.
    3906             :         return;
    3907             :       } else {
    3908     1632188 :         __ bind(&return_label_);  // First such return site: bind the label here so later sites can jump to it.
    3909     1632216 :         AssembleDeconstructFrame();
    3910             :       }
    3911             :     } else {
    3912       39900 :       AssembleDeconstructFrame();
    3913             :     }
    3914             :   }
    3915             : 
    3916     2630540 :   if (pop->IsImmediate()) {
    3917     5181504 :     pop_size += g.ToConstant(pop).ToInt32() * kSystemPointerSize;  // Add the statically known extra slots.
    3918     2590840 :     CHECK_LT(pop_size, static_cast<size_t>(std::numeric_limits<int>::max()));  // Guards the narrowing cast to int on the next line.
    3919     5181680 :     __ Ret(static_cast<int>(pop_size), rcx);
    3920             :   } else {
    3921             :     Register pop_reg = g.ToRegister(pop);
    3922       39876 :     Register scratch_reg = pop_reg == rcx ? rdx : rcx;  // Choose a scratch register different from pop_reg.
    3923       39876 :     __ popq(scratch_reg);  // Pop the top stack entry; it is the jump target used below.
    3924       79752 :     __ leaq(rsp, Operand(rsp, pop_reg, times_8, static_cast<int>(pop_size)));  // rsp += pop_reg * 8 + pop_size.
    3925       39876 :     __ jmp(scratch_reg);
    3926             :   }
    3927             : }
    3928             : 
    3929     2639698 : void CodeGenerator::FinishCode() { tasm()->PatchConstPool(); }  // Only action: asks the assembler to patch its constant pool.
    3930             : 
    3931    37845491 : void CodeGenerator::AssembleMove(InstructionOperand* source,
    3932             :                                  InstructionOperand* destination) {  // Emits code that moves |source| (register, stack slot or constant) into |destination| (register or stack slot).
    3933             :   X64OperandConverter g(this, nullptr);
    3934             :   // Helper function to write the given constant to the dst register.
    3935    18417160 :   auto MoveConstantToRegister = [&](Register dst, Constant src) {
    3936    18417160 :     switch (src.type()) {
    3937             :       case Constant::kInt32: {
    3938     3995559 :         if (RelocInfo::IsWasmReference(src.rmode())) {
    3939    26273858 :           __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));  // Wasm references are emitted as 64-bit immediates carrying the reloc mode.
    3940             :         } else {
    3941             :           int32_t value = src.ToInt32();
    3942     3995559 :           if (value == 0) {
    3943     1108520 :             __ xorl(dst, dst);  // Zero is produced with xor rather than a move of an immediate.
    3944             :           } else {
    3945     2887039 :             __ movl(dst, Immediate(value));
    3946             :           }
    3947             :         }
    3948             :         break;
    3949             :       }
    3950             :       case Constant::kInt64:
    3951     2325758 :         if (RelocInfo::IsWasmReference(src.rmode())) {
    3952           0 :           __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));
    3953             :         } else {
    3954     2325758 :           __ Set(dst, src.ToInt64());
    3955             :         }
    3956             :         break;
    3957             :       case Constant::kFloat32:
    3958         736 :         __ MoveNumber(dst, src.ToFloat32());
    3959         368 :         break;
    3960             :       case Constant::kFloat64:
    3961     1521229 :         __ MoveNumber(dst, src.ToFloat64().value());
    3962     1521233 :         break;
    3963             :       case Constant::kExternalReference:
    3964     2715123 :         __ Move(dst, src.ToExternalReference());
    3965     2715137 :         break;
    3966             :       case Constant::kHeapObject: {
    3967     7856838 :         Handle<HeapObject> src_object = src.ToHeapObject();
    3968             :         RootIndex index;
    3969     7856837 :         if (IsMaterializableFromRoot(src_object, &index)) {
    3970     1901376 :           __ LoadRoot(dst, index);  // The object can be loaded from the root list at |index|.
    3971             :         } else {
    3972     5955464 :           __ Move(dst, src_object);
    3973             :         }
    3974             :         break;
    3975             :       }
    3976             :       case Constant::kDelayedStringConstant: {
    3977        2144 :         const StringConstantBase* src_constant = src.ToDelayedStringConstant();
    3978        2144 :         __ MoveStringConstant(dst, src_constant);
    3979        2144 :         break;
    3980             :       }
    3981             :       case Constant::kRpoNumber:
    3982           0 :         UNREACHABLE();  // TODO(dcarney): load of labels on x64.
    3983             :         break;
    3984             :     }
    3985    56262757 :   };
    3986             :   // Helper function to write the given constant to the stack.
    3987       38610 :   auto MoveConstantToSlot = [&](Operand dst, Constant src) {
    3988       38610 :     if (!RelocInfo::IsWasmReference(src.rmode())) {  // Int32/Int64 constants that are not wasm references are stored directly.
    3989       38610 :       switch (src.type()) {
    3990             :         case Constant::kInt32:
    3991       38610 :           __ movq(dst, Immediate(src.ToInt32()));
    3992       19510 :           return;
    3993             :         case Constant::kInt64:
    3994       13261 :           __ Set(dst, src.ToInt64());
    3995       13261 :           return;
    3996             :         default:
    3997             :           break;
    3998             :       }
    3999             :     }
    4000        5839 :     MoveConstantToRegister(kScratchRegister, src);  // Everything else is materialized in kScratchRegister and then stored.
    4001        5839 :     __ movq(dst, kScratchRegister);
    4002    37845491 :   };
    4003             :   // Dispatch on the source and destination operand kinds.
    4004    37845491 :   switch (MoveType::InferMove(source, destination)) {
    4005             :     case MoveType::kRegisterToRegister:
    4006     4218879 :       if (source->IsRegister()) {
    4007     4076762 :         __ movq(g.ToRegister(destination), g.ToRegister(source));
    4008             :       } else {
    4009             :         DCHECK(source->IsFPRegister());
    4010             :         __ Movapd(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
    4011             :       }
    4012             :       return;
    4013             :     case MoveType::kRegisterToStack: {
    4014     5689569 :       Operand dst = g.ToOperand(destination);
    4015     5689569 :       if (source->IsRegister()) {
    4016     5376083 :         __ movq(dst, g.ToRegister(source));
    4017             :       } else {
    4018             :         DCHECK(source->IsFPRegister());
    4019             :         XMMRegister src = g.ToDoubleRegister(source);
    4020             :         MachineRepresentation rep =
    4021             :             LocationOperand::cast(source)->representation();
    4022      313486 :         if (rep != MachineRepresentation::kSimd128) {  // Non-SIMD values use Movsd; 128-bit SIMD values use Movups.
    4023             :           __ Movsd(dst, src);
    4024             :         } else {
    4025             :           __ Movups(dst, src);
    4026             :         }
    4027             :       }
    4028             :       return;
    4029             :     }
    4030             :     case MoveType::kStackToRegister: {
    4031     8891193 :       Operand src = g.ToOperand(source);
    4032     8891193 :       if (source->IsStackSlot()) {
    4033     8405090 :         __ movq(g.ToRegister(destination), src);
    4034             :       } else {
    4035             :         DCHECK(source->IsFPStackSlot());
    4036             :         XMMRegister dst = g.ToDoubleRegister(destination);
    4037             :         MachineRepresentation rep =
    4038             :             LocationOperand::cast(source)->representation();
    4039      486103 :         if (rep != MachineRepresentation::kSimd128) {
    4040             :           __ Movsd(dst, src);
    4041             :         } else {
    4042             :           __ Movups(dst, src);
    4043             :         }
    4044             :       }
    4045             :       return;
    4046             :     }
    4047             :     case MoveType::kStackToStack: {
    4048       50185 :       Operand src = g.ToOperand(source);
    4049       50185 :       Operand dst = g.ToOperand(destination);
    4050       50185 :       if (source->IsStackSlot()) {
    4051             :         // Spill on demand to use a temporary register for memory-to-memory
    4052             :         // moves.
    4053       25139 :         __ movq(kScratchRegister, src);
    4054             :         __ movq(dst, kScratchRegister);
    4055             :       } else {
    4056             :         MachineRepresentation rep =
    4057             :             LocationOperand::cast(source)->representation();
    4058       25046 :         if (rep != MachineRepresentation::kSimd128) {  // FP slots are copied through kScratchDoubleReg.
    4059             :           __ Movsd(kScratchDoubleReg, src);
    4060             :           __ Movsd(dst, kScratchDoubleReg);
    4061             :         } else {
    4062             :           DCHECK(source->IsSimd128StackSlot());
    4063             :           __ Movups(kScratchDoubleReg, src);
    4064             :           __ Movups(dst, kScratchDoubleReg);
    4065             :         }
    4066             :       }
    4067             :       return;
    4068             :     }
    4069             :     case MoveType::kConstantToRegister: {
    4070    18951215 :       Constant src = g.ToConstant(source);
    4071    18951439 :       if (destination->IsRegister()) {
    4072    18411400 :         MoveConstantToRegister(g.ToRegister(destination), src);
    4073             :       } else {
    4074             :         DCHECK(destination->IsFPRegister());
    4075      540039 :         XMMRegister dst = g.ToDoubleRegister(destination);
    4076      540039 :         if (src.type() == Constant::kFloat32) {
    4077             :           // TODO(turbofan): Can we do better here?
    4078      125126 :           __ Move(dst, bit_cast<uint32_t>(src.ToFloat32()));  // Passes the float's raw 32-bit pattern.
    4079             :         } else {
    4080             :           DCHECK_EQ(src.type(), Constant::kFloat64);
    4081      414913 :           __ Move(dst, src.ToFloat64().AsUint64());  // Passes the double as a 64-bit integer pattern.
    4082             :         }
    4083             :       }
    4084             :       return;
    4085             :     }
    4086             :     case MoveType::kConstantToStack: {
    4087       45337 :       Constant src = g.ToConstant(source);
    4088       45337 :       Operand dst = g.ToOperand(destination);
    4089       45337 :       if (destination->IsStackSlot()) {
    4090       38610 :         MoveConstantToSlot(dst, src);
    4091             :       } else {
    4092             :         DCHECK(destination->IsFPStackSlot());
    4093        6727 :         if (src.type() == Constant::kFloat32) {
    4094        3060 :           __ movl(dst, Immediate(bit_cast<uint32_t>(src.ToFloat32())));  // The 32-bit pattern is stored directly as an immediate.
    4095             :         } else {
    4096             :           DCHECK_EQ(src.type(), Constant::kFloat64);
    4097        3667 :           __ movq(kScratchRegister, src.ToFloat64().AsUint64());  // The 64-bit pattern is stored via kScratchRegister.
    4098             :           __ movq(dst, kScratchRegister);
    4099             :         }
    4100             :       }
    4101             :       return;
    4102             :     }
    4103             :   }
    4104           0 :   UNREACHABLE();
    4105             : }
    4106             : 
    4107       78622 : void CodeGenerator::AssembleSwap(InstructionOperand* source,
    4108             :                                  InstructionOperand* destination) {  // Emits code that exchanges the contents of |source| and |destination|.
    4109             :   X64OperandConverter g(this, nullptr);
    4110             :   // Dispatch on the source and destination operand kinds.  Not all
    4111             :   // combinations are possible.
    4112       78622 :   switch (MoveType::InferSwap(source, destination)) {
    4113             :     case MoveType::kRegisterToRegister: {
    4114       68315 :       if (source->IsRegister()) {
    4115             :         Register src = g.ToRegister(source);
    4116             :         Register dst = g.ToRegister(destination);
    4117       65729 :         __ movq(kScratchRegister, src);  // Three-move exchange with kScratchRegister as the temporary.
    4118             :         __ movq(src, dst);
    4119             :         __ movq(dst, kScratchRegister);
    4120             :       } else {
    4121             :         DCHECK(source->IsFPRegister());
    4122             :         XMMRegister src = g.ToDoubleRegister(source);
    4123             :         XMMRegister dst = g.ToDoubleRegister(destination);
    4124             :         __ Movapd(kScratchDoubleReg, src);  // Same pattern with kScratchDoubleReg as the temporary.
    4125             :         __ Movapd(src, dst);
    4126             :         __ Movapd(dst, kScratchDoubleReg);
    4127             :       }
    4128             :       return;
    4129             :     }
    4130             :     case MoveType::kRegisterToStack: {
    4131        6543 :       if (source->IsRegister()) {
    4132             :         Register src = g.ToRegister(source);
    4133        1747 :         __ pushq(src);  // Park the register value on the stack.
    4134             :         frame_access_state()->IncreaseSPDelta(1);  // NOTE(review): presumably keeps the operand computed below in sync with the pushed slot - confirm.
    4135        1747 :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4136        1747 :                                                          kSystemPointerSize);
    4137             :         __ movq(src, g.ToOperand(destination));  // Register receives the stack slot's value.
    4138             :         frame_access_state()->IncreaseSPDelta(-1);
    4139        1747 :         __ popq(g.ToOperand(destination));  // The parked register value is popped into the stack slot.
    4140             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4141        1747 :                                                          -kSystemPointerSize);
    4142             :       } else {
    4143             :         DCHECK(source->IsFPRegister());
    4144             :         XMMRegister src = g.ToDoubleRegister(source);
    4145        4796 :         Operand dst = g.ToOperand(destination);
    4146             :         MachineRepresentation rep =
    4147             :             LocationOperand::cast(source)->representation();
    4148        4796 :         if (rep != MachineRepresentation::kSimd128) {
    4149             :           __ Movsd(kScratchDoubleReg, src);
    4150             :           __ Movsd(src, dst);
    4151             :           __ Movsd(dst, kScratchDoubleReg);
    4152             :         } else {
    4153             :           __ Movups(kScratchDoubleReg, src);
    4154             :           __ Movups(src, dst);
    4155             :           __ Movups(dst, kScratchDoubleReg);
    4156             :         }
    4157             :       }
    4158             :       return;
    4159             :     }
    4160             :     case MoveType::kStackToStack: {
    4161        3765 :       Operand src = g.ToOperand(source);
    4162        3765 :       Operand dst = g.ToOperand(destination);
    4163             :       MachineRepresentation rep =
    4164             :           LocationOperand::cast(source)->representation();
    4165        3765 :       if (rep != MachineRepresentation::kSimd128) {
    4166             :         Register tmp = kScratchRegister;
    4167        2929 :         __ movq(tmp, dst);  // Save the destination's old value.
    4168        2929 :         __ pushq(src);  // Then use stack to copy src to destination.
    4169        2929 :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4170        2929 :                                                          kSystemPointerSize);
    4171        2929 :         __ popq(dst);
    4172             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4173        2929 :                                                          -kSystemPointerSize);
    4174             :         __ movq(src, tmp);  // The destination's old value goes into the source slot.
    4175             :       } else {
    4176             :         // Without AVX, misaligned reads and writes will trap. Move using the
    4177             :         // stack, in two parts.
    4178         836 :         __ movups(kScratchDoubleReg, dst);  // Save dst in scratch register.
    4179         836 :         __ pushq(src);  // Then use stack to copy src to destination.
    4180         836 :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4181         836 :                                                          kSystemPointerSize);
    4182         836 :         __ popq(dst);
    4183             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4184         836 :                                                          -kSystemPointerSize);
    4185         836 :         __ pushq(g.ToOperand(source, kSystemPointerSize));  // Second part; NOTE(review): the extra argument is presumably a byte offset into the slot - confirm.
    4186             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4187         836 :                                                          kSystemPointerSize);
    4188         836 :         __ popq(g.ToOperand(destination, kSystemPointerSize));
    4189             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4190         836 :                                                          -kSystemPointerSize);
    4191         836 :         __ movups(src, kScratchDoubleReg);  // Saved dst value goes into the source slot.
    4192             :       }
    4193             :       return;
    4194             :     }
    4195             :     default:
    4196           0 :       UNREACHABLE();
    4197             :       break;
    4198             :   }
    4199             : }
    4200             : 
    4201         344 : void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {  // Emits one dq entry per label, in array order, for the first |target_count| labels of |targets|.
    4202      399988 :   for (size_t index = 0; index < target_count; ++index) {
    4203      199822 :     __ dq(targets[index]);
    4204             :   }
    4205         344 : }
    4206             : 
    4207             : #undef __
    4208             : 
    4209             : }  // namespace compiler
    4210             : }  // namespace internal
    4211      121996 : }  // namespace v8

Generated by: LCOV version 1.10