LCOV - code coverage report
Current view: top level - src/compiler/backend/x64 - code-generator-x64.cc (source / functions) Hit Total Coverage
Test: app.info Lines: 1404 1572 89.3 %
Date: 2019-03-21 Functions: 41 58 70.7 %

          Line data    Source code
       1             : // Copyright 2013 the V8 project authors. All rights reserved.
       2             : // Use of this source code is governed by a BSD-style license that can be
       3             : // found in the LICENSE file.
       4             : 
       5             : #include "src/compiler/backend/code-generator.h"
       6             : 
       7             : #include <limits>
       8             : 
       9             : #include "src/base/overflowing-math.h"
      10             : #include "src/compiler/backend/code-generator-impl.h"
      11             : #include "src/compiler/backend/gap-resolver.h"
      12             : #include "src/compiler/node-matchers.h"
      13             : #include "src/compiler/osr.h"
      14             : #include "src/heap/heap-inl.h"  // crbug.com/v8/8499
      15             : #include "src/macro-assembler.h"
      16             : #include "src/objects/smi.h"
      17             : #include "src/optimized-compilation-info.h"
      18             : #include "src/wasm/wasm-code-manager.h"
      19             : #include "src/wasm/wasm-objects.h"
      20             : #include "src/x64/assembler-x64.h"
      21             : 
      22             : namespace v8 {
      23             : namespace internal {
      24             : namespace compiler {
      25             : 
      26             : #define __ tasm()->
      27             : 
      28             : // Adds X64 specific methods for decoding operands.
      29             : class X64OperandConverter : public InstructionOperandConverter {
      30             :  public:
      31             :   X64OperandConverter(CodeGenerator* gen, Instruction* instr)
      32             :       : InstructionOperandConverter(gen, instr) {}
      33             : 
      34             :   Immediate InputImmediate(size_t index) {
      35     5784831 :     return ToImmediate(instr_->InputAt(index));
      36             :   }
      37             : 
      38     1120542 :   Operand InputOperand(size_t index, int extra = 0) {
      39     2241135 :     return ToOperand(instr_->InputAt(index), extra);
      40             :   }
      41             : 
      42           0 :   Operand OutputOperand() { return ToOperand(instr_->Output()); }
      43             : 
      44     4794625 :   Immediate ToImmediate(InstructionOperand* operand) {
      45     4794625 :     Constant constant = ToConstant(operand);
      46     4794638 :     if (constant.type() == Constant::kFloat64) {
      47             :       DCHECK_EQ(0, constant.ToFloat64().AsUint64());
      48      366325 :       return Immediate(0);
      49             :     }
      50     4428313 :     if (RelocInfo::IsWasmReference(constant.rmode())) {
      51           0 :       return Immediate(constant.ToInt32(), constant.rmode());
      52             :     }
      53     4428313 :     return Immediate(constant.ToInt32());
      54             :   }
      55             : 
      56             :   Operand ToOperand(InstructionOperand* op, int extra = 0) {
      57             :     DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
      58    16096980 :     return SlotToOperand(AllocatedOperand::cast(op)->index(), extra);
      59             :   }
      60             : 
      61    16109848 :   Operand SlotToOperand(int slot_index, int extra = 0) {
      62    16109848 :     FrameOffset offset = frame_access_state()->GetFrameOffset(slot_index);
      63             :     return Operand(offset.from_stack_pointer() ? rsp : rbp,
      64    32219644 :                    offset.offset() + extra);
      65             :   }
      66             : 
      67             :   static size_t NextOffset(size_t* offset) {
      68    16228266 :     size_t i = *offset;
      69    29190059 :     (*offset)++;
      70             :     return i;
      71             :   }
      72             : 
      73             :   static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) {
      74             :     STATIC_ASSERT(0 == static_cast<int>(times_1));
      75             :     STATIC_ASSERT(1 == static_cast<int>(times_2));
      76             :     STATIC_ASSERT(2 == static_cast<int>(times_4));
      77             :     STATIC_ASSERT(3 == static_cast<int>(times_8));
      78     1504927 :     int scale = static_cast<int>(mode - one);
      79             :     DCHECK(scale >= 0 && scale < 4);
      80     1504927 :     return static_cast<ScaleFactor>(scale);
      81             :   }
      82             : 
      83    16228266 :   Operand MemoryOperand(size_t* offset) {
      84    16228266 :     AddressingMode mode = AddressingModeField::decode(instr_->opcode());
      85    16228266 :     switch (mode) {
      86             :       case kMode_MR: {
      87     2698229 :         Register base = InputRegister(NextOffset(offset));
      88             :         int32_t disp = 0;
      89     2698229 :         return Operand(base, disp);
      90             :       }
      91             :       case kMode_MRI: {
      92    10942017 :         Register base = InputRegister(NextOffset(offset));
      93             :         int32_t disp = InputInt32(NextOffset(offset));
      94    10941997 :         return Operand(base, disp);
      95             :       }
      96             :       case kMode_MR1:
      97             :       case kMode_MR2:
      98             :       case kMode_MR4:
      99             :       case kMode_MR8: {
     100      807138 :         Register base = InputRegister(NextOffset(offset));
     101      807138 :         Register index = InputRegister(NextOffset(offset));
     102             :         ScaleFactor scale = ScaleFor(kMode_MR1, mode);
     103             :         int32_t disp = 0;
     104      807138 :         return Operand(base, index, scale, disp);
     105             :       }
     106             :       case kMode_MR1I:
     107             :       case kMode_MR2I:
     108             :       case kMode_MR4I:
     109             :       case kMode_MR8I: {
     110      533482 :         Register base = InputRegister(NextOffset(offset));
     111      533482 :         Register index = InputRegister(NextOffset(offset));
     112             :         ScaleFactor scale = ScaleFor(kMode_MR1I, mode);
     113             :         int32_t disp = InputInt32(NextOffset(offset));
     114      533480 :         return Operand(base, index, scale, disp);
     115             :       }
     116             :       case kMode_M1: {
     117           0 :         Register base = InputRegister(NextOffset(offset));
     118             :         int32_t disp = 0;
     119           0 :         return Operand(base, disp);
     120             :       }
     121             :       case kMode_M2:
     122           0 :         UNREACHABLE();  // Should use kModeMR with more compact encoding instead
     123             :         return Operand(no_reg, 0);
     124             :       case kMode_M4:
     125             :       case kMode_M8: {
     126       18633 :         Register index = InputRegister(NextOffset(offset));
     127             :         ScaleFactor scale = ScaleFor(kMode_M1, mode);
     128             :         int32_t disp = 0;
     129       18633 :         return Operand(index, scale, disp);
     130             :       }
     131             :       case kMode_M1I:
     132             :       case kMode_M2I:
     133             :       case kMode_M4I:
     134             :       case kMode_M8I: {
     135      145674 :         Register index = InputRegister(NextOffset(offset));
     136             :         ScaleFactor scale = ScaleFor(kMode_M1I, mode);
     137             :         int32_t disp = InputInt32(NextOffset(offset));
     138      145674 :         return Operand(index, scale, disp);
     139             :       }
     140             :       case kMode_Root: {
     141     1083093 :         Register base = kRootRegister;
     142             :         int32_t disp = InputInt32(NextOffset(offset));
     143     1083094 :         return Operand(base, disp);
     144             :       }
     145             :       case kMode_None:
     146           0 :         UNREACHABLE();
     147             :     }
     148           0 :     UNREACHABLE();
     149             :   }
     150             : 
     151             :   Operand MemoryOperand(size_t first_input = 0) {
     152     9387526 :     return MemoryOperand(&first_input);
     153             :   }
     154             : };
     155             : 
     156             : namespace {
     157             : 
     158             : bool HasImmediateInput(Instruction* instr, size_t index) {
     159             :   return instr->InputAt(index)->IsImmediate();
     160             : }
     161             : 
     162           0 : class OutOfLineLoadFloat32NaN final : public OutOfLineCode {
     163             :  public:
     164             :   OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result)
     165         132 :       : OutOfLineCode(gen), result_(result) {}
     166             : 
     167         129 :   void Generate() final {
     168             :     __ Xorps(result_, result_);
     169             :     __ Divss(result_, result_);
     170         132 :   }
     171             : 
     172             :  private:
     173             :   XMMRegister const result_;
     174             : };
     175             : 
     176           0 : class OutOfLineLoadFloat64NaN final : public OutOfLineCode {
     177             :  public:
     178             :   OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result)
     179         589 :       : OutOfLineCode(gen), result_(result) {}
     180             : 
     181         587 :   void Generate() final {
     182             :     __ Xorpd(result_, result_);
     183             :     __ Divsd(result_, result_);
     184         588 :   }
     185             : 
     186             :  private:
     187             :   XMMRegister const result_;
     188             : };
     189             : 
     190           0 : class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
     191             :  public:
     192             :   OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
     193             :                              XMMRegister input, StubCallMode stub_mode,
     194             :                              UnwindingInfoWriter* unwinding_info_writer)
     195             :       : OutOfLineCode(gen),
     196             :         result_(result),
     197             :         input_(input),
     198             :         stub_mode_(stub_mode),
     199             :         unwinding_info_writer_(unwinding_info_writer),
     200             :         isolate_(gen->isolate()),
     201       55434 :         zone_(gen->zone()) {}
     202             : 
     203       55422 :   void Generate() final {
     204       55422 :     __ subq(rsp, Immediate(kDoubleSize));
     205       55424 :     unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
     206       55424 :                                                       kDoubleSize);
     207      110849 :     __ Movsd(MemOperand(rsp, 0), input_);
     208       55424 :     if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
     209             :       // A direct call to a wasm runtime stub defined in this module.
     210             :       // Just encode the stub index. This will be patched when the code
     211             :       // is added to the native module and copied into wasm code space.
     212        1593 :       __ near_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
     213             :     } else {
     214      107662 :       __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET);
     215             :     }
     216      110853 :     __ movl(result_, MemOperand(rsp, 0));
     217       55428 :     __ addq(rsp, Immediate(kDoubleSize));
     218       55425 :     unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
     219       55425 :                                                       -kDoubleSize);
     220       55424 :   }
     221             : 
     222             :  private:
     223             :   Register const result_;
     224             :   XMMRegister const input_;
     225             :   StubCallMode stub_mode_;
     226             :   UnwindingInfoWriter* const unwinding_info_writer_;
     227             :   Isolate* isolate_;
     228             :   Zone* zone_;
     229             : };
     230             : 
     231           0 : class OutOfLineRecordWrite final : public OutOfLineCode {
     232             :  public:
     233             :   OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand,
     234             :                        Register value, Register scratch0, Register scratch1,
     235             :                        RecordWriteMode mode, StubCallMode stub_mode)
     236             :       : OutOfLineCode(gen),
     237             :         object_(object),
     238             :         operand_(operand),
     239             :         value_(value),
     240             :         scratch0_(scratch0),
     241             :         scratch1_(scratch1),
     242             :         mode_(mode),
     243             :         stub_mode_(stub_mode),
     244      328905 :         zone_(gen->zone()) {}
     245             : 
     246      328903 :   void Generate() final {
     247      328903 :     if (mode_ > RecordWriteMode::kValueIsPointer) {
     248      260146 :       __ JumpIfSmi(value_, exit());
     249             :     }
     250             :     __ CheckPageFlag(value_, scratch0_,
     251             :                      MemoryChunk::kPointersToHereAreInterestingMask, zero,
     252      328902 :                      exit());
     253      328905 :     __ leaq(scratch1_, operand_);
     254             : 
     255             :     RememberedSetAction const remembered_set_action =
     256      328906 :         mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
     257      328906 :                                              : OMIT_REMEMBERED_SET;
     258             :     SaveFPRegsMode const save_fp_mode =
     259      328906 :         frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
     260             : 
     261      328906 :     if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
     262             :       // A direct call to a wasm runtime stub defined in this module.
     263             :       // Just encode the stub index. This will be patched when the code
     264             :       // is added to the native module and copied into wasm code space.
     265             :       __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
     266         312 :                              save_fp_mode, wasm::WasmCode::kWasmRecordWrite);
     267             :     } else {
     268             :       __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
     269      328594 :                              save_fp_mode);
     270             :     }
     271      328906 :   }
     272             : 
     273             :  private:
     274             :   Register const object_;
     275             :   Operand const operand_;
     276             :   Register const value_;
     277             :   Register const scratch0_;
     278             :   Register const scratch1_;
     279             :   RecordWriteMode const mode_;
     280             :   StubCallMode const stub_mode_;
     281             :   Zone* zone_;
     282             : };
     283             : 
     284           0 : class WasmOutOfLineTrap : public OutOfLineCode {
     285             :  public:
     286             :   WasmOutOfLineTrap(CodeGenerator* gen, Instruction* instr)
     287      272259 :       : OutOfLineCode(gen), gen_(gen), instr_(instr) {}
     288             : 
     289       34507 :   void Generate() override {
     290       34507 :     X64OperandConverter i(gen_, instr_);
     291             :     TrapId trap_id =
     292       69021 :         static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
     293             :     GenerateWithTrapId(trap_id);
     294       34529 :   }
     295             : 
     296             :  protected:
     297             :   CodeGenerator* gen_;
     298             : 
     299      272084 :   void GenerateWithTrapId(TrapId trap_id) { GenerateCallToTrap(trap_id); }
     300             : 
     301             :  private:
     302      271972 :   void GenerateCallToTrap(TrapId trap_id) {
     303      271972 :     if (!gen_->wasm_runtime_exception_support()) {
     304             :       // We cannot test calls to the runtime in cctest/test-run-wasm.
     305             :       // Therefore we emit a call to C here instead of a call to the runtime.
     306      153612 :       __ PrepareCallCFunction(0);
     307      153612 :       __ CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(),
     308      153612 :                        0);
     309      153612 :       __ LeaveFrame(StackFrame::WASM_COMPILED);
     310      153612 :       auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
     311             :       size_t pop_size =
     312      153612 :           call_descriptor->StackParameterCount() * kSystemPointerSize;
     313             :       // Use rcx as a scratch register, we return anyways immediately.
     314      153612 :       __ Ret(static_cast<int>(pop_size), rcx);
     315             :     } else {
     316      118410 :       gen_->AssembleSourcePosition(instr_);
     317             :       // A direct call to a wasm runtime stub defined in this module.
     318             :       // Just encode the stub index. This will be patched when the code
     319             :       // is added to the native module and copied into wasm code space.
     320      118574 :       __ near_call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
     321             :       ReferenceMap* reference_map =
     322      118389 :           new (gen_->zone()) ReferenceMap(gen_->zone());
     323      118322 :       gen_->RecordSafepoint(reference_map, Safepoint::kSimple,
     324      118322 :                             Safepoint::kNoLazyDeopt);
     325      118732 :       __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
     326             :     }
     327      272077 :   }
     328             : 
     329             :   Instruction* instr_;
     330             : };
     331             : 
     332           0 : class WasmProtectedInstructionTrap final : public WasmOutOfLineTrap {
     333             :  public:
     334             :   WasmProtectedInstructionTrap(CodeGenerator* gen, int pc, Instruction* instr)
     335      237347 :       : WasmOutOfLineTrap(gen, instr), pc_(pc) {}
     336             : 
     337      237433 :   void Generate() final {
     338      237433 :     gen_->AddProtectedInstructionLanding(pc_, __ pc_offset());
     339      237570 :     GenerateWithTrapId(TrapId::kTrapMemOutOfBounds);
     340      237578 :   }
     341             : 
     342             :  private:
     343             :   int pc_;
     344             : };
     345             : 
     346     6390153 : void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen,
     347             :                          InstructionCode opcode, Instruction* instr,
     348             :                          X64OperandConverter& i, int pc) {
     349             :   const MemoryAccessMode access_mode =
     350     6390153 :       static_cast<MemoryAccessMode>(MiscField::decode(opcode));
     351     6390153 :   if (access_mode == kMemoryAccessProtected) {
     352             :     new (zone) WasmProtectedInstructionTrap(codegen, pc, instr);
     353             :   }
     354     6389724 : }
     355             : 
     356     5668491 : void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
     357             :                                    InstructionCode opcode, Instruction* instr,
     358             :                                    X64OperandConverter& i) {
     359             :   const MemoryAccessMode access_mode =
     360     5668491 :       static_cast<MemoryAccessMode>(MiscField::decode(opcode));
     361     5668491 :   if (access_mode == kMemoryAccessPoisoned) {
     362             :     Register value = i.OutputRegister();
     363           0 :     codegen->tasm()->andq(value, kSpeculationPoisonRegister);
     364             :   }
     365     5668491 : }
     366             : 
     367             : }  // namespace
     368             : 
     369             : #define ASSEMBLE_UNOP(asm_instr)         \
     370             :   do {                                   \
     371             :     if (instr->Output()->IsRegister()) { \
     372             :       __ asm_instr(i.OutputRegister());  \
     373             :     } else {                             \
     374             :       __ asm_instr(i.OutputOperand());   \
     375             :     }                                    \
     376             :   } while (false)
     377             : 
     378             : #define ASSEMBLE_BINOP(asm_instr)                                     \
     379             :   do {                                                                \
     380             :     if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
     381             :       size_t index = 1;                                               \
     382             :       Operand right = i.MemoryOperand(&index);                        \
     383             :       __ asm_instr(i.InputRegister(0), right);                        \
     384             :     } else {                                                          \
     385             :       if (HasImmediateInput(instr, 1)) {                              \
     386             :         if (instr->InputAt(0)->IsRegister()) {                        \
     387             :           __ asm_instr(i.InputRegister(0), i.InputImmediate(1));      \
     388             :         } else {                                                      \
     389             :           __ asm_instr(i.InputOperand(0), i.InputImmediate(1));       \
     390             :         }                                                             \
     391             :       } else {                                                        \
     392             :         if (instr->InputAt(1)->IsRegister()) {                        \
     393             :           __ asm_instr(i.InputRegister(0), i.InputRegister(1));       \
     394             :         } else {                                                      \
     395             :           __ asm_instr(i.InputRegister(0), i.InputOperand(1));        \
     396             :         }                                                             \
     397             :       }                                                               \
     398             :     }                                                                 \
     399             :   } while (false)
     400             : 
     401             : #define ASSEMBLE_COMPARE(asm_instr)                                   \
     402             :   do {                                                                \
     403             :     if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
     404             :       size_t index = 0;                                               \
     405             :       Operand left = i.MemoryOperand(&index);                         \
     406             :       if (HasImmediateInput(instr, index)) {                          \
     407             :         __ asm_instr(left, i.InputImmediate(index));                  \
     408             :       } else {                                                        \
     409             :         __ asm_instr(left, i.InputRegister(index));                   \
     410             :       }                                                               \
     411             :     } else {                                                          \
     412             :       if (HasImmediateInput(instr, 1)) {                              \
     413             :         if (instr->InputAt(0)->IsRegister()) {                        \
     414             :           __ asm_instr(i.InputRegister(0), i.InputImmediate(1));      \
     415             :         } else {                                                      \
     416             :           __ asm_instr(i.InputOperand(0), i.InputImmediate(1));       \
     417             :         }                                                             \
     418             :       } else {                                                        \
     419             :         if (instr->InputAt(1)->IsRegister()) {                        \
     420             :           __ asm_instr(i.InputRegister(0), i.InputRegister(1));       \
     421             :         } else {                                                      \
     422             :           __ asm_instr(i.InputRegister(0), i.InputOperand(1));        \
     423             :         }                                                             \
     424             :       }                                                               \
     425             :     }                                                                 \
     426             :   } while (false)
     427             : 
     428             : #define ASSEMBLE_MULT(asm_instr)                              \
     429             :   do {                                                        \
     430             :     if (HasImmediateInput(instr, 1)) {                        \
     431             :       if (instr->InputAt(0)->IsRegister()) {                  \
     432             :         __ asm_instr(i.OutputRegister(), i.InputRegister(0),  \
     433             :                      i.InputImmediate(1));                    \
     434             :       } else {                                                \
     435             :         __ asm_instr(i.OutputRegister(), i.InputOperand(0),   \
     436             :                      i.InputImmediate(1));                    \
     437             :       }                                                       \
     438             :     } else {                                                  \
     439             :       if (instr->InputAt(1)->IsRegister()) {                  \
     440             :         __ asm_instr(i.OutputRegister(), i.InputRegister(1)); \
     441             :       } else {                                                \
     442             :         __ asm_instr(i.OutputRegister(), i.InputOperand(1));  \
     443             :       }                                                       \
     444             :     }                                                         \
     445             :   } while (false)
     446             : 
     447             : #define ASSEMBLE_SHIFT(asm_instr, width)                                   \
     448             :   do {                                                                     \
     449             :     if (HasImmediateInput(instr, 1)) {                                     \
     450             :       if (instr->Output()->IsRegister()) {                                 \
     451             :         __ asm_instr(i.OutputRegister(), Immediate(i.InputInt##width(1))); \
     452             :       } else {                                                             \
     453             :         __ asm_instr(i.OutputOperand(), Immediate(i.InputInt##width(1)));  \
     454             :       }                                                                    \
     455             :     } else {                                                               \
     456             :       if (instr->Output()->IsRegister()) {                                 \
     457             :         __ asm_instr##_cl(i.OutputRegister());                             \
     458             :       } else {                                                             \
     459             :         __ asm_instr##_cl(i.OutputOperand());                              \
     460             :       }                                                                    \
     461             :     }                                                                      \
     462             :   } while (false)
     463             : 
     464             : #define ASSEMBLE_MOVX(asm_instr)                            \
     465             :   do {                                                      \
     466             :     if (instr->addressing_mode() != kMode_None) {           \
     467             :       __ asm_instr(i.OutputRegister(), i.MemoryOperand());  \
     468             :     } else if (instr->InputAt(0)->IsRegister()) {           \
     469             :       __ asm_instr(i.OutputRegister(), i.InputRegister(0)); \
     470             :     } else {                                                \
     471             :       __ asm_instr(i.OutputRegister(), i.InputOperand(0));  \
     472             :     }                                                       \
     473             :   } while (false)
     474             : 
     475             : #define ASSEMBLE_SSE_BINOP(asm_instr)                                   \
     476             :   do {                                                                  \
     477             :     if (instr->InputAt(1)->IsFPRegister()) {                            \
     478             :       __ asm_instr(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); \
     479             :     } else {                                                            \
     480             :       __ asm_instr(i.InputDoubleRegister(0), i.InputOperand(1));        \
     481             :     }                                                                   \
     482             :   } while (false)
     483             : 
     484             : #define ASSEMBLE_SSE_UNOP(asm_instr)                                    \
     485             :   do {                                                                  \
     486             :     if (instr->InputAt(0)->IsFPRegister()) {                            \
     487             :       __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); \
     488             :     } else {                                                            \
     489             :       __ asm_instr(i.OutputDoubleRegister(), i.InputOperand(0));        \
     490             :     }                                                                   \
     491             :   } while (false)
     492             : // AVX three-operand form: output register, input register 0, then input 1.
     493             : #define ASSEMBLE_AVX_BINOP(asm_instr)                                  \
     494             :   do {                                                                 \
     495             :     CpuFeatureScope avx_scope(tasm(), AVX);                            \
     496             :     if (instr->InputAt(1)->IsFPRegister()) {                           \
     497             :       __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
     498             :                    i.InputDoubleRegister(1));                          \
     499             :     } else {                                                           \
     500             :       __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
     501             :                    i.InputOperand(1));                                 \
     502             :     }                                                                  \
     503             :   } while (false)
     504             : // Calls the ieee754_<name>_function external reference as a 2-argument C call.
     505             : #define ASSEMBLE_IEEE754_BINOP(name)                                     \
     506             :   do {                                                                   \
     507             :     __ PrepareCallCFunction(2);                                          \
     508             :     __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \
     509             :   } while (false)
     510             : // Calls the ieee754_<name>_function external reference as a 1-argument C call.
     511             : #define ASSEMBLE_IEEE754_UNOP(name)                                      \
     512             :   do {                                                                   \
     513             :     __ PrepareCallCFunction(1);                                          \
     514             :     __ CallCFunction(ExternalReference::ieee754_##name##_function(), 1); \
     515             :   } while (false)
     516             : // Loop: load memory into rax, apply bin_inst to a movl copy, lock cmpxchg.
     517             : #define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
     518             :   do {                                                          \
     519             :     Label binop;                                                \
     520             :     __ bind(&binop);                                            \
     521             :     __ mov_inst(rax, i.MemoryOperand(1));                       \
     522             :     __ movl(i.TempRegister(0), rax);                            \
     523             :     __ bin_inst(i.TempRegister(0), i.InputRegister(0));         \
     524             :     __ lock();                                                  \
     525             :     __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));     \
     526             :     __ j(not_equal, &binop); /* retry while not_equal */        \
     527             :   } while (false)
     528             : // Loop: load memory into rax, apply bin_inst to a movq copy, lock cmpxchg.
     529             : #define ASSEMBLE_ATOMIC64_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
     530             :   do {                                                            \
     531             :     Label binop;                                                  \
     532             :     __ bind(&binop);                                              \
     533             :     __ mov_inst(rax, i.MemoryOperand(1));                         \
     534             :     __ movq(i.TempRegister(0), rax);                              \
     535             :     __ bin_inst(i.TempRegister(0), i.InputRegister(0));           \
     536             :     __ lock();                                                    \
     537             :     __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));       \
     538             :     __ j(not_equal, &binop); /* retry while not_equal */          \
     539             :   } while (false)
     540             : // Emits opcode(dst_operand, input `index`) as a SIMD register or an operand.
     541             : #define ASSEMBLE_SIMD_INSTR(opcode, dst_operand, index)      \
     542             :   do {                                                       \
     543             :     if (instr->InputAt(index)->IsSimd128Register()) {        \
     544             :       __ opcode(dst_operand, i.InputSimd128Register(index)); \
     545             :     } else {                                                 \
     546             :       __ opcode(dst_operand, i.InputOperand(index));         \
     547             :     }                                                        \
     548             :   } while (false)
     549             : // Emits opcode(dst_operand, input `index`, imm); input is register or operand.
     550             : #define ASSEMBLE_SIMD_IMM_INSTR(opcode, dst_operand, index, imm)  \
     551             :   do {                                                            \
     552             :     if (instr->InputAt(index)->IsSimd128Register()) {             \
     553             :       __ opcode(dst_operand, i.InputSimd128Register(index), imm); \
     554             :     } else {                                                      \
     555             :       __ opcode(dst_operand, i.InputOperand(index), imm);         \
     556             :     }                                                             \
     557             :   } while (false)
     558             : // Emits opcode(dst, src); src is input 1 if InputCount() == 2, else input 0.
     559             : #define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode)             \
     560             :   do {                                                   \
     561             :     XMMRegister dst = i.OutputSimd128Register();         \
     562             :     DCHECK_EQ(dst, i.InputSimd128Register(0));           \
     563             :     byte input_index = instr->InputCount() == 2 ? 1 : 0; \
     564             :     ASSEMBLE_SIMD_INSTR(opcode, dst, input_index);       \
     565             :   } while (false)
     566             : // Emits opcode(output, input 1, imm) under the SSELevel CPU feature scope.
     567             : #define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, SSELevel, imm)                  \
     568             :   do {                                                                    \
     569             :     CpuFeatureScope sse_scope(tasm(), SSELevel);                          \
     570             :     DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));      \
     571             :     __ opcode(i.OutputSimd128Register(), i.InputSimd128Register(1), imm); \
     572             :   } while (false)
     573             : // Emits frame teardown (rsp = rbp; pop rbp) and marks it in the unwinding info.
     574     2521770 : void CodeGenerator::AssembleDeconstructFrame() {
     575     2521770 :   unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
     576     2521808 :   __ movq(rsp, rbp);  // rsp now points at the slot holding the saved rbp
     577     2522162 :   __ popq(rbp);
     578     2522079 : }
     579             : // Prepares for a tail call: with a frame, reloads rbp from the slot at [rbp].
     580      119712 : void CodeGenerator::AssemblePrepareTailCall() {
     581      119712 :   if (frame_access_state()->has_frame()) {
     582      139312 :     __ movq(rbp, MemOperand(rbp, 0));  // rbp <- word stored at [rbp + 0]
     583             :   }
     584             :   frame_access_state()->SetFrameAccessToSP();  // done with or without a frame
     585      119712 : }
     586             : // Runs PrepareForTailCall only when the current frame is an arguments adaptor.
     587        1120 : void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
     588             :                                                      Register scratch1,
     589             :                                                      Register scratch2,
     590             :                                                      Register scratch3) {
     591             :   DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
     592        1120 :   Label done;  // bound at the end; jumped to when no adaptor frame is found
     593             : 
     594             :   // Check if current frame is an arguments adaptor frame.
     595        2240 :   __ cmpq(Operand(rbp, CommonFrameConstants::kContextOrFrameTypeOffset),
     596        1120 :           Immediate(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
     597        1120 :   __ j(not_equal, &done, Label::kNear);
     598             : 
     599             :   // Load arguments count from current arguments adaptor frame (note, it
     600             :   // does not include receiver).
     601        1120 :   Register caller_args_count_reg = scratch1;
     602        2240 :   __ SmiUntag(caller_args_count_reg,
     603        1120 :               Operand(rbp, ArgumentsAdaptorFrameConstants::kLengthOffset));
     604             : 
     605             :   ParameterCount callee_args_count(args_reg);
     606             :   __ PrepareForTailCall(callee_args_count, caller_args_count_reg, scratch2,
     607        1120 :                         scratch3);
     608        1120 :   __ bind(&done);
     609        1120 : }
     610             : 
     611             : namespace {
     612             : // Adjusts rsp by the slot delta to new_slot_above_sp and updates the SP delta.
     613      270668 : void AdjustStackPointerForTailCall(Assembler* assembler,
     614             :                                    FrameAccessState* state,
     615             :                                    int new_slot_above_sp,
     616             :                                    bool allow_shrinkage = true) {
     617             :   int current_sp_offset = state->GetSPToFPSlotCount() +
     618      270668 :                           StandardFrameConstants::kFixedSlotCountAboveFp;
     619      270668 :   int stack_slot_delta = new_slot_above_sp - current_sp_offset;
     620      270668 :   if (stack_slot_delta > 0) {  // needs more slots: lower rsp
     621         776 :     assembler->subq(rsp, Immediate(stack_slot_delta * kSystemPointerSize));
     622             :     state->IncreaseSPDelta(stack_slot_delta);
     623      269892 :   } else if (allow_shrinkage && stack_slot_delta < 0) {  // raise rsp
     624       69304 :     assembler->addq(rsp, Immediate(-stack_slot_delta * kSystemPointerSize));
     625             :     state->IncreaseSPDelta(stack_slot_delta);
     626             :   }
     627      270668 : }
     628             : // Pushes the mask as two 64-bit words: mask[3]:mask[2], then mask[1]:mask[0].
     629        2152 : void SetupShuffleMaskOnStack(TurboAssembler* assembler, uint32_t* mask) {
     630        2152 :   int64_t shuffle_mask = (mask[2]) | (static_cast<uint64_t>(mask[3]) << 32);
     631        2152 :   assembler->movq(kScratchRegister, shuffle_mask);
     632        2152 :   assembler->Push(kScratchRegister);  // mask[2..3] go on the stack first
     633        2152 :   shuffle_mask = (mask[0]) | (static_cast<uint64_t>(mask[1]) << 32);
     634             :   assembler->movq(kScratchRegister, shuffle_mask);
     635        2152 :   assembler->Push(kScratchRegister);  // mask[0..1] are pushed last
     636        2152 : }
     637             : 
     638             : }  // namespace
     639             : // Turns push-compatible moves into pushes, then grows the stack if needed.
     640      119724 : void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
     641             :                                               int first_unused_stack_slot) {
     642      119724 :   CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
     643             :   ZoneVector<MoveOperands*> pushes(zone());
     644      119724 :   GetPushCompatibleMoves(instr, flags, &pushes);
     645             : 
     646      132652 :   if (!pushes.empty() &&
     647       25856 :       (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
     648             :        first_unused_stack_slot)) {
     649             :     X64OperandConverter g(this, instr);
     650       44148 :     for (auto move : pushes) {
     651             :       LocationOperand destination_location(
     652             :           LocationOperand::cast(move->destination()));
     653       31220 :       InstructionOperand source(move->source());
     654             :       AdjustStackPointerForTailCall(tasm(), frame_access_state(),
     655       31220 :                                     destination_location.index());
     656       31220 :       if (source.IsStackSlot()) {
     657             :         LocationOperand source_location(LocationOperand::cast(source));
     658       12940 :         __ Push(g.SlotToOperand(source_location.index()));
     659       18280 :       } else if (source.IsRegister()) {
     660             :         LocationOperand source_location(LocationOperand::cast(source));
     661       18280 :         __ Push(source_location.GetRegister());
     662           0 :       } else if (source.IsImmediate()) {
     663           0 :         __ Push(Immediate(ImmediateOperand::cast(source).inline_value()));
     664             :       } else {
     665             :         // Pushes of non-scalar data types are not supported.
     666           0 :         UNIMPLEMENTED();
     667             :       }
     668             :       frame_access_state()->IncreaseSPDelta(1);  // one slot was just pushed
     669             :       move->Eliminate();  // emitted as a push above
     670             :     }
     671             :   }
     672             :   AdjustStackPointerForTailCall(tasm(), frame_access_state(),
     673      119724 :                                 first_unused_stack_slot, false);  // grow only
     674      119724 : }
     675             : // Final rsp adjustment to first_unused_stack_slot; shrinking is allowed here.
     676      119724 : void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
     677             :                                              int first_unused_stack_slot) {
     678             :   AdjustStackPointerForTailCall(tasm(), frame_access_state(),
     679      119724 :                                 first_unused_stack_slot);
     680      119724 : }
     681             : 
     682             : // Asserts {kJavaScriptCallCodeStartRegister} holds the computed code start.
     683         114 : void CodeGenerator::AssembleCodeStartRegisterCheck() {
     684         114 :   __ ComputeCodeStartAddress(rbx);  // rbx receives the computed address
     685         114 :   __ cmpq(rbx, kJavaScriptCallCodeStartRegister);
     686         114 :   __ Assert(equal, AbortReason::kWrongFunctionCodeStart);
     687         114 : }
     688             : 
     689             : // Checks whether the code object is marked for deoptimization and, if so,
     690             : // jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we
     691             : // need to:
     692             : //    1. load the {CodeDataContainer} pointer stored in the code object, at an
     693             : //       offset from {kJavaScriptCallCodeStartRegister};
     694             : //    2. test kMarkedForDeoptimizationBit in its kind-specific flags; and
     695             : //    3. jump to the builtin if that bit is not zero.
     696      463823 : void CodeGenerator::BailoutIfDeoptimized() {
     697             :   int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
     698      927643 :   __ LoadTaggedPointerField(rbx,
     699      463829 :                             Operand(kJavaScriptCallCodeStartRegister, offset));
     700      463830 :   __ testl(FieldOperand(rbx, CodeDataContainer::kKindSpecificFlagsOffset),
     701             :            Immediate(1 << Code::kMarkedForDeoptimizationBit));
     702      463821 :   __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
     703      463828 :           RelocInfo::CODE_TARGET, not_zero);
     704      463826 : }
     705             : // Sets kSpeculationPoisonRegister to -1 if the code start matches, else to 0.
     706           0 : void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
     707             :   // Set a mask which has all bits set in the normal case, but has all
     708             :   // bits cleared if we are speculatively executing the wrong PC.
     709           0 :   __ ComputeCodeStartAddress(rbx);
     710           0 :   __ xorq(kSpeculationPoisonRegister, kSpeculationPoisonRegister);
     711             :   __ cmpq(kJavaScriptCallCodeStartRegister, rbx);
     712             :   __ movq(rbx, Immediate(-1));  // all-ones value; movq leaves the flags intact
     713           0 :   __ cmovq(equal, kSpeculationPoisonRegister, rbx);
     714           0 : }
     715             : // ANDs kJSFunctionRegister, kContextRegister and rsp with the poison mask.
     716           0 : void CodeGenerator::AssembleRegisterArgumentPoisoning() {
     717           0 :   __ andq(kJSFunctionRegister, kSpeculationPoisonRegister);
     718             :   __ andq(kContextRegister, kSpeculationPoisonRegister);
     719             :   __ andq(rsp, kSpeculationPoisonRegister);
     720           0 : }
     721             : 
     722             : // Assembles an instruction after register allocation, producing machine code.
     723    69637294 : CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     724             :     Instruction* instr) {
     725             :   X64OperandConverter i(this, instr);
     726             :   InstructionCode opcode = instr->opcode();
     727    69637294 :   ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
     728    69637294 :   switch (arch_opcode) {
     729             :     case kArchCallCodeObject: {
     730     5138147 :       if (HasImmediateInput(instr, 0)) {
     731     4763048 :         Handle<Code> code = i.InputCode(0);
     732     4763048 :         __ Call(code, RelocInfo::CODE_TARGET);
     733             :       } else {
     734      375114 :         Register reg = i.InputRegister(0);
     735             :         DCHECK_IMPLIES(
     736             :             HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
     737             :             reg == kJavaScriptCallCodeStartRegister);
     738      375114 :         __ LoadCodeObjectEntry(reg, reg);
     739      375115 :         if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
     740           0 :           __ RetpolineCall(reg);
     741             :         } else {
     742      375115 :           __ call(reg);
     743             :         }
     744             :       }
     745     5138171 :       RecordCallPosition(instr);
     746             :       frame_access_state()->ClearSPDelta();
     747             :       break;
     748             :     }
     749             :     case kArchCallBuiltinPointer: {
     750             :       DCHECK(!HasImmediateInput(instr, 0));
     751        3584 :       Register builtin_pointer = i.InputRegister(0);
     752        3584 :       __ CallBuiltinPointer(builtin_pointer);
     753        3584 :       RecordCallPosition(instr);
     754             :       frame_access_state()->ClearSPDelta();
     755             :       break;
     756             :     }
     757             :     case kArchCallWasmFunction: {
     758      969829 :       if (HasImmediateInput(instr, 0)) {
     759       92978 :         Constant constant = i.ToConstant(instr->InputAt(0));
     760       92984 :         Address wasm_code = static_cast<Address>(constant.ToInt64());
     761       92984 :         if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
     762       92989 :           __ near_call(wasm_code, constant.rmode());
     763             :         } else {
     764           0 :           if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
     765           0 :             __ RetpolineCall(wasm_code, constant.rmode());
     766             :           } else {
     767           0 :             __ Call(wasm_code, constant.rmode());
     768             :           }
     769             :         }
     770             :       } else {
     771      876851 :         Register reg = i.InputRegister(0);
     772      876851 :         if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
     773           0 :           __ RetpolineCall(reg);
     774             :         } else {
     775      876851 :           __ call(reg);
     776             :         }
     777             :       }
     778      969831 :       RecordCallPosition(instr);
     779             :       frame_access_state()->ClearSPDelta();
     780             :       break;
     781             :     }
     782             :     case kArchTailCallCodeObjectFromJSFunction:
     783             :     case kArchTailCallCodeObject: {
     784       36696 :       if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) {
     785             :         AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
     786             :                                          i.TempRegister(0), i.TempRegister(1),
     787        1120 :                                          i.TempRegister(2));
     788             :       }
     789       36696 :       if (HasImmediateInput(instr, 0)) {
     790       31072 :         Handle<Code> code = i.InputCode(0);
     791       31072 :         __ Jump(code, RelocInfo::CODE_TARGET);
     792             :       } else {
     793        5624 :         Register reg = i.InputRegister(0);
     794             :         DCHECK_IMPLIES(
     795             :             HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
     796             :             reg == kJavaScriptCallCodeStartRegister);
     797        5624 :         __ LoadCodeObjectEntry(reg, reg);
     798        5624 :         if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
     799           0 :           __ RetpolineJump(reg);
     800             :         } else {
     801        5624 :           __ jmp(reg);
     802             :         }
     803             :       }
     804             :       unwinding_info_writer_.MarkBlockWillExit();
     805             :       frame_access_state()->ClearSPDelta();
     806       36696 :       frame_access_state()->SetFrameAccessToDefault();
     807       36696 :       break;
     808             :     }
     809             :     case kArchTailCallWasm: {
     810         248 :       if (HasImmediateInput(instr, 0)) {
     811         144 :         Constant constant = i.ToConstant(instr->InputAt(0));
     812         144 :         Address wasm_code = static_cast<Address>(constant.ToInt64());
     813         144 :         if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
     814         144 :           __ near_jmp(wasm_code, constant.rmode());
     815             :         } else {
     816             :           __ Move(kScratchRegister, wasm_code, constant.rmode());
     817           0 :           __ jmp(kScratchRegister);
     818             :         }
     819             :       } else {
     820         104 :         Register reg = i.InputRegister(0);
     821         104 :         if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
     822           0 :           __ RetpolineJump(reg);
     823             :         } else {
     824         104 :           __ jmp(reg);
     825             :         }
     826             :       }
     827             :       unwinding_info_writer_.MarkBlockWillExit();
     828             :       frame_access_state()->ClearSPDelta();
     829         248 :       frame_access_state()->SetFrameAccessToDefault();
     830         248 :       break;
     831             :     }
     832             :     case kArchTailCallAddress: {
     833       82768 :       CHECK(!HasImmediateInput(instr, 0));
     834       82768 :       Register reg = i.InputRegister(0);
     835             :       DCHECK_IMPLIES(
     836             :           HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
     837             :           reg == kJavaScriptCallCodeStartRegister);
     838       82768 :       if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
     839           0 :         __ RetpolineJump(reg);
     840             :       } else {
     841       82768 :         __ jmp(reg);
     842             :       }
     843             :       unwinding_info_writer_.MarkBlockWillExit();
     844             :       frame_access_state()->ClearSPDelta();
     845       82768 :       frame_access_state()->SetFrameAccessToDefault();
     846             :       break;
     847             :     }
     848             :     case kArchCallJSFunction: {
     849             :       Register func = i.InputRegister(0);
     850       23828 :       if (FLAG_debug_code) {
     851             :         // Check the function's context matches the context argument.
     852           8 :         __ cmp_tagged(rsi, FieldOperand(func, JSFunction::kContextOffset));
     853           8 :         __ Assert(equal, AbortReason::kWrongFunctionContext);
     854             :       }
     855             :       static_assert(kJavaScriptCallCodeStartRegister == rcx, "ABI mismatch");
     856             :       __ LoadTaggedPointerField(rcx,
     857       23828 :                                 FieldOperand(func, JSFunction::kCodeOffset));
     858       23828 :       __ CallCodeObject(rcx);
     859             :       frame_access_state()->ClearSPDelta();
     860       23828 :       RecordCallPosition(instr);
     861             :       break;
     862             :     }
     863             :     case kArchPrepareCallCFunction: {
     864             :       // Frame alignment requires using FP-relative frame addressing.
     865             :       frame_access_state()->SetFrameAccessToFP();
     866       25620 :       int const num_parameters = MiscField::decode(instr->opcode());
     867       25620 :       __ PrepareCallCFunction(num_parameters);
     868       25620 :       break;
     869             :     }
     870             :     case kArchSaveCallerRegisters: {
     871             :       fp_mode_ =
     872         676 :           static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
     873             :       DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
     874             :       // kReturnRegister0 should have been saved before entering the stub.
     875         676 :       int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
     876             :       DCHECK(IsAligned(bytes, kSystemPointerSize));
     877             :       DCHECK_EQ(0, frame_access_state()->sp_delta());
     878         676 :       frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
     879             :       DCHECK(!caller_registers_saved_);
     880         676 :       caller_registers_saved_ = true;
     881         676 :       break;
     882             :     }
     883             :     case kArchRestoreCallerRegisters: {
     884             :       DCHECK(fp_mode_ ==
     885             :              static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
     886             :       DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
     887             :       // Don't overwrite the returned value.
     888        1352 :       int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
     889         676 :       frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
     890             :       DCHECK_EQ(0, frame_access_state()->sp_delta());
     891             :       DCHECK(caller_registers_saved_);
     892         676 :       caller_registers_saved_ = false;
     893         676 :       break;
     894             :     }
     895             :     case kArchPrepareTailCall:
     896      119712 :       AssemblePrepareTailCall();
     897      119712 :       break;
     898             :     case kArchCallCFunction: {
     899             :       int const num_parameters = MiscField::decode(instr->opcode());
     900       25620 :       if (HasImmediateInput(instr, 0)) {
     901       24480 :         ExternalReference ref = i.InputExternalReference(0);
     902       24480 :         __ CallCFunction(ref, num_parameters);
     903             :       } else {
     904        1140 :         Register func = i.InputRegister(0);
     905        1140 :         __ CallCFunction(func, num_parameters);
     906             :       }
     907       25620 :       frame_access_state()->SetFrameAccessToDefault();
     908             :       // Ideally, we should decrement SP delta to match the change of stack
     909             :       // pointer in CallCFunction. However, for certain architectures (e.g.
     910             :       // ARM), there may be more strict alignment requirement, causing old SP
     911             :       // to be saved on the stack. In those cases, we can not calculate the SP
     912             :       // delta statically.
     913             :       frame_access_state()->ClearSPDelta();
     914       25620 :       if (caller_registers_saved_) {
     915             :         // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
     916             :         // Here, we assume the sequence to be:
     917             :         //   kArchSaveCallerRegisters;
     918             :         //   kArchCallCFunction;
     919             :         //   kArchRestoreCallerRegisters;
     920             :         int bytes =
     921        1352 :             __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
     922         676 :         frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
     923             :       }
     924             :       // TODO(tebbi): Do we need an lfence here?
     925             :       break;
     926             :     }
     927             :     case kArchJmp:
     928     5047095 :       AssembleArchJump(i.InputRpo(0));
     929     5047109 :       break;
     930             :     case kArchBinarySearchSwitch:
     931       33871 :       AssembleArchBinarySearchSwitch(instr);
     932       33872 :       break;
     933             :     case kArchLookupSwitch:
     934           0 :       AssembleArchLookupSwitch(instr);
     935           0 :       break;
     936             :     case kArchTableSwitch:
     937         307 :       AssembleArchTableSwitch(instr);
     938         307 :       break;
     939             :     case kArchComment:
     940           4 :       __ RecordComment(reinterpret_cast<const char*>(i.InputInt64(0)));
     941           4 :       break;
     942             :     case kArchDebugAbort:
     943             :       DCHECK(i.InputRegister(0) == rdx);
     944           0 :       if (!frame_access_state()->has_frame()) {
     945             :         // We don't actually want to generate a pile of code for this, so just
     946             :         // claim there is a stack frame, without generating one.
     947           0 :         FrameScope scope(tasm(), StackFrame::NONE);
     948           0 :         __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
     949           0 :                 RelocInfo::CODE_TARGET);
     950             :       } else {
     951           0 :         __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
     952           0 :                 RelocInfo::CODE_TARGET);
     953             :       }
     954           0 :       __ int3();
     955             :       unwinding_info_writer_.MarkBlockWillExit();
     956             :       break;
     957             :     case kArchDebugBreak:
     958      266339 :       __ int3();
     959      266339 :       break;
     960             :     case kArchThrowTerminator:
     961             :       unwinding_info_writer_.MarkBlockWillExit();
     962             :       break;
     963             :     case kArchNop:
     964             :       // don't emit code for nops.
     965             :       break;
     966             :     case kArchDeoptimize: {
     967             :       int deopt_state_id =
     968       43684 :           BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
     969             :       CodeGenResult result =
     970       43684 :           AssembleDeoptimizerCall(deopt_state_id, current_source_position_);
     971       43684 :       if (result != kSuccess) return result;
     972             :       unwinding_info_writer_.MarkBlockWillExit();
     973             :       break;
     974             :     }
     975             :     case kArchRet:
     976     2786995 :       AssembleReturn(instr->InputAt(0));
     977     2787109 :       break;
     978             :     case kArchStackPointer:
     979           0 :       __ movq(i.OutputRegister(), rsp);
     980             :       break;
     981             :     case kArchFramePointer:
     982       32660 :       __ movq(i.OutputRegister(), rbp);
     983             :       break;
     984             :     case kArchParentFramePointer:
     985       50404 :       if (frame_access_state()->has_frame()) {
     986       78132 :         __ movq(i.OutputRegister(), Operand(rbp, 0));
     987             :       } else {
     988       24360 :         __ movq(i.OutputRegister(), rbp);
     989             :       }
     990             :       break;
     991             :     case kArchTruncateDoubleToI: {
     992             :       auto result = i.OutputRegister();
     993             :       auto input = i.InputDoubleRegister(0);
     994             :       auto ool = new (zone()) OutOfLineTruncateDoubleToI(
     995             :           this, result, input, DetermineStubCallMode(),
     996      110861 :           &unwinding_info_writer_);
     997             :       // We use Cvttsd2siq instead of Cvttsd2si due to performance reasons. The
     998             :       // use of Cvttsd2siq requires the movl below to avoid sign extension.
     999       55425 :       __ Cvttsd2siq(result, input);
    1000       55432 :       __ cmpq(result, Immediate(1));
    1001       55433 :       __ j(overflow, ool->entry());
    1002       55434 :       __ bind(ool->exit());
    1003             :       __ movl(result, result);
    1004             :       break;
    1005             :     }
    1006             :     case kArchStoreWithWriteBarrier: {
    1007             :       RecordWriteMode mode =
    1008             :           static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
    1009             :       Register object = i.InputRegister(0);
    1010      328902 :       size_t index = 0;
    1011      328902 :       Operand operand = i.MemoryOperand(&index);
    1012      328903 :       Register value = i.InputRegister(index);
    1013             :       Register scratch0 = i.TempRegister(0);
    1014             :       Register scratch1 = i.TempRegister(1);
    1015             :       auto ool = new (zone())
    1016             :           OutOfLineRecordWrite(this, object, operand, value, scratch0, scratch1,
    1017      657805 :                                mode, DetermineStubCallMode());
    1018      328902 :       __ StoreTaggedField(operand, value);
    1019             :       __ CheckPageFlag(object, scratch0,
    1020             :                        MemoryChunk::kPointersFromHereAreInterestingMask,
    1021      328905 :                        not_zero, ool->entry());
    1022      328904 :       __ bind(ool->exit());
    1023             :       break;
    1024             :     }
    1025             :     case kArchWordPoisonOnSpeculation:
    1026             :       DCHECK_EQ(i.OutputRegister(), i.InputRegister(0));
    1027           0 :       __ andq(i.InputRegister(0), kSpeculationPoisonRegister);
    1028             :       break;
    1029             :     case kLFence:
    1030           0 :       __ lfence();
    1031           0 :       break;
    1032             :     case kArchStackSlot: {
    1033             :       FrameOffset offset =
    1034      366535 :           frame_access_state()->GetFrameOffset(i.InputInt32(0));
    1035      366536 :       Register base = offset.from_stack_pointer() ? rsp : rbp;
    1036     1099617 :       __ leaq(i.OutputRegister(), Operand(base, offset.offset()));
    1037             :       break;
    1038             :     }
    1039             :     case kIeee754Float64Acos:
    1040         116 :       ASSEMBLE_IEEE754_UNOP(acos);
    1041         116 :       break;
    1042             :     case kIeee754Float64Acosh:
    1043         116 :       ASSEMBLE_IEEE754_UNOP(acosh);
    1044         116 :       break;
    1045             :     case kIeee754Float64Asin:
    1046         116 :       ASSEMBLE_IEEE754_UNOP(asin);
    1047         116 :       break;
    1048             :     case kIeee754Float64Asinh:
    1049         116 :       ASSEMBLE_IEEE754_UNOP(asinh);
    1050         116 :       break;
    1051             :     case kIeee754Float64Atan:
    1052         133 :       ASSEMBLE_IEEE754_UNOP(atan);
    1053         133 :       break;
    1054             :     case kIeee754Float64Atanh:
    1055         116 :       ASSEMBLE_IEEE754_UNOP(atanh);
    1056         116 :       break;
    1057             :     case kIeee754Float64Atan2:
    1058         129 :       ASSEMBLE_IEEE754_BINOP(atan2);
    1059         129 :       break;
    1060             :     case kIeee754Float64Cbrt:
    1061         116 :       ASSEMBLE_IEEE754_UNOP(cbrt);
    1062         116 :       break;
    1063             :     case kIeee754Float64Cos:
    1064         271 :       ASSEMBLE_IEEE754_UNOP(cos);
    1065         271 :       break;
    1066             :     case kIeee754Float64Cosh:
    1067         123 :       ASSEMBLE_IEEE754_UNOP(cosh);
    1068         123 :       break;
    1069             :     case kIeee754Float64Exp:
    1070         148 :       ASSEMBLE_IEEE754_UNOP(exp);
    1071         148 :       break;
    1072             :     case kIeee754Float64Expm1:
    1073         123 :       ASSEMBLE_IEEE754_UNOP(expm1);
    1074         123 :       break;
    1075             :     case kIeee754Float64Log:
    1076         284 :       ASSEMBLE_IEEE754_UNOP(log);
    1077         284 :       break;
    1078             :     case kIeee754Float64Log1p:
    1079         116 :       ASSEMBLE_IEEE754_UNOP(log1p);
    1080         116 :       break;
    1081             :     case kIeee754Float64Log2:
    1082         116 :       ASSEMBLE_IEEE754_UNOP(log2);
    1083         116 :       break;
    1084             :     case kIeee754Float64Log10:
    1085         116 :       ASSEMBLE_IEEE754_UNOP(log10);
    1086         116 :       break;
    1087             :     case kIeee754Float64Pow:
    1088         336 :       ASSEMBLE_IEEE754_BINOP(pow);
    1089         336 :       break;
    1090             :     case kIeee754Float64Sin:
    1091         268 :       ASSEMBLE_IEEE754_UNOP(sin);
    1092         268 :       break;
    1093             :     case kIeee754Float64Sinh:
    1094         123 :       ASSEMBLE_IEEE754_UNOP(sinh);
    1095         123 :       break;
    1096             :     case kIeee754Float64Tan:
    1097         168 :       ASSEMBLE_IEEE754_UNOP(tan);
    1098         168 :       break;
    1099             :     case kIeee754Float64Tanh:
    1100         123 :       ASSEMBLE_IEEE754_UNOP(tanh);
    1101         123 :       break;
    1102             :     case kX64Add32:
    1103      984572 :       ASSEMBLE_BINOP(addl);
    1104             :       break;
    1105             :     case kX64Add:
    1106      126091 :       ASSEMBLE_BINOP(addq);
    1107             :       break;
    1108             :     case kX64Sub32:
    1109      213569 :       ASSEMBLE_BINOP(subl);
    1110             :       break;
    1111             :     case kX64Sub:
    1112      169118 :       ASSEMBLE_BINOP(subq);
    1113             :       break;
    1114             :     case kX64And32:
    1115      710762 :       ASSEMBLE_BINOP(andl);
    1116             :       break;
    1117             :     case kX64And:
    1118     1078401 :       ASSEMBLE_BINOP(andq);
    1119             :       break;
    1120             :     case kX64Cmp8:
    1121       35053 :       ASSEMBLE_COMPARE(cmpb);
    1122             :       break;
    1123             :     case kX64Cmp16:
    1124     1291848 :       ASSEMBLE_COMPARE(cmpw);
    1125             :       break;
    1126             :     case kX64Cmp32:
    1127     4672296 :       ASSEMBLE_COMPARE(cmpl);
    1128             :       break;
    1129             :     case kX64Cmp:
    1130     8158722 :       ASSEMBLE_COMPARE(cmpq);
    1131             :       break;
    1132             :     case kX64Test8:
    1133      317473 :       ASSEMBLE_COMPARE(testb);
    1134             :       break;
    1135             :     case kX64Test16:
    1136       98217 :       ASSEMBLE_COMPARE(testw);
    1137             :       break;
    1138             :     case kX64Test32:
    1139      470271 :       ASSEMBLE_COMPARE(testl);
    1140             :       break;
    1141             :     case kX64Test:
    1142     2643067 :       ASSEMBLE_COMPARE(testq);
    1143             :       break;
    1144             :     case kX64Imul32:
    1145      157294 :       ASSEMBLE_MULT(imull);
    1146             :       break;
    1147             :     case kX64Imul:
    1148       62807 :       ASSEMBLE_MULT(imulq);
    1149             :       break;
    1150             :     case kX64ImulHigh32:
    1151        4211 :       if (instr->InputAt(1)->IsRegister()) {
    1152        4211 :         __ imull(i.InputRegister(1));
    1153             :       } else {
    1154           0 :         __ imull(i.InputOperand(1));
    1155             :       }
    1156             :       break;
    1157             :     case kX64UmulHigh32:
    1158        1635 :       if (instr->InputAt(1)->IsRegister()) {
    1159        1635 :         __ mull(i.InputRegister(1));
    1160             :       } else {
    1161           0 :         __ mull(i.InputOperand(1));
    1162             :       }
    1163             :       break;
    1164             :     case kX64Idiv32:
    1165       31933 :       __ cdq();
    1166             :       __ idivl(i.InputRegister(1));
    1167             :       break;
    1168             :     case kX64Idiv:
    1169        2732 :       __ cqo();
    1170             :       __ idivq(i.InputRegister(1));
    1171             :       break;
    1172             :     case kX64Udiv32:
    1173       29050 :       __ xorl(rdx, rdx);
    1174             :       __ divl(i.InputRegister(1));
    1175             :       break;
    1176             :     case kX64Udiv:
    1177        1768 :       __ xorq(rdx, rdx);
    1178             :       __ divq(i.InputRegister(1));
    1179             :       break;
    1180             :     case kX64Not:
    1181          88 :       ASSEMBLE_UNOP(notq);
    1182             :       break;
    1183             :     case kX64Not32:
    1184        5566 :       ASSEMBLE_UNOP(notl);
    1185             :       break;
    1186             :     case kX64Neg:
    1187       16776 :       ASSEMBLE_UNOP(negq);
    1188             :       break;
    1189             :     case kX64Neg32:
    1190       13418 :       ASSEMBLE_UNOP(negl);
    1191             :       break;
    1192             :     case kX64Or32:
    1193      590899 :       ASSEMBLE_BINOP(orl);
    1194             :       break;
    1195             :     case kX64Or:
    1196       89332 :       ASSEMBLE_BINOP(orq);
    1197             :       break;
    1198             :     case kX64Xor32:
    1199       80302 :       ASSEMBLE_BINOP(xorl);
    1200             :       break;
    1201             :     case kX64Xor:
    1202        3272 :       ASSEMBLE_BINOP(xorq);
    1203             :       break;
    1204             :     case kX64Shl32:
    1205      118352 :       ASSEMBLE_SHIFT(shll, 5);
    1206             :       break;
    1207             :     case kX64Shl:
    1208       73686 :       ASSEMBLE_SHIFT(shlq, 6);
    1209             :       break;
    1210             :     case kX64Shr32:
    1211      394418 :       ASSEMBLE_SHIFT(shrl, 5);
    1212             :       break;
    1213             :     case kX64Shr:
    1214      196000 :       ASSEMBLE_SHIFT(shrq, 6);
    1215             :       break;
    1216             :     case kX64Sar32:
    1217      718490 :       ASSEMBLE_SHIFT(sarl, 5);
    1218             :       break;
    1219             :     case kX64Sar:
    1220     2573272 :       ASSEMBLE_SHIFT(sarq, 6);
    1221             :       break;
    1222             :     case kX64Ror32:
    1223      110630 :       ASSEMBLE_SHIFT(rorl, 5);
    1224             :       break;
    1225             :     case kX64Ror:
    1226         368 :       ASSEMBLE_SHIFT(rorq, 6);
    1227             :       break;
    1228             :     case kX64Lzcnt:
    1229          36 :       if (instr->InputAt(0)->IsRegister()) {
    1230          36 :         __ Lzcntq(i.OutputRegister(), i.InputRegister(0));
    1231             :       } else {
    1232           0 :         __ Lzcntq(i.OutputRegister(), i.InputOperand(0));
    1233             :       }
    1234             :       break;
    1235             :     case kX64Lzcnt32:
    1236         446 :       if (instr->InputAt(0)->IsRegister()) {
    1237         446 :         __ Lzcntl(i.OutputRegister(), i.InputRegister(0));
    1238             :       } else {
    1239           0 :         __ Lzcntl(i.OutputRegister(), i.InputOperand(0));
    1240             :       }
    1241             :       break;
    1242             :     case kX64Tzcnt:
    1243          44 :       if (instr->InputAt(0)->IsRegister()) {
    1244          44 :         __ Tzcntq(i.OutputRegister(), i.InputRegister(0));
    1245             :       } else {
    1246           0 :         __ Tzcntq(i.OutputRegister(), i.InputOperand(0));
    1247             :       }
    1248             :       break;
    1249             :     case kX64Tzcnt32:
    1250         332 :       if (instr->InputAt(0)->IsRegister()) {
    1251         332 :         __ Tzcntl(i.OutputRegister(), i.InputRegister(0));
    1252             :       } else {
    1253           0 :         __ Tzcntl(i.OutputRegister(), i.InputOperand(0));
    1254             :       }
    1255             :       break;
    1256             :     case kX64Popcnt:
    1257          44 :       if (instr->InputAt(0)->IsRegister()) {
    1258          44 :         __ Popcntq(i.OutputRegister(), i.InputRegister(0));
    1259             :       } else {
    1260           0 :         __ Popcntq(i.OutputRegister(), i.InputOperand(0));
    1261             :       }
    1262             :       break;
    1263             :     case kX64Popcnt32:
    1264          64 :       if (instr->InputAt(0)->IsRegister()) {
    1265          64 :         __ Popcntl(i.OutputRegister(), i.InputRegister(0));
    1266             :       } else {
    1267           0 :         __ Popcntl(i.OutputRegister(), i.InputOperand(0));
    1268             :       }
    1269             :       break;
    1270             :     case kX64Bswap:
    1271          12 :       __ bswapq(i.OutputRegister());
    1272          12 :       break;
    1273             :     case kX64Bswap32:
    1274          44 :       __ bswapl(i.OutputRegister());
    1275          44 :       break;
    1276             :     case kSSEFloat32Cmp:
    1277           0 :       ASSEMBLE_SSE_BINOP(Ucomiss);
    1278             :       break;
    1279             :     case kSSEFloat32Add:
    1280           0 :       ASSEMBLE_SSE_BINOP(addss);
    1281             :       break;
    1282             :     case kSSEFloat32Sub:
    1283           0 :       ASSEMBLE_SSE_BINOP(subss);
    1284             :       break;
    1285             :     case kSSEFloat32Mul:
    1286           0 :       ASSEMBLE_SSE_BINOP(mulss);
    1287             :       break;
    1288             :     case kSSEFloat32Div:
    1289           0 :       ASSEMBLE_SSE_BINOP(divss);
    1290             :       // Don't delete this mov. It may improve performance on some CPUs,
    1291             :       // when there is a (v)mulss depending on the result.
    1292           0 :       __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
    1293           0 :       break;
    1294             :     case kSSEFloat32Abs: {
    1295             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1296           0 :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    1297           0 :       __ psrlq(kScratchDoubleReg, 33);
    1298           0 :       __ andps(i.OutputDoubleRegister(), kScratchDoubleReg);
    1299           0 :       break;
    1300             :     }
    1301             :     case kSSEFloat32Neg: {
    1302             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1303           0 :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    1304           0 :       __ psllq(kScratchDoubleReg, 31);
    1305           0 :       __ xorps(i.OutputDoubleRegister(), kScratchDoubleReg);
    1306           0 :       break;
    1307             :     }
    1308             :     case kSSEFloat32Sqrt:
    1309         348 :       ASSEMBLE_SSE_UNOP(sqrtss);
    1310             :       break;
    1311             :     case kSSEFloat32ToFloat64:
    1312       40804 :       ASSEMBLE_SSE_UNOP(Cvtss2sd);
    1313             :       break;
    1314             :     case kSSEFloat32Round: {
    1315             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    1316             :       RoundingMode const mode =
    1317             :           static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
    1318             :       __ Roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
    1319             :       break;
    1320             :     }
    1321             :     case kSSEFloat32ToInt32:
    1322         348 :       if (instr->InputAt(0)->IsFPRegister()) {
    1323         348 :         __ Cvttss2si(i.OutputRegister(), i.InputDoubleRegister(0));
    1324             :       } else {
    1325           0 :         __ Cvttss2si(i.OutputRegister(), i.InputOperand(0));
    1326             :       }
    1327             :       break;
    1328             :     case kSSEFloat32ToUint32: {
    1329          56 :       if (instr->InputAt(0)->IsFPRegister()) {
    1330          56 :         __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
    1331             :       } else {
    1332           0 :         __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
    1333             :       }
    1334             :       break;
    1335             :     }
    1336             :     case kSSEFloat64Cmp:
    1337        1704 :       ASSEMBLE_SSE_BINOP(Ucomisd);
    1338             :       break;
    1339             :     case kSSEFloat64Add:
    1340         632 :       ASSEMBLE_SSE_BINOP(addsd);
    1341             :       break;
    1342             :     case kSSEFloat64Sub:
    1343         468 :       ASSEMBLE_SSE_BINOP(subsd);
    1344             :       break;
    1345             :     case kSSEFloat64Mul:
    1346          56 :       ASSEMBLE_SSE_BINOP(mulsd);
    1347             :       break;
    1348             :     case kSSEFloat64Div:
    1349          52 :       ASSEMBLE_SSE_BINOP(divsd);
    1350             :       // Don't delete this mov. It may improve performance on some CPUs,
    1351             :       // when there is a (v)mulsd depending on the result.
    1352             :       __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
    1353             :       break;
    1354             :     case kSSEFloat64Mod: {
    1355        1646 :       __ subq(rsp, Immediate(kDoubleSize));
    1356        1646 :       unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    1357        1646 :                                                        kDoubleSize);
    1358             :       // Move values to st(0) and st(1).
    1359        3292 :       __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(1));
    1360        1646 :       __ fld_d(Operand(rsp, 0));
    1361        3292 :       __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
    1362        1646 :       __ fld_d(Operand(rsp, 0));
    1363             :       // Loop while fprem isn't done.
    1364        1646 :       Label mod_loop;
    1365        1646 :       __ bind(&mod_loop);
    1366             :       // This instruction traps on all kinds of inputs, but we are assuming the
    1367             :       // floating point control word is set to ignore them all.
    1368        1646 :       __ fprem();
    1369             :       // The following 2 instructions implicitly use rax.
    1370        1646 :       __ fnstsw_ax();
    1371        1646 :       if (CpuFeatures::IsSupported(SAHF)) {
    1372             :         CpuFeatureScope sahf_scope(tasm(), SAHF);
    1373        1614 :         __ sahf();
    1374             :       } else {
    1375             :         __ shrl(rax, Immediate(8));
    1376             :         __ andl(rax, Immediate(0xFF));
    1377          32 :         __ pushq(rax);
    1378             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    1379          32 :                                                          kSystemPointerSize);
    1380          32 :         __ popfq();
    1381             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    1382          32 :                                                          -kSystemPointerSize);
    1383             :       }
    1384        1646 :       __ j(parity_even, &mod_loop);
    1385             :       // Move output to stack and clean up.
    1386        1646 :       __ fstp(1);
    1387        1646 :       __ fstp_d(Operand(rsp, 0));
    1388        3292 :       __ Movsd(i.OutputDoubleRegister(), Operand(rsp, 0));
    1389             :       __ addq(rsp, Immediate(kDoubleSize));
    1390             :       unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    1391        1646 :                                                        -kDoubleSize);
    1392             :       break;
    1393             :     }
    1394             :     case kSSEFloat32Max: {
    1395          66 :       Label compare_swap, done_compare;
    1396          66 :       if (instr->InputAt(1)->IsFPRegister()) {
    1397             :         __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1398             :       } else {
    1399           0 :         __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
    1400             :       }
    1401             :       auto ool =
    1402             :           new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
    1403          66 :       __ j(parity_even, ool->entry());
    1404          66 :       __ j(above, &done_compare, Label::kNear);
    1405          66 :       __ j(below, &compare_swap, Label::kNear);
    1406             :       __ Movmskps(kScratchRegister, i.InputDoubleRegister(0));
    1407             :       __ testl(kScratchRegister, Immediate(1));
    1408          66 :       __ j(zero, &done_compare, Label::kNear);
    1409          66 :       __ bind(&compare_swap);
    1410          66 :       if (instr->InputAt(1)->IsFPRegister()) {
    1411             :         __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1412             :       } else {
    1413           0 :         __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
    1414             :       }
    1415          66 :       __ bind(&done_compare);
    1416          66 :       __ bind(ool->exit());
    1417             :       break;
    1418             :     }
    1419             :     case kSSEFloat32Min: {
    1420          66 :       Label compare_swap, done_compare;
    1421          66 :       if (instr->InputAt(1)->IsFPRegister()) {
    1422             :         __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1423             :       } else {
    1424           0 :         __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
    1425             :       }
    1426             :       auto ool =
    1427             :           new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
    1428          66 :       __ j(parity_even, ool->entry());
    1429          66 :       __ j(below, &done_compare, Label::kNear);
    1430          66 :       __ j(above, &compare_swap, Label::kNear);
    1431          66 :       if (instr->InputAt(1)->IsFPRegister()) {
    1432             :         __ Movmskps(kScratchRegister, i.InputDoubleRegister(1));
    1433             :       } else {
    1434           0 :         __ Movss(kScratchDoubleReg, i.InputOperand(1));
    1435             :         __ Movmskps(kScratchRegister, kScratchDoubleReg);
    1436             :       }
    1437             :       __ testl(kScratchRegister, Immediate(1));
    1438          66 :       __ j(zero, &done_compare, Label::kNear);
    1439          66 :       __ bind(&compare_swap);
    1440          66 :       if (instr->InputAt(1)->IsFPRegister()) {
    1441             :         __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1442             :       } else {
    1443           0 :         __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
    1444             :       }
    1445          66 :       __ bind(&done_compare);
    1446          66 :       __ bind(ool->exit());
    1447             :       break;
    1448             :     }
    1449             :     case kSSEFloat64Max: {
    1450         252 :       Label compare_swap, done_compare;
    1451         252 :       if (instr->InputAt(1)->IsFPRegister()) {
    1452             :         __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1453             :       } else {
    1454           0 :         __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
    1455             :       }
    1456             :       auto ool =
    1457             :           new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
    1458         252 :       __ j(parity_even, ool->entry());
    1459         252 :       __ j(above, &done_compare, Label::kNear);
    1460         252 :       __ j(below, &compare_swap, Label::kNear);
    1461             :       __ Movmskpd(kScratchRegister, i.InputDoubleRegister(0));
    1462             :       __ testl(kScratchRegister, Immediate(1));
    1463         252 :       __ j(zero, &done_compare, Label::kNear);
    1464         252 :       __ bind(&compare_swap);
    1465         252 :       if (instr->InputAt(1)->IsFPRegister()) {
    1466             :         __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1467             :       } else {
    1468           0 :         __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
    1469             :       }
    1470         252 :       __ bind(&done_compare);
    1471         252 :       __ bind(ool->exit());
    1472             :       break;
    1473             :     }
    1474             :     case kSSEFloat64Min: {
    1475         337 :       Label compare_swap, done_compare;
    1476         337 :       if (instr->InputAt(1)->IsFPRegister()) {
    1477             :         __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1478             :       } else {
    1479           0 :         __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
    1480             :       }
    1481             :       auto ool =
    1482             :           new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
    1483         337 :       __ j(parity_even, ool->entry());
    1484         337 :       __ j(below, &done_compare, Label::kNear);
    1485         337 :       __ j(above, &compare_swap, Label::kNear);
    1486         337 :       if (instr->InputAt(1)->IsFPRegister()) {
    1487             :         __ Movmskpd(kScratchRegister, i.InputDoubleRegister(1));
    1488             :       } else {
    1489           0 :         __ Movsd(kScratchDoubleReg, i.InputOperand(1));
    1490             :         __ Movmskpd(kScratchRegister, kScratchDoubleReg);
    1491             :       }
    1492             :       __ testl(kScratchRegister, Immediate(1));
    1493         337 :       __ j(zero, &done_compare, Label::kNear);
    1494         337 :       __ bind(&compare_swap);
    1495         337 :       if (instr->InputAt(1)->IsFPRegister()) {
    1496             :         __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1497             :       } else {
    1498           0 :         __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
    1499             :       }
    1500         337 :       __ bind(&done_compare);
    1501         337 :       __ bind(ool->exit());
    1502             :       break;
    1503             :     }
    1504             :     case kSSEFloat64Abs: {
    1505             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1506           6 :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    1507           6 :       __ psrlq(kScratchDoubleReg, 1);
    1508           6 :       __ andpd(i.OutputDoubleRegister(), kScratchDoubleReg);
    1509           6 :       break;
    1510             :     }
    1511             :     case kSSEFloat64Neg: {
    1512             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1513          76 :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    1514          76 :       __ psllq(kScratchDoubleReg, 63);
    1515          76 :       __ xorpd(i.OutputDoubleRegister(), kScratchDoubleReg);
    1516          76 :       break;
    1517             :     }
    1518             :     case kSSEFloat64Sqrt:
    1519         415 :       ASSEMBLE_SSE_UNOP(Sqrtsd);
    1520             :       break;
    1521             :     case kSSEFloat64Round: {
    1522             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    1523             :       RoundingMode const mode =
    1524             :           static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
    1525             :       __ Roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
    1526             :       break;
    1527             :     }
    1528             :     case kSSEFloat64ToFloat32:
    1529       35866 :       ASSEMBLE_SSE_UNOP(Cvtsd2ss);
    1530             :       break;
    1531             :     case kSSEFloat64ToInt32:
    1532      128448 :       if (instr->InputAt(0)->IsFPRegister()) {
    1533      108118 :         __ Cvttsd2si(i.OutputRegister(), i.InputDoubleRegister(0));
    1534             :       } else {
    1535       40660 :         __ Cvttsd2si(i.OutputRegister(), i.InputOperand(0));
    1536             :       }
    1537             :       break;
    1538             :     case kSSEFloat64ToUint32: {
    1539         750 :       if (instr->InputAt(0)->IsFPRegister()) {
    1540         750 :         __ Cvttsd2siq(i.OutputRegister(), i.InputDoubleRegister(0));
    1541             :       } else {
    1542           0 :         __ Cvttsd2siq(i.OutputRegister(), i.InputOperand(0));
    1543             :       }
    1544        1500 :       if (MiscField::decode(instr->opcode())) {
    1545         690 :         __ AssertZeroExtended(i.OutputRegister());
    1546             :       }
    1547             :       break;
    1548             :     }
    1549             :     case kSSEFloat32ToInt64:
    1550          52 :       if (instr->InputAt(0)->IsFPRegister()) {
    1551          52 :         __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
    1552             :       } else {
    1553           0 :         __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
    1554             :       }
    1555          52 :       if (instr->OutputCount() > 1) {
    1556          48 :         __ Set(i.OutputRegister(1), 1);
    1557          48 :         Label done;
    1558          48 :         Label fail;
    1559             :         __ Move(kScratchDoubleReg, static_cast<float>(INT64_MIN));
    1560          48 :         if (instr->InputAt(0)->IsFPRegister()) {
    1561             :           __ Ucomiss(kScratchDoubleReg, i.InputDoubleRegister(0));
    1562             :         } else {
    1563           0 :           __ Ucomiss(kScratchDoubleReg, i.InputOperand(0));
    1564             :         }
    1565             :         // If the input is NaN, then the conversion fails.
    1566          48 :         __ j(parity_even, &fail);
    1567             :         // If the input is INT64_MIN, then the conversion succeeds.
    1568          48 :         __ j(equal, &done);
    1569             :         __ cmpq(i.OutputRegister(0), Immediate(1));
    1570             :         // If the conversion results in INT64_MIN, but the input was not
    1571             :         // INT64_MIN, then the conversion fails.
    1572          48 :         __ j(no_overflow, &done);
    1573          48 :         __ bind(&fail);
    1574          48 :         __ Set(i.OutputRegister(1), 0);
    1575          48 :         __ bind(&done);
    1576             :       }
    1577             :       break;
    1578             :     case kSSEFloat64ToInt64:
    1579         895 :       if (instr->InputAt(0)->IsFPRegister()) {
    1580         894 :         __ Cvttsd2siq(i.OutputRegister(0), i.InputDoubleRegister(0));
    1581             :       } else {
    1582           2 :         __ Cvttsd2siq(i.OutputRegister(0), i.InputOperand(0));
    1583             :       }
    1584         897 :       if (instr->OutputCount() > 1) {
    1585         766 :         __ Set(i.OutputRegister(1), 1);
    1586         768 :         Label done;
    1587         768 :         Label fail;
    1588             :         __ Move(kScratchDoubleReg, static_cast<double>(INT64_MIN));
    1589         767 :         if (instr->InputAt(0)->IsFPRegister()) {
    1590             :           __ Ucomisd(kScratchDoubleReg, i.InputDoubleRegister(0));
    1591             :         } else {
    1592           0 :           __ Ucomisd(kScratchDoubleReg, i.InputOperand(0));
    1593             :         }
    1594             :         // If the input is NaN, then the conversion fails.
    1595         768 :         __ j(parity_even, &fail);
    1596             :         // If the input is INT64_MIN, then the conversion succeeds.
    1597         769 :         __ j(equal, &done);
    1598             :         __ cmpq(i.OutputRegister(0), Immediate(1));
    1599             :         // If the conversion results in INT64_MIN, but the input was not
    1600             :         // INT64_MIN, then the conversion fails.
    1601         766 :         __ j(no_overflow, &done);
    1602         771 :         __ bind(&fail);
    1603         766 :         __ Set(i.OutputRegister(1), 0);
    1604         767 :         __ bind(&done);
    1605             :       }
    1606             :       break;
    1607             :     case kSSEFloat32ToUint64: {
    1608          52 :       Label fail;
    1609         100 :       if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
    1610          52 :       if (instr->InputAt(0)->IsFPRegister()) {
    1611          52 :         __ Cvttss2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
    1612             :       } else {
    1613           0 :         __ Cvttss2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
    1614             :       }
    1615         100 :       if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
    1616          52 :       __ bind(&fail);
    1617             :       break;
    1618             :     }
    1619             :     case kSSEFloat64ToUint64: {
    1620        2972 :       Label fail;
    1621        3028 :       if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
    1622        2972 :       if (instr->InputAt(0)->IsFPRegister()) {
    1623        2972 :         __ Cvttsd2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
    1624             :       } else {
    1625           0 :         __ Cvttsd2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
    1626             :       }
    1627        3028 :       if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
    1628        2972 :       __ bind(&fail);
    1629             :       break;
    1630             :     }
    1631             :     case kSSEInt32ToFloat64:
    1632      432758 :       if (instr->InputAt(0)->IsRegister()) {
    1633      375068 :         __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
    1634             :       } else {
    1635      115374 :         __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
    1636             :       }
    1637             :       break;
    1638             :     case kSSEInt32ToFloat32:
    1639         960 :       if (instr->InputAt(0)->IsRegister()) {
    1640         952 :         __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
    1641             :       } else {
    1642          16 :         __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
    1643             :       }
    1644             :       break;
    1645             :     case kSSEInt64ToFloat32:
    1646          31 :       if (instr->InputAt(0)->IsRegister()) {
    1647          31 :         __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
    1648             :       } else {
    1649           0 :         __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
    1650             :       }
    1651             :       break;
    1652             :     case kSSEInt64ToFloat64:
    1653        3256 :       if (instr->InputAt(0)->IsRegister()) {
    1654        1241 :         __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
    1655             :       } else {
    1656        4030 :         __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
    1657             :       }
    1658             :       break;
    1659             :     case kSSEUint64ToFloat32:
    1660          32 :       if (instr->InputAt(0)->IsRegister()) {
    1661          32 :         __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
    1662             :       } else {
    1663           0 :         __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
    1664             :       }
    1665             :       break;
    1666             :     case kSSEUint64ToFloat64:
    1667        3607 :       if (instr->InputAt(0)->IsRegister()) {
    1668        2375 :         __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
    1669             :       } else {
    1670        2464 :         __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
    1671             :       }
    1672             :       break;
    1673             :     case kSSEUint32ToFloat64:
    1674       11202 :       if (instr->InputAt(0)->IsRegister()) {
    1675         395 :         __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
    1676             :       } else {
    1677       21614 :         __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
    1678             :       }
    1679             :       break;
    1680             :     case kSSEUint32ToFloat32:
    1681          88 :       if (instr->InputAt(0)->IsRegister()) {
    1682          88 :         __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
    1683             :       } else {
    1684           0 :         __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
    1685             :       }
    1686             :       break;
    1687             :     case kSSEFloat64ExtractLowWord32:
    1688         116 :       if (instr->InputAt(0)->IsFPStackSlot()) {
    1689           0 :         __ movl(i.OutputRegister(), i.InputOperand(0));
    1690             :       } else {
    1691             :         __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
    1692             :       }
    1693             :       break;
    1694             :     case kSSEFloat64ExtractHighWord32:
    1695       96416 :       if (instr->InputAt(0)->IsFPStackSlot()) {
    1696      122818 :         __ movl(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2));
    1697             :       } else {
    1698       35007 :         __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1);
    1699             :       }
    1700             :       break;
    1701             :     case kSSEFloat64InsertLowWord32:
    1702           4 :       if (instr->InputAt(1)->IsRegister()) {
    1703           4 :         __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 0);
    1704             :       } else {
    1705           0 :         __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
    1706             :       }
    1707             :       break;
    1708             :     case kSSEFloat64InsertHighWord32:
    1709         116 :       if (instr->InputAt(1)->IsRegister()) {
    1710         116 :         __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 1);
    1711             :       } else {
    1712           0 :         __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
    1713             :       }
    1714             :       break;
    1715             :     case kSSEFloat64LoadLowWord32:
    1716         112 :       if (instr->InputAt(0)->IsRegister()) {
    1717             :         __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
    1718             :       } else {
    1719           0 :         __ Movd(i.OutputDoubleRegister(), i.InputOperand(0));
    1720             :       }
    1721             :       break;
    1722             :     case kAVXFloat32Cmp: {
    1723             :       CpuFeatureScope avx_scope(tasm(), AVX);
    1724        1481 :       if (instr->InputAt(1)->IsFPRegister()) {
    1725        1463 :         __ vucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1726             :       } else {
    1727          36 :         __ vucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
    1728             :       }
    1729             :       break;
    1730             :     }
    1731             :     case kAVXFloat32Add:
    1732        3586 :       ASSEMBLE_AVX_BINOP(vaddss);
    1733             :       break;
    1734             :     case kAVXFloat32Sub:
    1735        5130 :       ASSEMBLE_AVX_BINOP(vsubss);
    1736             :       break;
    1737             :     case kAVXFloat32Mul:
    1738        1742 :       ASSEMBLE_AVX_BINOP(vmulss);
    1739             :       break;
    1740             :     case kAVXFloat32Div:
    1741         704 :       ASSEMBLE_AVX_BINOP(vdivss);
    1742             :       // Don't delete this mov. It may improve performance on some CPUs,
    1743             :       // when there is a (v)mulss depending on the result.
    1744             :       __ Movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
    1745             :       break;
    1746             :     case kAVXFloat64Cmp: {
    1747             :       CpuFeatureScope avx_scope(tasm(), AVX);
    1748      254645 :       if (instr->InputAt(1)->IsFPRegister()) {
    1749      233953 :         __ vucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
    1750             :       } else {
    1751       41384 :         __ vucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
    1752             :       }
    1753             :       break;
    1754             :     }
    1755             :     case kAVXFloat64Add:
    1756      159360 :       ASSEMBLE_AVX_BINOP(vaddsd);
    1757             :       break;
    1758             :     case kAVXFloat64Sub:
    1759       31234 :       ASSEMBLE_AVX_BINOP(vsubsd);
    1760             :       break;
    1761             :     case kAVXFloat64Mul:
    1762       23700 :       ASSEMBLE_AVX_BINOP(vmulsd);
    1763             :       break;
    1764             :     case kAVXFloat64Div:
    1765       23878 :       ASSEMBLE_AVX_BINOP(vdivsd);
    1766             :       // Don't delete this mov. It may improve performance on some CPUs,
    1767             :       // when there is a (v)mulsd depending on the result.
    1768             :       __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
    1769             :       break;
    1770             :     case kAVXFloat32Abs: {
    1771             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1772             :       CpuFeatureScope avx_scope(tasm(), AVX);
    1773          66 :       __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
    1774             :       __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 33);
    1775          66 :       if (instr->InputAt(0)->IsFPRegister()) {
    1776             :         __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg,
    1777             :                   i.InputDoubleRegister(0));
    1778             :       } else {
    1779           0 :         __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg,
    1780             :                   i.InputOperand(0));
    1781             :       }
    1782             :       break;
    1783             :     }
    1784             :     case kAVXFloat32Neg: {
    1785             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1786             :       CpuFeatureScope avx_scope(tasm(), AVX);
    1787         168 :       __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
    1788             :       __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 31);
    1789         166 :       if (instr->InputAt(0)->IsFPRegister()) {
    1790             :         __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg,
    1791             :                   i.InputDoubleRegister(0));
    1792             :       } else {
    1793           0 :         __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg,
    1794             :                   i.InputOperand(0));
    1795             :       }
    1796             :       break;
    1797             :     }
    1798             :     case kAVXFloat64Abs: {
    1799             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1800             :       CpuFeatureScope avx_scope(tasm(), AVX);
    1801         620 :       __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
    1802             :       __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 1);
    1803         620 :       if (instr->InputAt(0)->IsFPRegister()) {
    1804             :         __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg,
    1805             :                   i.InputDoubleRegister(0));
    1806             :       } else {
    1807           0 :         __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg,
    1808             :                   i.InputOperand(0));
    1809             :       }
    1810             :       break;
    1811             :     }
    1812             :     case kAVXFloat64Neg: {
    1813             :       // TODO(bmeurer): Use RIP relative 128-bit constants.
    1814             :       CpuFeatureScope avx_scope(tasm(), AVX);
    1815       10557 :       __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
    1816             :       __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 63);
    1817       10557 :       if (instr->InputAt(0)->IsFPRegister()) {
    1818             :         __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg,
    1819             :                   i.InputDoubleRegister(0));
    1820             :       } else {
    1821          82 :         __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg,
    1822             :                   i.InputOperand(0));
    1823             :       }
    1824             :       break;
    1825             :     }
    1826             :     case kSSEFloat64SilenceNaN:
    1827             :       __ Xorpd(kScratchDoubleReg, kScratchDoubleReg);
    1828             :       __ Subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
    1829             :       break;
    1830             :     case kX64Movsxbl:
    1831       41599 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1832       83809 :       ASSEMBLE_MOVX(movsxbl);
    1833       41599 :       __ AssertZeroExtended(i.OutputRegister());
    1834       41599 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1835       41599 :       break;
    1836             :     case kX64Movzxbl:
    1837      175453 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1838      352131 :       ASSEMBLE_MOVX(movzxbl);
    1839      175454 :       __ AssertZeroExtended(i.OutputRegister());
    1840      175454 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1841      175454 :       break;
    1842             :     case kX64Movsxbq:
    1843       14291 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1844       28586 :       ASSEMBLE_MOVX(movsxbq);
    1845       14291 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1846       14291 :       break;
    1847             :     case kX64Movzxbq:
    1848       14804 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1849       29608 :       ASSEMBLE_MOVX(movzxbq);
    1850       14804 :       __ AssertZeroExtended(i.OutputRegister());
    1851       14804 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1852       14804 :       break;
    1853             :     case kX64Movb: {
    1854       80019 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1855       80017 :       size_t index = 0;
    1856       80017 :       Operand operand = i.MemoryOperand(&index);
    1857      160034 :       if (HasImmediateInput(instr, index)) {
    1858       13332 :         __ movb(operand, Immediate(i.InputInt8(index)));
    1859             :       } else {
    1860       73351 :         __ movb(operand, i.InputRegister(index));
    1861             :       }
    1862       80018 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1863             :       break;
    1864             :     }
    1865             :     case kX64Movsxwl:
    1866        9864 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1867       20210 :       ASSEMBLE_MOVX(movsxwl);
    1868        9864 :       __ AssertZeroExtended(i.OutputRegister());
    1869        9864 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1870        9864 :       break;
    1871             :     case kX64Movzxwl:
    1872      153769 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1873      313872 :       ASSEMBLE_MOVX(movzxwl);
    1874      153769 :       __ AssertZeroExtended(i.OutputRegister());
    1875      153769 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1876      153769 :       break;
    1877             :     case kX64Movsxwq:
    1878        9643 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1879       19290 :       ASSEMBLE_MOVX(movsxwq);
    1880             :       break;
    1881             :     case kX64Movzxwq:
    1882        1236 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1883        2472 :       ASSEMBLE_MOVX(movzxwq);
    1884        1236 :       __ AssertZeroExtended(i.OutputRegister());
    1885        1236 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1886        1236 :       break;
    1887             :     case kX64Movw: {
    1888       15315 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1889       15315 :       size_t index = 0;
    1890       15315 :       Operand operand = i.MemoryOperand(&index);
    1891       30630 :       if (HasImmediateInput(instr, index)) {
    1892        1798 :         __ movw(operand, Immediate(i.InputInt16(index)));
    1893             :       } else {
    1894       14416 :         __ movw(operand, i.InputRegister(index));
    1895             :       }
    1896       15316 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1897             :       break;
    1898             :     }
    1899             :     case kX64Movl:
    1900     2640837 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1901     2640491 :       if (instr->HasOutput()) {
    1902     1320808 :         if (instr->addressing_mode() == kMode_None) {
    1903      606633 :           if (instr->InputAt(0)->IsRegister()) {
    1904      571356 :             __ movl(i.OutputRegister(), i.InputRegister(0));
    1905             :           } else {
    1906       70554 :             __ movl(i.OutputRegister(), i.InputOperand(0));
    1907             :           }
    1908             :         } else {
    1909      714178 :           __ movl(i.OutputRegister(), i.MemoryOperand());
    1910             :         }
    1911     1320920 :         __ AssertZeroExtended(i.OutputRegister());
    1912             :       } else {
    1913     1319683 :         size_t index = 0;
    1914     1319683 :         Operand operand = i.MemoryOperand(&index);
    1915     2639656 :         if (HasImmediateInput(instr, index)) {
    1916      364417 :           __ movl(operand, i.InputImmediate(index));
    1917             :         } else {
    1918      955411 :           __ movl(operand, i.InputRegister(index));
    1919             :         }
    1920             :       }
    1921     2640675 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1922     2640754 :       break;
    1923             :     case kX64Movsxlq:
    1924      419547 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1925     1248225 :       ASSEMBLE_MOVX(movsxlq);
    1926      419545 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    1927      419550 :       break;
    1928             :     case kX64MovqDecompressTaggedSigned: {
    1929       87021 :       CHECK(instr->HasOutput());
    1930       87022 :       __ DecompressTaggedSigned(i.OutputRegister(), i.MemoryOperand());
    1931       87023 :       break;
    1932             :     }
    1933             :     case kX64MovqDecompressTaggedPointer: {
    1934     1241066 :       CHECK(instr->HasOutput());
    1935     1241080 :       __ DecompressTaggedPointer(i.OutputRegister(), i.MemoryOperand());
    1936     1241082 :       break;
    1937             :     }
    1938             :     case kX64MovqDecompressAnyTagged: {
    1939     2932646 :       CHECK(instr->HasOutput());
    1940     2932650 :       __ DecompressAnyTagged(i.OutputRegister(), i.MemoryOperand());
    1941     2932653 :       break;
    1942             :     }
    1943             :     case kX64MovqCompressTagged: {
    1944     2487159 :       CHECK(!instr->HasOutput());
    1945     2487159 :       size_t index = 0;
    1946     2487159 :       Operand operand = i.MemoryOperand(&index);
    1947     4974320 :       if (HasImmediateInput(instr, index)) {
    1948      197938 :         __ StoreTaggedField(operand, i.InputImmediate(index));
    1949             :       } else {
    1950     2289222 :         __ StoreTaggedField(operand, i.InputRegister(index));
    1951             :       }
    1952             :       break;
    1953             :     }
    1954             :     case kX64DecompressSigned: {
    1955           4 :       CHECK(instr->HasOutput());
    1956           4 :       __ movsxlq(i.OutputRegister(), i.InputRegister(0));
    1957           4 :       break;
    1958             :     }
    1959             :     case kX64DecompressPointer: {
    1960           4 :       CHECK(instr->HasOutput());
    1961           4 :       __ movsxlq(i.OutputRegister(), i.InputRegister(0));
    1962             :       __ addq(i.OutputRegister(), kRootRegister);
    1963             :       break;
    1964             :     }
    1965             :     case kX64DecompressAny: {
    1966           8 :       CHECK(instr->HasOutput());
    1967           8 :       __ movsxlq(i.OutputRegister(), i.InputRegister(0));
    1968             :       // TODO(solanes): Do branchful compute?
    1969             :       // Branchlessly compute |masked_root|:
    1970             :       STATIC_ASSERT((kSmiTagSize == 1) && (kSmiTag < 32));
    1971             :       Register masked_root = kScratchRegister;
    1972             :       __ movl(masked_root, i.OutputRegister());
    1973             :       __ andl(masked_root, Immediate(kSmiTagMask));
    1974             :       __ negq(masked_root);
    1975             :       __ andq(masked_root, kRootRegister);
    1976             :       // Now this add operation will either leave the value unchanged if it is a
    1977             :       // smi or add the isolate root if it is a heap object.
    1978             :       __ addq(i.OutputRegister(), masked_root);
    1979             :       break;
    1980             :     }
    1981             :     // TODO(solanes): Combine into one Compress? They seem to be identical.
    1982             :     // TODO(solanes): We might get away with doing a no-op in these three cases.
    1983             :     // The movl instruction is the conservative way for the moment.
    1984             :     case kX64CompressSigned: {
    1985           4 :       __ movl(i.OutputRegister(), i.InputRegister(0));
    1986             :       break;
    1987             :     }
    1988             :     case kX64CompressPointer: {
    1989           4 :       __ movl(i.OutputRegister(), i.InputRegister(0));
    1990             :       break;
    1991             :     }
    1992             :     case kX64CompressAny: {
    1993           8 :       __ movl(i.OutputRegister(), i.InputRegister(0));
    1994             :       break;
    1995             :     }
    1996             :     case kX64Movq:
    1997     2101607 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    1998     2101661 :       if (instr->HasOutput()) {
    1999     1591481 :         __ movq(i.OutputRegister(), i.MemoryOperand());
    2000             :       } else {
    2001      510340 :         size_t index = 0;
    2002      510340 :         Operand operand = i.MemoryOperand(&index);
    2003     1020704 :         if (HasImmediateInput(instr, index)) {
    2004        1304 :           __ movq(operand, i.InputImmediate(index));
    2005             :         } else {
    2006      509048 :           __ movq(operand, i.InputRegister(index));
    2007             :         }
    2008             :       }
    2009     2101966 :       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
    2010     2101859 :       break;
    2011             :     case kX64Movss:
    2012       28763 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    2013       28763 :       if (instr->HasOutput()) {
    2014       15638 :         __ movss(i.OutputDoubleRegister(), i.MemoryOperand());
    2015             :       } else {
    2016       13126 :         size_t index = 0;
    2017       13126 :         Operand operand = i.MemoryOperand(&index);
    2018       26252 :         __ movss(operand, i.InputDoubleRegister(index));
    2019             :       }
    2020             :       break;
    2021             :     case kX64Movsd: {
    2022      673675 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    2023      673709 :       if (instr->HasOutput()) {
    2024             :         const MemoryAccessMode access_mode =
    2025             :             static_cast<MemoryAccessMode>(MiscField::decode(opcode));
    2026      423762 :         if (access_mode == kMemoryAccessPoisoned) {
    2027             :           // If we have to poison the loaded value, we load into a general
    2028             :           // purpose register first, mask it with the poison, and move the
    2029             :           // value from the general purpose register into the double register.
    2030           0 :           __ movq(kScratchRegister, i.MemoryOperand());
    2031             :           __ andq(kScratchRegister, kSpeculationPoisonRegister);
    2032             :           __ Movq(i.OutputDoubleRegister(), kScratchRegister);
    2033             :         } else {
    2034             :           __ Movsd(i.OutputDoubleRegister(), i.MemoryOperand());
    2035             :         }
    2036             :       } else {
    2037      249947 :         size_t index = 0;
    2038      249947 :         Operand operand = i.MemoryOperand(&index);
    2039      249945 :         __ Movsd(operand, i.InputDoubleRegister(index));
    2040             :       }
    2041             :       break;
    2042             :     }
    2043             :     case kX64Movdqu: {
    2044             :       CpuFeatureScope sse_scope(tasm(), SSSE3);
    2045        9740 :       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
    2046        9740 :       if (instr->HasOutput()) {
    2047        5660 :         __ movdqu(i.OutputSimd128Register(), i.MemoryOperand());
    2048             :       } else {
    2049        4080 :         size_t index = 0;
    2050        4080 :         Operand operand = i.MemoryOperand(&index);
    2051        8160 :         __ movdqu(operand, i.InputSimd128Register(index));
    2052             :       }
    2053             :       break;
    2054             :     }
    2055             :     case kX64BitcastFI:
    2056         554 :       if (instr->InputAt(0)->IsFPStackSlot()) {
    2057           0 :         __ movl(i.OutputRegister(), i.InputOperand(0));
    2058             :       } else {
    2059             :         __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
    2060             :       }
    2061             :       break;
    2062             :     case kX64BitcastDL:
    2063         531 :       if (instr->InputAt(0)->IsFPStackSlot()) {
    2064           0 :         __ movq(i.OutputRegister(), i.InputOperand(0));
    2065             :       } else {
    2066             :         __ Movq(i.OutputRegister(), i.InputDoubleRegister(0));
    2067             :       }
    2068             :       break;
    2069             :     case kX64BitcastIF:
    2070         307 :       if (instr->InputAt(0)->IsRegister()) {
    2071             :         __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
    2072             :       } else {
    2073           0 :         __ movss(i.OutputDoubleRegister(), i.InputOperand(0));
    2074             :       }
    2075             :       break;
    2076             :     case kX64BitcastLD:
    2077         153 :       if (instr->InputAt(0)->IsRegister()) {
    2078             :         __ Movq(i.OutputDoubleRegister(), i.InputRegister(0));
    2079             :       } else {
    2080           0 :         __ Movsd(i.OutputDoubleRegister(), i.InputOperand(0));
    2081             :       }
    2082             :       break;
    2083             :     case kX64Lea32: {
    2084             :       AddressingMode mode = AddressingModeField::decode(instr->opcode());
    2085             :       // Shorten "leal" to "addl", "subl" or "shll" if the register allocation
    2086             :       // and addressing mode just happens to work out. The "addl"/"subl" forms
    2087             :       // in these cases are faster based on measurements.
    2088      414306 :       if (i.InputRegister(0) == i.OutputRegister()) {
    2089      204004 :         if (mode == kMode_MRI) {
    2090             :           int32_t constant_summand = i.InputInt32(1);
    2091             :           DCHECK_NE(0, constant_summand);
    2092      114167 :           if (constant_summand > 0) {
    2093       85828 :             __ addl(i.OutputRegister(), Immediate(constant_summand));
    2094             :           } else {
    2095       28339 :             __ subl(i.OutputRegister(),
    2096             :                     Immediate(base::NegateWithWraparound(constant_summand)));
    2097             :           }
    2098       89826 :         } else if (mode == kMode_MR1) {
    2099       29588 :           if (i.InputRegister(1) == i.OutputRegister()) {
    2100       19819 :             __ shll(i.OutputRegister(), Immediate(1));
    2101             :           } else {
    2102        9769 :             __ addl(i.OutputRegister(), i.InputRegister(1));
    2103             :           }
    2104       60238 :         } else if (mode == kMode_M2) {
    2105           0 :           __ shll(i.OutputRegister(), Immediate(1));
    2106       60238 :         } else if (mode == kMode_M4) {
    2107        2912 :           __ shll(i.OutputRegister(), Immediate(2));
    2108       57326 :         } else if (mode == kMode_M8) {
    2109          95 :           __ shll(i.OutputRegister(), Immediate(3));
    2110             :         } else {
    2111       57232 :           __ leal(i.OutputRegister(), i.MemoryOperand());
    2112             :         }
    2113      261042 :       } else if (mode == kMode_MR1 &&
    2114             :                  i.InputRegister(1) == i.OutputRegister()) {
    2115       25758 :         __ addl(i.OutputRegister(), i.InputRegister(0));
    2116             :       } else {
    2117      184544 :         __ leal(i.OutputRegister(), i.MemoryOperand());
    2118             :       }
    2119      414320 :       __ AssertZeroExtended(i.OutputRegister());
    2120      414310 :       break;
    2121             :     }
    2122             :     case kX64Lea: {
    2123             :       AddressingMode mode = AddressingModeField::decode(instr->opcode());
    2124             :       // Shorten "leaq" to "addq", "subq" or "shlq" if the register allocation
    2125             :       // and addressing mode just happens to work out. The "addq"/"subq" forms
    2126             :       // in these cases are faster based on measurements.
    2127     2410137 :       if (i.InputRegister(0) == i.OutputRegister()) {
    2128      631439 :         if (mode == kMode_MRI) {
    2129             :           int32_t constant_summand = i.InputInt32(1);
    2130      382125 :           if (constant_summand > 0) {
    2131      297853 :             __ addq(i.OutputRegister(), Immediate(constant_summand));
    2132       84272 :           } else if (constant_summand < 0) {
    2133      168492 :             __ subq(i.OutputRegister(), Immediate(-constant_summand));
    2134             :           }
    2135      249315 :         } else if (mode == kMode_MR1) {
    2136      193786 :           if (i.InputRegister(1) == i.OutputRegister()) {
    2137      145868 :             __ shlq(i.OutputRegister(), Immediate(1));
    2138             :           } else {
    2139       47918 :             __ addq(i.OutputRegister(), i.InputRegister(1));
    2140             :           }
    2141       55529 :         } else if (mode == kMode_M2) {
    2142           0 :           __ shlq(i.OutputRegister(), Immediate(1));
    2143       55529 :         } else if (mode == kMode_M4) {
    2144       10552 :           __ shlq(i.OutputRegister(), Immediate(2));
    2145       44977 :         } else if (mode == kMode_M8) {
    2146        2130 :           __ shlq(i.OutputRegister(), Immediate(3));
    2147             :         } else {
    2148       42847 :           __ leaq(i.OutputRegister(), i.MemoryOperand());
    2149             :         }
    2150     2113842 :       } else if (mode == kMode_MR1 &&
    2151             :                  i.InputRegister(1) == i.OutputRegister()) {
    2152      170359 :         __ addq(i.OutputRegister(), i.InputRegister(0));
    2153             :       } else {
    2154     1608342 :         __ leaq(i.OutputRegister(), i.MemoryOperand());
    2155             :       }
    2156             :       break;
    2157             :     }
    2158             :     case kX64Dec32:
    2159           0 :       __ decl(i.OutputRegister());
    2160             :       break;
    2161             :     case kX64Inc32:
    2162           0 :       __ incl(i.OutputRegister());
    2163             :       break;
    2164             :     case kX64Push:
    2165     3680031 :       if (AddressingModeField::decode(instr->opcode()) != kMode_None) {
    2166           4 :         size_t index = 0;
    2167           4 :         Operand operand = i.MemoryOperand(&index);
    2168           4 :         __ pushq(operand);
    2169             :         frame_access_state()->IncreaseSPDelta(1);
    2170           4 :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    2171           4 :                                                          kSystemPointerSize);
    2172     3680027 :       } else if (HasImmediateInput(instr, 0)) {
    2173      940911 :         __ pushq(i.InputImmediate(0));
    2174             :         frame_access_state()->IncreaseSPDelta(1);
    2175      940914 :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    2176      940914 :                                                          kSystemPointerSize);
    2177     2739114 :       } else if (instr->InputAt(0)->IsRegister()) {
    2178     2011821 :         __ pushq(i.InputRegister(0));
    2179             :         frame_access_state()->IncreaseSPDelta(1);
    2180     2011828 :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    2181     2011828 :                                                          kSystemPointerSize);
    2182     1446670 :       } else if (instr->InputAt(0)->IsFloatRegister() ||
    2183             :                  instr->InputAt(0)->IsDoubleRegister()) {
    2184             :         // TODO(titzer): use another machine instruction?
    2185       14806 :         __ subq(rsp, Immediate(kDoubleSize));
    2186             :         frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize);
    2187       14806 :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    2188       14806 :                                                          kDoubleSize);
    2189       29612 :         __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
    2190      712487 :       } else if (instr->InputAt(0)->IsSimd128Register()) {
    2191             :         // TODO(titzer): use another machine instruction?
    2192         120 :         __ subq(rsp, Immediate(kSimd128Size));
    2193             :         frame_access_state()->IncreaseSPDelta(kSimd128Size /
    2194             :                                               kSystemPointerSize);
    2195         120 :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    2196         120 :                                                          kSimd128Size);
    2197         240 :         __ Movups(Operand(rsp, 0), i.InputSimd128Register(0));
    2198      727064 :       } else if (instr->InputAt(0)->IsStackSlot() ||
    2199      723176 :                  instr->InputAt(0)->IsFloatStackSlot() ||
    2200             :                  instr->InputAt(0)->IsDoubleStackSlot()) {
    2201      712103 :         __ pushq(i.InputOperand(0));
    2202             :         frame_access_state()->IncreaseSPDelta(1);
    2203      712108 :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    2204      712108 :                                                          kSystemPointerSize);
    2205             :       } else {
    2206             :         DCHECK(instr->InputAt(0)->IsSimd128StackSlot());
    2207         264 :         __ Movups(kScratchDoubleReg, i.InputOperand(0));
    2208             :         // TODO(titzer): use another machine instruction?
    2209             :         __ subq(rsp, Immediate(kSimd128Size));
    2210             :         frame_access_state()->IncreaseSPDelta(kSimd128Size /
    2211             :                                               kSystemPointerSize);
    2212         264 :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    2213         264 :                                                          kSimd128Size);
    2214         528 :         __ Movups(Operand(rsp, 0), kScratchDoubleReg);
    2215             :       }
    2216             :       break;
    2217             :     case kX64Poke: {
    2218             :       int slot = MiscField::decode(instr->opcode());
    2219        3392 :       if (HasImmediateInput(instr, 0)) {
    2220        2264 :         __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputImmediate(0));
    2221             :       } else {
    2222        4520 :         __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputRegister(0));
    2223             :       }
    2224             :       break;
    2225             :     }
    2226             :     case kX64Peek: {
    2227             :       int reverse_slot = i.InputInt32(0);
    2228             :       int offset =
    2229        5072 :           FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
    2230        5072 :       if (instr->OutputAt(0)->IsFPRegister()) {
    2231             :         LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
    2232        2528 :         if (op->representation() == MachineRepresentation::kFloat64) {
    2233        2528 :           __ Movsd(i.OutputDoubleRegister(), Operand(rbp, offset));
    2234             :         } else {
    2235             :           DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
    2236        2528 :           __ Movss(i.OutputFloatRegister(), Operand(rbp, offset));
    2237             :         }
    2238             :       } else {
    2239        7632 :         __ movq(i.OutputRegister(), Operand(rbp, offset));
    2240             :       }
    2241             :       break;
    2242             :     }
    2243             :     // TODO(gdeepti): Get rid of redundant moves for F32x4Splat/Extract below
    2244             :     case kX64F32x4Splat: {
    2245         140 :       XMMRegister dst = i.OutputSimd128Register();
    2246         140 :       if (instr->InputAt(0)->IsFPRegister()) {
    2247         140 :         __ movss(dst, i.InputDoubleRegister(0));
    2248             :       } else {
    2249           0 :         __ movss(dst, i.InputOperand(0));
    2250             :       }
    2251         140 :       __ shufps(dst, dst, 0x0);
    2252             :       break;
    2253             :     }
    2254             :     case kX64F32x4ExtractLane: {
    2255             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2256          64 :       __ extractps(kScratchRegister, i.InputSimd128Register(0), i.InputInt8(1));
    2257          32 :       __ movd(i.OutputDoubleRegister(), kScratchRegister);
    2258             :       break;
    2259             :     }
    2260             :     case kX64F32x4ReplaceLane: {
    2261             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2262             :       // The insertps instruction uses imm8[5:4] to indicate the lane
    2263             :       // that needs to be replaced.
    2264          32 :       byte select = i.InputInt8(1) << 4 & 0x30;
    2265          32 :       if (instr->InputAt(2)->IsFPRegister()) {
    2266          64 :         __ insertps(i.OutputSimd128Register(), i.InputDoubleRegister(2),
    2267          32 :                     select);
    2268             :       } else {
    2269           0 :         __ insertps(i.OutputSimd128Register(), i.InputOperand(2), select);
    2270             :       }
    2271             :       break;
    2272             :     }
    2273             :     case kX64F32x4SConvertI32x4: {
    2274           4 :       __ cvtdq2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2275           4 :       break;
    2276             :     }
    2277             :     case kX64F32x4UConvertI32x4: {
    2278             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2279             :       DCHECK_NE(i.OutputSimd128Register(), kScratchDoubleReg);
    2280             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2281             :       XMMRegister dst = i.OutputSimd128Register();
    2282           4 :       __ pxor(kScratchDoubleReg, kScratchDoubleReg);      // zeros
    2283           4 :       __ pblendw(kScratchDoubleReg, dst, 0x55);           // get lo 16 bits
    2284             :       __ psubd(dst, kScratchDoubleReg);                   // get hi 16 bits
    2285           4 :       __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // convert lo exactly
    2286           4 :       __ psrld(dst, 1);                  // divide by 2 to get in unsigned range
    2287           4 :       __ cvtdq2ps(dst, dst);             // convert hi exactly
    2288           4 :       __ addps(dst, dst);                // double hi, exactly
    2289           4 :       __ addps(dst, kScratchDoubleReg);  // add hi and lo, may round.
    2290             :       break;
    2291             :     }
    2292             :     case kX64F32x4Abs: {
    2293             :       XMMRegister dst = i.OutputSimd128Register();
    2294             :       XMMRegister src = i.InputSimd128Register(0);
    2295           4 :       if (dst == src) {
    2296           4 :         __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2297           4 :         __ psrld(kScratchDoubleReg, 1);
    2298           4 :         __ andps(i.OutputSimd128Register(), kScratchDoubleReg);
    2299             :       } else {
    2300           0 :         __ pcmpeqd(dst, dst);
    2301           0 :         __ psrld(dst, 1);
    2302           0 :         __ andps(dst, i.InputSimd128Register(0));
    2303             :       }
    2304             :       break;
    2305             :     }
    2306             :     case kX64F32x4Neg: {
    2307             :       XMMRegister dst = i.OutputSimd128Register();
    2308             :       XMMRegister src = i.InputSimd128Register(0);
    2309           4 :       if (dst == src) {
    2310           4 :         __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2311           4 :         __ pslld(kScratchDoubleReg, 31);
    2312           4 :         __ xorps(i.OutputSimd128Register(), kScratchDoubleReg);
    2313             :       } else {
    2314           0 :         __ pcmpeqd(dst, dst);
    2315           0 :         __ pslld(dst, 31);
    2316           0 :         __ xorps(dst, i.InputSimd128Register(0));
    2317             :       }
    2318             :       break;
    2319             :     }
    2320             :     case kX64F32x4RecipApprox: {
    2321           4 :       __ rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2322           4 :       break;
    2323             :     }
    2324             :     case kX64F32x4RecipSqrtApprox: {
    2325           4 :       __ rsqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2326           4 :       break;
    2327             :     }
    2328             :     case kX64F32x4Add: {
    2329             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2330          12 :       __ addps(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2331          12 :       break;
    2332             :     }
    2333             :     case kX64F32x4AddHoriz: {
    2334             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2335             :       CpuFeatureScope sse_scope(tasm(), SSE3);
    2336           4 :       __ haddps(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2337             :       break;
    2338             :     }
    2339             :     case kX64F32x4Sub: {
    2340             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2341           4 :       __ subps(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2342           4 :       break;
    2343             :     }
    2344             :     case kX64F32x4Mul: {
    2345             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2346           4 :       __ mulps(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2347           4 :       break;
    2348             :     }
    2349             :     case kX64F32x4Min: {
    2350             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2351             :       // minps doesn't propagate NaN lanes in the first source. Compare this
    2352             :       // with itself to generate 1's in those lanes (quiet NaNs) and or them
    2353             :       // with the result of minps to simulate NaN propagation.
    2354           4 :       __ movaps(kScratchDoubleReg, i.InputSimd128Register(0));
    2355           4 :       __ cmpps(kScratchDoubleReg, kScratchDoubleReg, 0x4);
    2356           4 :       __ minps(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2357           4 :       __ orps(i.OutputSimd128Register(), kScratchDoubleReg);
    2358           4 :       break;
    2359             :     }
    2360             :     case kX64F32x4Max: {
    2361             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2362             :       // maxps doesn't propagate NaN lanes in the first source. Compare this
    2363             :       // with itself to generate 1's in those lanes (quiet NaNs) and or them
    2364             :       // with the result of maxps to simulate NaN propagation.
    2365           4 :       __ movaps(kScratchDoubleReg, i.InputSimd128Register(0));
    2366           4 :       __ cmpps(kScratchDoubleReg, kScratchDoubleReg, 0x4);
    2367           4 :       __ maxps(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2368           4 :       __ orps(i.OutputSimd128Register(), kScratchDoubleReg);
    2369           4 :       break;
    2370             :     }
    2371             :     case kX64F32x4Eq: {
    2372             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2373           4 :       __ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x0);
    2374           4 :       break;
    2375             :     }
    2376             :     case kX64F32x4Ne: {
    2377             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2378           4 :       __ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x4);
    2379           4 :       break;
    2380             :     }
    2381             :     case kX64F32x4Lt: {
    2382             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2383           8 :       __ cmpltps(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2384             :       break;
    2385             :     }
    2386             :     case kX64F32x4Le: {
    2387             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2388           8 :       __ cmpleps(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2389             :       break;
    2390             :     }
    2391             :     case kX64I32x4Splat: {
    2392        1072 :       XMMRegister dst = i.OutputSimd128Register();
    2393        1072 :       if (instr->InputAt(0)->IsRegister()) {
    2394        1072 :         __ movd(dst, i.InputRegister(0));
    2395             :       } else {
    2396           0 :         __ movd(dst, i.InputOperand(0));
    2397             :       }
    2398        1072 :       __ pshufd(dst, dst, 0x0);
    2399             :       break;
    2400             :     }
    2401             :     case kX64I32x4ExtractLane: {
    2402             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2403        3768 :       __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
    2404             :       break;
    2405             :     }
    2406             :     case kX64I32x4ReplaceLane: {
    2407             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2408        1784 :       if (instr->InputAt(2)->IsRegister()) {
    2409         344 :         __ Pinsrd(i.OutputSimd128Register(), i.InputRegister(2),
    2410         344 :                   i.InputInt8(1));
    2411             :       } else {
    2412        2880 :         __ Pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
    2413             :       }
    2414             :       break;
    2415             :     }
    2416             :     case kX64I32x4SConvertF32x4: {
    2417             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2418             :       XMMRegister dst = i.OutputSimd128Register();
    2419             :       // NAN->0
    2420           4 :       __ movaps(kScratchDoubleReg, dst);
    2421             :       __ cmpeqps(kScratchDoubleReg, kScratchDoubleReg);
    2422             :       __ pand(dst, kScratchDoubleReg);
    2423             :       // Set top bit if >= 0 (but not -0.0!)
    2424             :       __ pxor(kScratchDoubleReg, dst);
    2425             :       // Convert
    2426           4 :       __ cvttps2dq(dst, dst);
    2427             :       // Set top bit if >=0 is now < 0
    2428             :       __ pand(kScratchDoubleReg, dst);
    2429           4 :       __ psrad(kScratchDoubleReg, 31);
    2430             :       // Set positive overflow lanes to 0x7FFFFFFF
    2431             :       __ pxor(dst, kScratchDoubleReg);
    2432             :       break;
    2433             :     }
    2434             :     case kX64I32x4SConvertI16x8Low: {
    2435             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2436           4 :       __ pmovsxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2437             :       break;
    2438             :     }
    2439             :     case kX64I32x4SConvertI16x8High: {
    2440             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2441             :       XMMRegister dst = i.OutputSimd128Register();
    2442           4 :       __ palignr(dst, i.InputSimd128Register(0), 8);
    2443             :       __ pmovsxwd(dst, dst);
    2444             :       break;
    2445             :     }
    2446             :     case kX64I32x4Neg: {
    2447             :       CpuFeatureScope sse_scope(tasm(), SSSE3);
    2448             :       XMMRegister dst = i.OutputSimd128Register();
    2449             :       XMMRegister src = i.InputSimd128Register(0);
    2450           4 :       if (dst == src) {
    2451           4 :         __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2452             :         __ psignd(dst, kScratchDoubleReg);
    2453             :       } else {
    2454           0 :         __ pxor(dst, dst);
    2455             :         __ psubd(dst, src);
    2456             :       }
    2457             :       break;
    2458             :     }
    2459             :     case kX64I32x4Shl: {
    2460         248 :       __ pslld(i.OutputSimd128Register(), i.InputInt8(1));
    2461         124 :       break;
    2462             :     }
    2463             :     case kX64I32x4ShrS: {
    2464         248 :       __ psrad(i.OutputSimd128Register(), i.InputInt8(1));
    2465         124 :       break;
    2466             :     }
    2467             :     case kX64I32x4Add: {
    2468          12 :       __ paddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2469             :       break;
    2470             :     }
    2471             :     case kX64I32x4AddHoriz: {
    2472             :       CpuFeatureScope sse_scope(tasm(), SSSE3);
    2473           4 :       __ phaddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2474             :       break;
    2475             :     }
    2476             :     case kX64I32x4Sub: {
    2477           4 :       __ psubd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2478             :       break;
    2479             :     }
    2480             :     case kX64I32x4Mul: {
    2481             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2482           4 :       __ pmulld(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2483             :       break;
    2484             :     }
    2485             :     case kX64I32x4MinS: {
    2486             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2487           4 :       __ pminsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2488             :       break;
    2489             :     }
    2490             :     case kX64I32x4MaxS: {
    2491             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2492           4 :       __ pmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2493             :       break;
    2494             :     }
    2495             :     case kX64I32x4Eq: {
    2496          12 :       __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2497             :       break;
    2498             :     }
    2499             :     case kX64I32x4Ne: {
    2500          16 :       __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2501             :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2502             :       __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
    2503             :       break;
    2504             :     }
    2505             :     case kX64I32x4GtS: {
    2506           8 :       __ pcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2507             :       break;
    2508             :     }
    2509             :     case kX64I32x4GeS: {
    2510             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2511             :       XMMRegister dst = i.OutputSimd128Register();
    2512             :       XMMRegister src = i.InputSimd128Register(1);
    2513           8 :       __ pminsd(dst, src);
    2514             :       __ pcmpeqd(dst, src);
    2515             :       break;
    2516             :     }
    2517             :     case kX64I32x4UConvertF32x4: {
    2518             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2519             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2520             :       XMMRegister dst = i.OutputSimd128Register();
    2521             :       XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
    2522             :       // NAN->0, negative->0
    2523           4 :       __ pxor(kScratchDoubleReg, kScratchDoubleReg);
    2524           4 :       __ maxps(dst, kScratchDoubleReg);
    2525             :       // scratch: float representation of max_signed
    2526             :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2527           4 :       __ psrld(kScratchDoubleReg, 1);                     // 0x7fffffff
    2528           4 :       __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // 0x4f000000
    2529             :       // tmp: convert (src-max_signed).
    2530             :       // Positive overflow lanes -> 0x7FFFFFFF
    2531             :       // Negative lanes -> 0
    2532           4 :       __ movaps(tmp, dst);
    2533           4 :       __ subps(tmp, kScratchDoubleReg);
    2534             :       __ cmpleps(kScratchDoubleReg, tmp);
    2535           4 :       __ cvttps2dq(tmp, tmp);
    2536             :       __ pxor(tmp, kScratchDoubleReg);
    2537             :       __ pxor(kScratchDoubleReg, kScratchDoubleReg);
    2538             :       __ pmaxsd(tmp, kScratchDoubleReg);
    2539             :       // convert. Overflow lanes above max_signed will be 0x80000000
    2540           4 :       __ cvttps2dq(dst, dst);
    2541             :       // Add (src-max_signed) for overflow lanes.
    2542             :       __ paddd(dst, tmp);
    2543             :       break;
    2544             :     }
    2545             :     case kX64I32x4UConvertI16x8Low: {
    2546             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2547           4 :       __ pmovzxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2548             :       break;
    2549             :     }
    2550             :     case kX64I32x4UConvertI16x8High: {
    2551             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2552             :       XMMRegister dst = i.OutputSimd128Register();
    2553           4 :       __ palignr(dst, i.InputSimd128Register(0), 8);
    2554             :       __ pmovzxwd(dst, dst);
    2555             :       break;
    2556             :     }
    2557             :     case kX64I32x4ShrU: {
    2558         248 :       __ psrld(i.OutputSimd128Register(), i.InputInt8(1));
    2559         124 :       break;
    2560             :     }
    2561             :     case kX64I32x4MinU: {
    2562             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2563           4 :       __ pminud(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2564             :       break;
    2565             :     }
    2566             :     case kX64I32x4MaxU: {
    2567             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2568           4 :       __ pmaxud(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2569             :       break;
    2570             :     }
    2571             :     case kX64I32x4GtU: {
    2572             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2573             :       XMMRegister dst = i.OutputSimd128Register();
    2574             :       XMMRegister src = i.InputSimd128Register(1);
    2575           8 :       __ pmaxud(dst, src);
    2576             :       __ pcmpeqd(dst, src);
    2577             :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2578             :       __ pxor(dst, kScratchDoubleReg);
    2579             :       break;
    2580             :     }
    2581             :     case kX64I32x4GeU: {
    2582             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2583             :       XMMRegister dst = i.OutputSimd128Register();
    2584             :       XMMRegister src = i.InputSimd128Register(1);
    2585           8 :       __ pminud(dst, src);
    2586             :       __ pcmpeqd(dst, src);
    2587             :       break;
    2588             :     }
    2589             :     case kX64S128Zero: {
    2590          16 :       XMMRegister dst = i.OutputSimd128Register();
    2591          16 :       __ xorps(dst, dst);
    2592             :       break;
    2593             :     }
    2594             :     case kX64I16x8Splat: {
    2595         404 :       XMMRegister dst = i.OutputSimd128Register();
    2596         404 :       if (instr->InputAt(0)->IsRegister()) {
    2597         404 :         __ movd(dst, i.InputRegister(0));
    2598             :       } else {
    2599           0 :         __ movd(dst, i.InputOperand(0));
    2600             :       }
    2601         404 :       __ pshuflw(dst, dst, 0x0);
    2602         404 :       __ pshufd(dst, dst, 0x0);
    2603             :       break;
    2604             :     }
    2605             :     case kX64I16x8ExtractLane: {
    2606             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2607          32 :       Register dst = i.OutputRegister();
    2608          64 :       __ pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
    2609          32 :       __ movsxwl(dst, dst);
    2610             :       break;
    2611             :     }
    2612             :     case kX64I16x8ReplaceLane: {
    2613             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2614          52 :       if (instr->InputAt(2)->IsRegister()) {
    2615         104 :         __ pinsrw(i.OutputSimd128Register(), i.InputRegister(2),
    2616          52 :                   i.InputInt8(1));
    2617             :       } else {
    2618           0 :         __ pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
    2619             :       }
    2620             :       break;
    2621             :     }
    2622             :     case kX64I16x8SConvertI8x16Low: {
    2623             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2624           4 :       __ pmovsxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2625             :       break;
    2626             :     }
    2627             :     case kX64I16x8SConvertI8x16High: {
    2628             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2629             :       XMMRegister dst = i.OutputSimd128Register();
    2630           4 :       __ palignr(dst, i.InputSimd128Register(0), 8);
    2631             :       __ pmovsxbw(dst, dst);
    2632             :       break;
    2633             :     }
    2634             :     case kX64I16x8Neg: {
    2635             :       CpuFeatureScope sse_scope(tasm(), SSSE3);
    2636             :       XMMRegister dst = i.OutputSimd128Register();
    2637             :       XMMRegister src = i.InputSimd128Register(0);
    2638           4 :       if (dst == src) {
    2639           4 :         __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2640             :         __ psignw(dst, kScratchDoubleReg);
    2641             :       } else {
    2642           0 :         __ pxor(dst, dst);
    2643             :         __ psubw(dst, src);
    2644             :       }
    2645             :       break;
    2646             :     }
    2647             :     case kX64I16x8Shl: {
    2648         120 :       __ psllw(i.OutputSimd128Register(), i.InputInt8(1));
    2649          60 :       break;
    2650             :     }
    2651             :     case kX64I16x8ShrS: {
    2652         120 :       __ psraw(i.OutputSimd128Register(), i.InputInt8(1));
    2653          60 :       break;
    2654             :     }
    2655             :     case kX64I16x8SConvertI32x4: {
    2656             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2657           4 :       __ packssdw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2658             :       break;
    2659             :     }
    2660             :     case kX64I16x8Add: {
    2661           4 :       __ paddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2662             :       break;
    2663             :     }
    2664             :     case kX64I16x8AddSaturateS: {
    2665           4 :       __ paddsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2666             :       break;
    2667             :     }
    2668             :     case kX64I16x8AddHoriz: {
    2669             :       CpuFeatureScope sse_scope(tasm(), SSSE3);
    2670           4 :       __ phaddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2671             :       break;
    2672             :     }
    2673             :     case kX64I16x8Sub: {
    2674           4 :       __ psubw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2675             :       break;
    2676             :     }
    2677             :     case kX64I16x8SubSaturateS: {
    2678           4 :       __ psubsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2679             :       break;
    2680             :     }
    2681             :     case kX64I16x8Mul: {
    2682             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2683           4 :       __ pmullw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2684             :       break;
    2685             :     }
    2686             :     case kX64I16x8MinS: {
    2687             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2688           4 :       __ pminsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2689             :       break;
    2690             :     }
    2691             :     case kX64I16x8MaxS: {
    2692             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2693           4 :       __ pmaxsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2694             :       break;
    2695             :     }
    2696             :     case kX64I16x8Eq: {
    2697          12 :       __ pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2698             :       break;
    2699             :     }
    2700             :     case kX64I16x8Ne: {
    2701          16 :       __ pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2702             :       __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
    2703             :       __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
    2704             :       break;
    2705             :     }
    2706             :     case kX64I16x8GtS: {
    2707           8 :       __ pcmpgtw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2708             :       break;
    2709             :     }
    2710             :     case kX64I16x8GeS: {
    2711             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2712             :       XMMRegister dst = i.OutputSimd128Register();
    2713             :       XMMRegister src = i.InputSimd128Register(1);
    2714           8 :       __ pminsw(dst, src);
    2715             :       __ pcmpeqw(dst, src);
    2716             :       break;
    2717             :     }
    2718             :     case kX64I16x8UConvertI8x16Low: {
    2719             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2720           4 :       __ pmovzxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2721             :       break;
    2722             :     }
    2723             :     case kX64I16x8UConvertI8x16High: {
    2724             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2725             :       XMMRegister dst = i.OutputSimd128Register();
    2726           4 :       __ palignr(dst, i.InputSimd128Register(0), 8);
    2727             :       __ pmovzxbw(dst, dst);
    2728             :       break;
    2729             :     }
    2730             :     case kX64I16x8ShrU: {
    2731         120 :       __ psrlw(i.OutputSimd128Register(), i.InputInt8(1));
    2732          60 :       break;
    2733             :     }
    2734             :     case kX64I16x8UConvertI32x4: {
    2735             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2736             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2737             :       XMMRegister dst = i.OutputSimd128Register();
    2738             :       // Change negative lanes to 0x7FFFFFFF
    2739           4 :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2740           4 :       __ psrld(kScratchDoubleReg, 1);
    2741             :       __ pminud(dst, kScratchDoubleReg);
    2742             :       __ pminud(kScratchDoubleReg, i.InputSimd128Register(1));
    2743             :       __ packusdw(dst, kScratchDoubleReg);
    2744             :       break;
    2745             :     }
    2746             :     case kX64I16x8AddSaturateU: {
    2747           4 :       __ paddusw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2748             :       break;
    2749             :     }
    2750             :     case kX64I16x8SubSaturateU: {
    2751           4 :       __ psubusw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2752             :       break;
    2753             :     }
    2754             :     case kX64I16x8MinU: {
    2755             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2756           4 :       __ pminuw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2757             :       break;
    2758             :     }
    2759             :     case kX64I16x8MaxU: {
    2760             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2761           4 :       __ pmaxuw(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2762             :       break;
    2763             :     }
    2764             :     case kX64I16x8GtU: {
    2765             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2766             :       XMMRegister dst = i.OutputSimd128Register();
    2767             :       XMMRegister src = i.InputSimd128Register(1);
    2768           8 :       __ pmaxuw(dst, src);
    2769             :       __ pcmpeqw(dst, src);
    2770             :       __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
    2771             :       __ pxor(dst, kScratchDoubleReg);
    2772             :       break;
    2773             :     }
    2774             :     case kX64I16x8GeU: {
    2775             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2776             :       XMMRegister dst = i.OutputSimd128Register();
    2777             :       XMMRegister src = i.InputSimd128Register(1);
    2778          16 :       __ pminuw(dst, src);
    2779             :       __ pcmpeqw(dst, src);
    2780             :       break;
    2781             :     }
    2782             :     case kX64I8x16Splat: {
    2783             :       CpuFeatureScope sse_scope(tasm(), SSSE3);
    2784             :       XMMRegister dst = i.OutputSimd128Register();
    2785         304 :       if (instr->InputAt(0)->IsRegister()) {
    2786         304 :         __ movd(dst, i.InputRegister(0));
    2787             :       } else {
    2788           0 :         __ movd(dst, i.InputOperand(0));
    2789             :       }
    2790         304 :       __ xorps(kScratchDoubleReg, kScratchDoubleReg);
    2791             :       __ pshufb(dst, kScratchDoubleReg);
    2792             :       break;
    2793             :     }
    2794             :     case kX64I8x16ExtractLane: {
    2795             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2796          32 :       Register dst = i.OutputRegister();
    2797          64 :       __ pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
    2798          32 :       __ movsxbl(dst, dst);
    2799             :       break;
    2800             :     }
    2801             :     case kX64I8x16ReplaceLane: {
    2802             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2803          84 :       if (instr->InputAt(2)->IsRegister()) {
    2804         168 :         __ pinsrb(i.OutputSimd128Register(), i.InputRegister(2),
    2805          84 :                   i.InputInt8(1));
    2806             :       } else {
    2807           0 :         __ pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
    2808             :       }
    2809             :       break;
    2810             :     }
    2811             :     case kX64I8x16SConvertI16x8: {
    2812             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2813           4 :       __ packsswb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2814             :       break;
    2815             :     }
    2816             :     case kX64I8x16Neg: {
    2817             :       CpuFeatureScope sse_scope(tasm(), SSSE3);
    2818             :       XMMRegister dst = i.OutputSimd128Register();
    2819             :       XMMRegister src = i.InputSimd128Register(0);
    2820           4 :       if (dst == src) {
    2821           4 :         __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    2822             :         __ psignb(dst, kScratchDoubleReg);
    2823             :       } else {
    2824           0 :         __ pxor(dst, dst);
    2825             :         __ psubb(dst, src);
    2826             :       }
    2827             :       break;
    2828             :     }
    2829             :     case kX64I8x16Shl: {
    2830             :       XMMRegister dst = i.OutputSimd128Register();
    2831             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    2832          28 :       int8_t shift = i.InputInt8(1) & 0x7;
    2833          28 :       if (shift < 4) {
    2834             :         // For small shifts, doubling is faster.
    2835          60 :         for (int i = 0; i < shift; ++i) {
    2836          24 :           __ paddb(dst, dst);
    2837             :         }
    2838             :       } else {
    2839             :         // Mask off the unwanted bits before word-shifting.
    2840          16 :         __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
    2841          16 :         __ psrlw(kScratchDoubleReg, 8 + shift);
    2842             :         __ packuswb(kScratchDoubleReg, kScratchDoubleReg);
    2843             :         __ pand(dst, kScratchDoubleReg);
    2844          16 :         __ psllw(dst, shift);
    2845             :       }
    2846             :       break;
    2847             :     }
    2848             :     case kX64I8x16ShrS: {
    2849             :       XMMRegister dst = i.OutputSimd128Register();
    2850             :       XMMRegister src = i.InputSimd128Register(0);
    2851          28 :       int8_t shift = i.InputInt8(1) & 0x7;
    2852             :       // Unpack the bytes into words, do arithmetic shifts, and repack.
    2853          28 :       __ punpckhbw(kScratchDoubleReg, src);
    2854             :       __ punpcklbw(dst, src);
    2855          28 :       __ psraw(kScratchDoubleReg, 8 + shift);
    2856          28 :       __ psraw(dst, 8 + shift);
    2857             :       __ packsswb(dst, kScratchDoubleReg);
    2858             :       break;
    2859             :     }
    2860             :     case kX64I8x16Add: {
    2861           4 :       __ paddb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2862             :       break;
    2863             :     }
    2864             :     case kX64I8x16AddSaturateS: {
    2865           4 :       __ paddsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2866             :       break;
    2867             :     }
    2868             :     case kX64I8x16Sub: {
    2869           4 :       __ psubb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2870             :       break;
    2871             :     }
    2872             :     case kX64I8x16SubSaturateS: {
    2873           4 :       __ psubsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2874             :       break;
    2875             :     }
    2876             :     case kX64I8x16Mul: {
    2877             :       XMMRegister dst = i.OutputSimd128Register();
    2878             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    2879             :       XMMRegister right = i.InputSimd128Register(1);
    2880             :       XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
    2881             :       // I16x8 view of I8x16
    2882             :       // left = AAaa AAaa ... AAaa AAaa
    2883             :       // right= BBbb BBbb ... BBbb BBbb
    2884             :       // t = 00AA 00AA ... 00AA 00AA
    2885             :       // s = 00BB 00BB ... 00BB 00BB
    2886           4 :       __ movaps(tmp, dst);
    2887           4 :       __ movaps(kScratchDoubleReg, right);
    2888           4 :       __ psrlw(tmp, 8);
    2889           4 :       __ psrlw(kScratchDoubleReg, 8);
    2890             :       // dst = left * 256
    2891           4 :       __ psllw(dst, 8);
    2892             :       // t = I16x8Mul(t, s)
    2893             :       //    => __PP __PP ...  __PP  __PP
    2894             :       __ pmullw(tmp, kScratchDoubleReg);
    2895             :       // dst = I16x8Mul(left * 256, right)
    2896             :       //    => pp__ pp__ ...  pp__  pp__
    2897             :       __ pmullw(dst, right);
    2898             :       // t = I16x8Shl(t, 8)
    2899             :       //    => PP00 PP00 ...  PP00  PP00
    2900           4 :       __ psllw(tmp, 8);
    2901             :       // dst = I16x8Shr(dst, 8)
    2902             :       //    => 00pp 00pp ...  00pp  00pp
    2903           4 :       __ psrlw(dst, 8);
    2904             :       // dst = I16x8Or(dst, t)
    2905             :       //    => PPpp PPpp ...  PPpp  PPpp
    2906             :       __ por(dst, tmp);
    2907             :       break;
    2908             :     }
    2909             :     case kX64I8x16MinS: {
    2910             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2911           4 :       __ pminsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2912             :       break;
    2913             :     }
    2914             :     case kX64I8x16MaxS: {
    2915             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2916           4 :       __ pmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2917             :       break;
    2918             :     }
    2919             :     case kX64I8x16Eq: {
    2920          12 :       __ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2921             :       break;
    2922             :     }
    2923             :     case kX64I8x16Ne: {
    2924          16 :       __ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2925             :       __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
    2926             :       __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
    2927             :       break;
    2928             :     }
    2929             :     case kX64I8x16GtS: {
    2930           8 :       __ pcmpgtb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2931             :       break;
    2932             :     }
    2933             :     case kX64I8x16GeS: {
    2934             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2935             :       XMMRegister dst = i.OutputSimd128Register();
    2936             :       XMMRegister src = i.InputSimd128Register(1);
    2937           8 :       __ pminsb(dst, src);
    2938             :       __ pcmpeqb(dst, src);
    2939             :       break;
    2940             :     }
    2941             :     case kX64I8x16UConvertI16x8: {
    2942             :       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    2943             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2944             :       XMMRegister dst = i.OutputSimd128Register();
    2945             :       // Change negative lanes to 0x7FFF
    2946           4 :       __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
    2947           4 :       __ psrlw(kScratchDoubleReg, 1);
    2948             :       __ pminuw(dst, kScratchDoubleReg);
    2949             :       __ pminuw(kScratchDoubleReg, i.InputSimd128Register(1));
    2950             :       __ packuswb(dst, kScratchDoubleReg);
    2951             :       break;
    2952             :     }
    2953             :     case kX64I8x16ShrU: {
    2954             :       XMMRegister dst = i.OutputSimd128Register();
    2955             :       XMMRegister src = i.InputSimd128Register(0);
    2956          28 :       int8_t shift = i.InputInt8(1) & 0x7;
    2957             :       // Unpack the bytes into words, do logical shifts, and repack.
    2958          28 :       __ punpckhbw(kScratchDoubleReg, src);
    2959             :       __ punpcklbw(dst, src);
    2960          28 :       __ psrlw(kScratchDoubleReg, 8 + shift);
    2961          28 :       __ psrlw(dst, 8 + shift);
    2962             :       __ packuswb(dst, kScratchDoubleReg);
    2963             :       break;
    2964             :     }
    2965             :     case kX64I8x16AddSaturateU: {
    2966           4 :       __ paddusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2967             :       break;
    2968             :     }
    2969             :     case kX64I8x16SubSaturateU: {
    2970           4 :       __ psubusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2971             :       break;
    2972             :     }
    2973             :     case kX64I8x16MinU: {
    2974             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2975           4 :       __ pminub(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2976             :       break;
    2977             :     }
    2978             :     case kX64I8x16MaxU: {
    2979             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2980           4 :       __ pmaxub(i.OutputSimd128Register(), i.InputSimd128Register(1));
    2981             :       break;
    2982             :     }
    2983             :     case kX64I8x16GtU: {
    2984             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2985             :       XMMRegister dst = i.OutputSimd128Register();
    2986             :       XMMRegister src = i.InputSimd128Register(1);
    2987           8 :       __ pmaxub(dst, src);
    2988             :       __ pcmpeqb(dst, src);
    2989             :       __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
    2990             :       __ pxor(dst, kScratchDoubleReg);
    2991             :       break;
    2992             :     }
    2993             :     case kX64I8x16GeU: {
    2994             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    2995             :       XMMRegister dst = i.OutputSimd128Register();
    2996             :       XMMRegister src = i.InputSimd128Register(1);
    2997           0 :       __ pminub(dst, src);
    2998             :       __ pcmpeqb(dst, src);
    2999             :       break;
    3000             :     }
    3001             :     case kX64S128And: {
    3002           4 :       __ pand(i.OutputSimd128Register(), i.InputSimd128Register(1));
    3003             :       break;
    3004             :     }
    3005             :     case kX64S128Or: {
    3006           4 :       __ por(i.OutputSimd128Register(), i.InputSimd128Register(1));
    3007             :       break;
    3008             :     }
    3009             :     case kX64S128Xor: {
    3010           4 :       __ pxor(i.OutputSimd128Register(), i.InputSimd128Register(1));
    3011             :       break;
    3012             :     }
    3013             :     case kX64S128Not: {
    3014             :       XMMRegister dst = i.OutputSimd128Register();
    3015             :       XMMRegister src = i.InputSimd128Register(0);
    3016           4 :       if (dst == src) {
    3017           4 :         __ movaps(kScratchDoubleReg, dst);
    3018             :         __ pcmpeqd(dst, dst);
    3019             :         __ pxor(dst, kScratchDoubleReg);
    3020             :       } else {
    3021           0 :         __ pcmpeqd(dst, dst);
    3022             :         __ pxor(dst, src);
    3023             :       }
    3024             : 
    3025             :       break;
    3026             :     }
    3027             :     case kX64S128Select: {
    3028             :       // Mask used here is stored in dst.
    3029          28 :       XMMRegister dst = i.OutputSimd128Register();
    3030          28 :       __ movaps(kScratchDoubleReg, i.InputSimd128Register(1));
    3031          28 :       __ xorps(kScratchDoubleReg, i.InputSimd128Register(2));
    3032          28 :       __ andps(dst, kScratchDoubleReg);
    3033          28 :       __ xorps(dst, i.InputSimd128Register(2));
    3034             :       break;
    3035             :     }
    3036             :     case kX64S8x16Shuffle: {
    3037             :       XMMRegister dst = i.OutputSimd128Register();
    3038             :       Register tmp = i.TempRegister(0);
    3039             :       // Prepare 16 byte aligned buffer for shuffle control mask
    3040        1396 :       __ movq(tmp, rsp);
    3041             :       __ andq(rsp, Immediate(-16));
    3042        1396 :       if (instr->InputCount() == 5) {  // only one input operand
    3043         640 :         uint32_t mask[4] = {};
    3044             :         DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
    3045        5760 :         for (int j = 4; j > 0; j--) {
    3046        5120 :           mask[j - 1] = i.InputUint32(j);
    3047             :         }
    3048             : 
    3049         640 :         SetupShuffleMaskOnStack(tasm(), mask);
    3050        1280 :         __ pshufb(dst, Operand(rsp, 0));
    3051             :       } else {  // two input operands
    3052             :         DCHECK_EQ(6, instr->InputCount());
    3053        1512 :         ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 0);
    3054         756 :         uint32_t mask[4] = {};
    3055        6804 :         for (int j = 5; j > 1; j--) {
    3056        3024 :           uint32_t lanes = i.InputUint32(j);
    3057       27216 :           for (int k = 0; k < 32; k += 8) {
    3058       12096 :             uint8_t lane = lanes >> k;
    3059       12096 :             mask[j - 2] |= (lane < kSimd128Size ? lane : 0x80) << k;
    3060             :           }
    3061             :         }
    3062         756 :         SetupShuffleMaskOnStack(tasm(), mask);
    3063        1512 :         __ pshufb(kScratchDoubleReg, Operand(rsp, 0));
    3064         756 :         uint32_t mask1[4] = {};
    3065         756 :         if (instr->InputAt(1)->IsSimd128Register()) {
    3066             :           XMMRegister src1 = i.InputSimd128Register(1);
    3067         756 :           if (src1 != dst) __ movups(dst, src1);
    3068             :         } else {
    3069           0 :           __ movups(dst, i.InputOperand(1));
    3070             :         }
    3071        6804 :         for (int j = 5; j > 1; j--) {
    3072        3024 :           uint32_t lanes = i.InputUint32(j);
    3073       27216 :           for (int k = 0; k < 32; k += 8) {
    3074       12096 :             uint8_t lane = lanes >> k;
    3075       12096 :             mask1[j - 2] |= (lane >= kSimd128Size ? (lane & 0x0F) : 0x80) << k;
    3076             :           }
    3077             :         }
    3078         756 :         SetupShuffleMaskOnStack(tasm(), mask1);
    3079        1512 :         __ pshufb(dst, Operand(rsp, 0));
    3080             :         __ por(dst, kScratchDoubleReg);
    3081             :       }
    3082             :       __ movq(rsp, tmp);
    3083             :       break;
    3084             :     }
    3085             :     case kX64S32x4Swizzle: {
    3086             :       DCHECK_EQ(2, instr->InputCount());
    3087         924 :       ASSEMBLE_SIMD_IMM_INSTR(pshufd, i.OutputSimd128Register(), 0,
    3088             :                               i.InputInt8(1));
    3089             :       break;
    3090             :     }
    3091             :     case kX64S32x4Shuffle: {
    3092             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    3093             :       DCHECK_EQ(4, instr->InputCount());  // Swizzles should be handled above.
    3094             :       int8_t shuffle = i.InputInt8(2);
    3095             :       DCHECK_NE(0xe4, shuffle);  // A simple blend should be handled below.
    3096         920 :       ASSEMBLE_SIMD_IMM_INSTR(pshufd, kScratchDoubleReg, 1, shuffle);
    3097         920 :       ASSEMBLE_SIMD_IMM_INSTR(pshufd, i.OutputSimd128Register(), 0, shuffle);
    3098         920 :       __ pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputInt8(3));
    3099             :       break;
    3100             :     }
    3101             :     case kX64S16x8Blend: {
    3102         112 :       ASSEMBLE_SIMD_IMM_SHUFFLE(pblendw, SSE4_1, i.InputInt8(2));
    3103          56 :       break;
    3104             :     }
    3105             :     case kX64S16x8HalfShuffle1: {
    3106         352 :       XMMRegister dst = i.OutputSimd128Register();
    3107        1056 :       ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, i.InputInt8(1));
    3108         352 :       __ pshufhw(dst, dst, i.InputInt8(2));
    3109             :       break;
    3110             :     }
    3111             :     case kX64S16x8HalfShuffle2: {
    3112             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    3113         176 :       XMMRegister dst = i.OutputSimd128Register();
    3114         528 :       ASSEMBLE_SIMD_IMM_INSTR(pshuflw, kScratchDoubleReg, 1, i.InputInt8(2));
    3115         176 :       __ pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputInt8(3));
    3116         528 :       ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, i.InputInt8(2));
    3117         176 :       __ pshufhw(dst, dst, i.InputInt8(3));
    3118         176 :       __ pblendw(dst, kScratchDoubleReg, i.InputInt8(4));
    3119             :       break;
    3120             :     }
    3121             :     case kX64S8x16Alignr: {
    3122         480 :       ASSEMBLE_SIMD_IMM_SHUFFLE(palignr, SSSE3, i.InputInt8(2));
    3123         240 :       break;
    3124             :     }
    3125             :     case kX64S16x8Dup: {
    3126         164 :       XMMRegister dst = i.OutputSimd128Register();
    3127         164 :       int8_t lane = i.InputInt8(1) & 0x7;
    3128         164 :       int8_t lane4 = lane & 0x3;
    3129         164 :       int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
    3130         164 :       if (lane < 4) {
    3131         328 :         ASSEMBLE_SIMD_IMM_INSTR(pshuflw, dst, 0, half_dup);
    3132         164 :         __ pshufd(dst, dst, 0);
    3133             :       } else {
    3134           0 :         ASSEMBLE_SIMD_IMM_INSTR(pshufhw, dst, 0, half_dup);
    3135           0 :         __ pshufd(dst, dst, 0xaa);
    3136             :       }
    3137             :       break;
    3138             :     }
    3139             :     case kX64S8x16Dup: {
    3140             :       XMMRegister dst = i.OutputSimd128Register();
    3141         224 :       int8_t lane = i.InputInt8(1) & 0xf;
    3142             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3143         224 :       if (lane < 8) {
    3144         224 :         __ punpcklbw(dst, dst);
    3145             :       } else {
    3146           0 :         __ punpckhbw(dst, dst);
    3147             :       }
    3148         224 :       lane &= 0x7;
    3149         224 :       int8_t lane4 = lane & 0x3;
    3150         224 :       int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
    3151         224 :       if (lane < 4) {
    3152         208 :         __ pshuflw(dst, dst, half_dup);
    3153         208 :         __ pshufd(dst, dst, 0);
    3154             :       } else {
    3155          16 :         __ pshufhw(dst, dst, half_dup);
    3156          16 :         __ pshufd(dst, dst, 0xaa);
    3157             :       }
    3158             :       break;
    3159             :     }
    3160             :     case kX64S64x2UnpackHigh:
    3161           0 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhqdq);
    3162             :       break;
    3163             :     case kX64S32x4UnpackHigh:
    3164         300 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhdq);
    3165             :       break;
    3166             :     case kX64S16x8UnpackHigh:
    3167         348 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhwd);
    3168             :       break;
    3169             :     case kX64S8x16UnpackHigh:
    3170         264 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhbw);
    3171             :       break;
    3172             :     case kX64S64x2UnpackLow:
    3173           0 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklqdq);
    3174             :       break;
    3175             :     case kX64S32x4UnpackLow:
    3176         312 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckldq);
    3177             :       break;
    3178             :     case kX64S16x8UnpackLow:
    3179         192 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklwd);
    3180             :       break;
    3181             :     case kX64S8x16UnpackLow:
    3182         348 :       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklbw);
    3183             :       break;
    3184             :     case kX64S16x8UnzipHigh: {
    3185             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    3186             :       XMMRegister dst = i.OutputSimd128Register();
    3187             :       XMMRegister src2 = dst;
    3188             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3189          64 :       if (instr->InputCount() == 2) {
    3190         112 :         ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
    3191          56 :         __ psrld(kScratchDoubleReg, 16);
    3192             :         src2 = kScratchDoubleReg;
    3193             :       }
    3194          64 :       __ psrld(dst, 16);
    3195             :       __ packusdw(dst, src2);
    3196             :       break;
    3197             :     }
    3198             :     case kX64S16x8UnzipLow: {
    3199             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    3200             :       XMMRegister dst = i.OutputSimd128Register();
    3201             :       XMMRegister src2 = dst;
    3202             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3203          72 :       __ pxor(kScratchDoubleReg, kScratchDoubleReg);
    3204          72 :       if (instr->InputCount() == 2) {
    3205         128 :         ASSEMBLE_SIMD_IMM_INSTR(pblendw, kScratchDoubleReg, 1, 0x55);
    3206             :         src2 = kScratchDoubleReg;
    3207             :       }
    3208          72 :       __ pblendw(dst, kScratchDoubleReg, 0xaa);
    3209             :       __ packusdw(dst, src2);
    3210             :       break;
    3211             :     }
    3212             :     case kX64S8x16UnzipHigh: {
    3213             :       XMMRegister dst = i.OutputSimd128Register();
    3214             :       XMMRegister src2 = dst;
    3215             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3216          84 :       if (instr->InputCount() == 2) {
    3217         136 :         ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
    3218          68 :         __ psrlw(kScratchDoubleReg, 8);
    3219             :         src2 = kScratchDoubleReg;
    3220             :       }
    3221          84 :       __ psrlw(dst, 8);
    3222             :       __ packuswb(dst, src2);
    3223             :       break;
    3224             :     }
    3225             :     case kX64S8x16UnzipLow: {
    3226             :       XMMRegister dst = i.OutputSimd128Register();
    3227             :       XMMRegister src2 = dst;
    3228             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3229         128 :       if (instr->InputCount() == 2) {
    3230         224 :         ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
    3231         112 :         __ psllw(kScratchDoubleReg, 8);
    3232         112 :         __ psrlw(kScratchDoubleReg, 8);
    3233             :         src2 = kScratchDoubleReg;
    3234             :       }
    3235         128 :       __ psllw(dst, 8);
    3236         128 :       __ psrlw(dst, 8);
    3237             :       __ packuswb(dst, src2);
    3238             :       break;
    3239             :     }
    3240             :     case kX64S8x16TransposeLow: {
    3241             :       XMMRegister dst = i.OutputSimd128Register();
    3242             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3243          96 :       __ psllw(dst, 8);
    3244          96 :       if (instr->InputCount() == 1) {
    3245           8 :         __ movups(kScratchDoubleReg, dst);
    3246             :       } else {
    3247             :         DCHECK_EQ(2, instr->InputCount());
    3248         176 :         ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
    3249          88 :         __ psllw(kScratchDoubleReg, 8);
    3250             :       }
    3251          96 :       __ psrlw(dst, 8);
    3252             :       __ por(dst, kScratchDoubleReg);
    3253             :       break;
    3254             :     }
    3255             :     case kX64S8x16TransposeHigh: {
    3256             :       XMMRegister dst = i.OutputSimd128Register();
    3257             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3258         124 :       __ psrlw(dst, 8);
    3259         124 :       if (instr->InputCount() == 1) {
    3260          24 :         __ movups(kScratchDoubleReg, dst);
    3261             :       } else {
    3262             :         DCHECK_EQ(2, instr->InputCount());
    3263         200 :         ASSEMBLE_SIMD_INSTR(movups, kScratchDoubleReg, 1);
    3264         100 :         __ psrlw(kScratchDoubleReg, 8);
    3265             :       }
    3266         124 :       __ psllw(kScratchDoubleReg, 8);
    3267             :       __ por(dst, kScratchDoubleReg);
    3268             :       break;
    3269             :     }
    3270             :     case kX64S8x8Reverse:
    3271             :     case kX64S8x4Reverse:
    3272             :     case kX64S8x2Reverse: {
    3273             :       DCHECK_EQ(1, instr->InputCount());
    3274             :       XMMRegister dst = i.OutputSimd128Register();
    3275             :       DCHECK_EQ(dst, i.InputSimd128Register(0));
    3276         304 :       if (arch_opcode != kX64S8x2Reverse) {
    3277             :         // First shuffle words into position.
    3278         204 :         int8_t shuffle_mask = arch_opcode == kX64S8x4Reverse ? 0xB1 : 0x1B;
    3279         204 :         __ pshuflw(dst, dst, shuffle_mask);
    3280         204 :         __ pshufhw(dst, dst, shuffle_mask);
    3281             :       }
    3282         304 :       __ movaps(kScratchDoubleReg, dst);
    3283         304 :       __ psrlw(kScratchDoubleReg, 8);
    3284         304 :       __ psllw(dst, 8);
    3285             :       __ por(dst, kScratchDoubleReg);
    3286             :       break;
    3287             :     }
    3288             :     case kX64S1x4AnyTrue:
    3289             :     case kX64S1x8AnyTrue:
    3290             :     case kX64S1x16AnyTrue: {
    3291             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    3292             :       Register dst = i.OutputRegister();
    3293             :       XMMRegister src = i.InputSimd128Register(0);
    3294             :       Register tmp = i.TempRegister(0);
    3295          60 :       __ xorq(tmp, tmp);
    3296             :       __ movq(dst, Immediate(1));
    3297             :       __ ptest(src, src);
    3298          60 :       __ cmovq(zero, dst, tmp);
    3299             :       break;
    3300             :     }
    3301             :     case kX64S1x4AllTrue:
    3302             :     case kX64S1x8AllTrue:
    3303             :     case kX64S1x16AllTrue: {
    3304             :       CpuFeatureScope sse_scope(tasm(), SSE4_1);
    3305             :       Register dst = i.OutputRegister();
    3306             :       XMMRegister src = i.InputSimd128Register(0);
    3307             :       Register tmp = i.TempRegister(0);
    3308          60 :       __ movq(tmp, Immediate(1));
    3309             :       __ xorq(dst, dst);
    3310             :       __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
    3311             :       __ pxor(kScratchDoubleReg, src);
    3312             :       __ ptest(kScratchDoubleReg, kScratchDoubleReg);
    3313          60 :       __ cmovq(zero, dst, tmp);
    3314             :       break;
    3315             :     }
    3316             :     case kX64StackCheck:
    3317      561937 :       __ CompareRoot(rsp, RootIndex::kStackLimit);
    3318      561950 :       break;
    3319             :     case kWord32AtomicExchangeInt8: {
    3320        1027 :       __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
    3321        1026 :       __ movsxbl(i.InputRegister(0), i.InputRegister(0));
    3322        1028 :       break;
    3323             :     }
    3324             :     case kWord32AtomicExchangeUint8: {
    3325         904 :       __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
    3326             :       __ movzxbl(i.InputRegister(0), i.InputRegister(0));
    3327             :       break;
    3328             :     }
    3329             :     case kWord32AtomicExchangeInt16: {
    3330         602 :       __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
    3331         602 :       __ movsxwl(i.InputRegister(0), i.InputRegister(0));
    3332         602 :       break;
    3333             :     }
    3334             :     case kWord32AtomicExchangeUint16: {
    3335         858 :       __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
    3336             :       __ movzxwl(i.InputRegister(0), i.InputRegister(0));
    3337             :       break;
    3338             :     }
    3339             :     case kWord32AtomicExchangeWord32: {
    3340        1284 :       __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
    3341             :       break;
    3342             :     }
    3343             :     case kWord32AtomicCompareExchangeInt8: {
    3344         112 :       __ lock();
    3345         112 :       __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
    3346         112 :       __ movsxbl(rax, rax);
    3347         112 :       break;
    3348             :     }
    3349             :     case kWord32AtomicCompareExchangeUint8: {
    3350         129 :       __ lock();
    3351         129 :       __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
    3352             :       __ movzxbl(rax, rax);
    3353             :       break;
    3354             :     }
    3355             :     case kWord32AtomicCompareExchangeInt16: {
    3356         112 :       __ lock();
    3357         112 :       __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
    3358         112 :       __ movsxwl(rax, rax);
    3359         112 :       break;
    3360             :     }
    3361             :     case kWord32AtomicCompareExchangeUint16: {
    3362         129 :       __ lock();
    3363         129 :       __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
    3364             :       __ movzxwl(rax, rax);
    3365             :       break;
    3366             :     }
    3367             :     case kWord32AtomicCompareExchangeWord32: {
    3368         257 :       __ lock();
    3369             :       __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
    3370             :       break;
    3371             :     }
    3372             : #define ATOMIC_BINOP_CASE(op, inst)              \
    3373             :   case kWord32Atomic##op##Int8:                  \
    3374             :     ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
    3375             :     __ movsxbl(rax, rax);                        \
    3376             :     break;                                       \
    3377             :   case kWord32Atomic##op##Uint8:                 \
    3378             :     ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
    3379             :     __ movzxbl(rax, rax);                        \
    3380             :     break;                                       \
    3381             :   case kWord32Atomic##op##Int16:                 \
    3382             :     ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
    3383             :     __ movsxwl(rax, rax);                        \
    3384             :     break;                                       \
    3385             :   case kWord32Atomic##op##Uint16:                \
    3386             :     ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
    3387             :     __ movzxwl(rax, rax);                        \
    3388             :     break;                                       \
    3389             :   case kWord32Atomic##op##Word32:                \
    3390             :     ASSEMBLE_ATOMIC_BINOP(inst, movl, cmpxchgl); \
    3391             :     break;
    3392        8996 :       ATOMIC_BINOP_CASE(Add, addl)
    3393        6158 :       ATOMIC_BINOP_CASE(Sub, subl)
    3394        8531 :       ATOMIC_BINOP_CASE(And, andl)
    3395        7754 :       ATOMIC_BINOP_CASE(Or, orl)
    3396        7886 :       ATOMIC_BINOP_CASE(Xor, xorl)
    3397             : #undef ATOMIC_BINOP_CASE
    3398             :     case kX64Word64AtomicExchangeUint8: {
    3399        1942 :       __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
    3400             :       __ movzxbq(i.InputRegister(0), i.InputRegister(0));
    3401             :       break;
    3402             :     }
    3403             :     case kX64Word64AtomicExchangeUint16: {
    3404        1352 :       __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
    3405             :       __ movzxwq(i.InputRegister(0), i.InputRegister(0));
    3406             :       break;
    3407             :     }
    3408             :     case kX64Word64AtomicExchangeUint32: {
    3409         742 :       __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
    3410             :       break;
    3411             :     }
    3412             :     case kX64Word64AtomicExchangeUint64: {
    3413         864 :       __ xchgq(i.InputRegister(0), i.MemoryOperand(1));
    3414             :       break;
    3415             :     }
    3416             :     case kX64Word64AtomicCompareExchangeUint8: {
    3417          17 :       __ lock();
    3418          17 :       __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
    3419             :       __ movzxbq(rax, rax);
    3420             :       break;
    3421             :     }
    3422             :     case kX64Word64AtomicCompareExchangeUint16: {
    3423          25 :       __ lock();
    3424          25 :       __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
    3425             :       __ movzxwq(rax, rax);
    3426             :       break;
    3427             :     }
    3428             :     case kX64Word64AtomicCompareExchangeUint32: {
    3429          25 :       __ lock();
    3430             :       __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
    3431             :       break;
    3432             :     }
    3433             :     case kX64Word64AtomicCompareExchangeUint64: {
    3434         265 :       __ lock();
    3435             :       __ cmpxchgq(i.MemoryOperand(2), i.InputRegister(1));
    3436             :       break;
    3437             :     }
    3438             : #define ATOMIC64_BINOP_CASE(op, inst)              \
    3439             :   case kX64Word64Atomic##op##Uint8:                \
    3440             :     ASSEMBLE_ATOMIC64_BINOP(inst, movb, cmpxchgb); \
    3441             :     __ movzxbq(rax, rax);                          \
    3442             :     break;                                         \
    3443             :   case kX64Word64Atomic##op##Uint16:               \
    3444             :     ASSEMBLE_ATOMIC64_BINOP(inst, movw, cmpxchgw); \
    3445             :     __ movzxwq(rax, rax);                          \
    3446             :     break;                                         \
    3447             :   case kX64Word64Atomic##op##Uint32:               \
    3448             :     ASSEMBLE_ATOMIC64_BINOP(inst, movl, cmpxchgl); \
    3449             :     break;                                         \
    3450             :   case kX64Word64Atomic##op##Uint64:               \
    3451             :     ASSEMBLE_ATOMIC64_BINOP(inst, movq, cmpxchgq); \
    3452             :     break;
    3453        6634 :       ATOMIC64_BINOP_CASE(Add, addq)
    3454        9340 :       ATOMIC64_BINOP_CASE(Sub, subq)
    3455        7667 :       ATOMIC64_BINOP_CASE(And, andq)
    3456        8644 :       ATOMIC64_BINOP_CASE(Or, orq)
    3457        8512 :       ATOMIC64_BINOP_CASE(Xor, xorq)
    3458             : #undef ATOMIC64_BINOP_CASE
    3459             :     case kWord32AtomicLoadInt8:
    3460             :     case kWord32AtomicLoadUint8:
    3461             :     case kWord32AtomicLoadInt16:
    3462             :     case kWord32AtomicLoadUint16:
    3463             :     case kWord32AtomicLoadWord32:
    3464             :     case kWord32AtomicStoreWord8:
    3465             :     case kWord32AtomicStoreWord16:
    3466             :     case kWord32AtomicStoreWord32:
    3467             :     case kX64Word64AtomicLoadUint8:
    3468             :     case kX64Word64AtomicLoadUint16:
    3469             :     case kX64Word64AtomicLoadUint32:
    3470             :     case kX64Word64AtomicLoadUint64:
    3471             :     case kX64Word64AtomicStoreWord8:
    3472             :     case kX64Word64AtomicStoreWord16:
    3473             :     case kX64Word64AtomicStoreWord32:
    3474             :     case kX64Word64AtomicStoreWord64:
    3475           0 :       UNREACHABLE();  // Won't be generated by instruction selector.
    3476             :       break;
    3477             :   }
    3478             :   return kSuccess;
    3479             : }  // NOLINT(readability/fn_size)
    3480             : 
    3481             : #undef ASSEMBLE_UNOP
    3482             : #undef ASSEMBLE_BINOP
    3483             : #undef ASSEMBLE_COMPARE
    3484             : #undef ASSEMBLE_MULT
    3485             : #undef ASSEMBLE_SHIFT
    3486             : #undef ASSEMBLE_MOVX
    3487             : #undef ASSEMBLE_SSE_BINOP
    3488             : #undef ASSEMBLE_SSE_UNOP
    3489             : #undef ASSEMBLE_AVX_BINOP
    3490             : #undef ASSEMBLE_IEEE754_BINOP
    3491             : #undef ASSEMBLE_IEEE754_UNOP
    3492             : #undef ASSEMBLE_ATOMIC_BINOP
    3493             : #undef ASSEMBLE_ATOMIC64_BINOP
    3494             : #undef ASSEMBLE_SIMD_INSTR
    3495             : #undef ASSEMBLE_SIMD_IMM_INSTR
    3496             : #undef ASSEMBLE_SIMD_PUNPCK_SHUFFLE
    3497             : #undef ASSEMBLE_SIMD_IMM_SHUFFLE
    3498             : 
    3499             : namespace {
    3500             : 
    3501     6149065 : Condition FlagsConditionToCondition(FlagsCondition condition) {  // Maps a FlagsCondition to the Condition code handed to j/setcc/cmovq in this file.
    3502     6149065 :   switch (condition) {
    3503             :     case kUnorderedEqual:  // Shares the code of kEqual; callers visible here test the parity flag (or return early) first.
    3504             :     case kEqual:
    3505             :       return equal;
    3506             :     case kUnorderedNotEqual:  // Shares the code of kNotEqual, with the same caller-side parity handling.
    3507             :     case kNotEqual:
    3508     1356209 :       return not_equal;
    3509             :     case kSignedLessThan:
    3510      168581 :       return less;
    3511             :     case kSignedGreaterThanOrEqual:
    3512       62092 :       return greater_equal;
    3513             :     case kSignedLessThanOrEqual:
    3514       67002 :       return less_equal;
    3515             :     case kSignedGreaterThan:
    3516       72868 :       return greater;
    3517             :     case kUnsignedLessThan:  // Unsigned comparisons use the below/above family of codes.
    3518      164989 :       return below;
    3519             :     case kUnsignedGreaterThanOrEqual:
    3520      274889 :       return above_equal;
    3521             :     case kUnsignedLessThanOrEqual:
    3522      945152 :       return below_equal;
    3523             :     case kUnsignedGreaterThan:
    3524      157297 :       return above;
    3525             :     case kOverflow:
    3526      339794 :       return overflow;
    3527             :     case kNotOverflow:
    3528           7 :       return no_overflow;
    3529             :     default:  // Any condition not listed leaves the switch and reaches UNREACHABLE() below.
    3530             :       break;
    3531             :   }
    3532           0 :   UNREACHABLE();
    3533             : }
    3534             : 
    3535             : }  // namespace
    3536             : 
    3537             : // Assembles branches after this instruction: to branch->true_label when branch->condition holds, else to branch->false_label.
    3538     5406918 : void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
    3539             :   Label::Distance flabel_distance =
    3540     5406918 :       branch->fallthru ? Label::kNear : Label::kFar;  // Near hint for the false label only in the fallthrough case.
    3541     5406918 :   Label* tlabel = branch->true_label;
    3542     5406918 :   Label* flabel = branch->false_label;
    3543     5406918 :   if (branch->condition == kUnorderedEqual) {
    3544       38092 :     __ j(parity_even, flabel, flabel_distance);  // Parity set -> false label, before the equality test below. NOTE(review): presumably parity marks an unordered compare — confirm.
    3545     5368826 :   } else if (branch->condition == kUnorderedNotEqual) {
    3546      118197 :     __ j(parity_even, tlabel);  // Parity set -> true label directly.
    3547             :   }
    3548     5406917 :   __ j(FlagsConditionToCondition(branch->condition), tlabel);
    3549             : 
    3550     5406938 :   if (!branch->fallthru) __ jmp(flabel, flabel_distance);  // No fallthrough: jump to the false label explicitly.
    3551     5406938 : }
    3552             : 
    3553           0 : void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
    3554             :                                             Instruction* instr) {  // Emits code that zeroes kSpeculationPoisonRegister when |condition| does not hold.
    3555             :   // TODO(jarin) Handle float comparisons (kUnordered[Not]Equal).
    3556           0 :   if (condition == kUnorderedEqual || condition == kUnorderedNotEqual) {
    3557             :     return;  // Nothing is emitted for the unordered conditions.
    3558             :   }
    3559             : 
    3560             :   condition = NegateFlagsCondition(condition);  // The cmov below must fire when the original condition is false.
    3561           0 :   __ movl(kScratchRegister, Immediate(0));
    3562           0 :   __ cmovq(FlagsConditionToCondition(condition), kSpeculationPoisonRegister,
    3563           0 :            kScratchRegister);  // Copies the zero from kScratchRegister under the negated condition.
    3564             : }
    3565             : 
    3566      332001 : void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
    3567             :                                             BranchInfo* branch) {  // Like a plain conditional branch, plus an optional counter-driven forced jump to the true label.
    3568             :   Label::Distance flabel_distance =
    3569      332001 :       branch->fallthru ? Label::kNear : Label::kFar;
    3570      332001 :   Label* tlabel = branch->true_label;
    3571      332001 :   Label* flabel = branch->false_label;
    3572      332001 :   Label nodeopt;
    3573      332001 :   if (branch->condition == kUnorderedEqual) {
    3574           0 :     __ j(parity_even, flabel, flabel_distance);  // Parity set -> false label.
    3575      332001 :   } else if (branch->condition == kUnorderedNotEqual) {
    3576        3897 :     __ j(parity_even, tlabel);  // Parity set -> true label.
    3577             :   }
    3578      332001 :   __ j(FlagsConditionToCondition(branch->condition), tlabel);
    3579             : 
    3580      332005 :   if (FLAG_deopt_every_n_times > 0) {  // Stress path: only emitted when the flag is positive.
    3581             :     ExternalReference counter =
    3582         288 :         ExternalReference::stress_deopt_count(isolate());
    3583             : 
    3584         288 :     __ pushfq();  // Preserve the flags and rax around the counter update.
    3585         288 :     __ pushq(rax);
    3586         288 :     __ load_rax(counter);
    3587             :     __ decl(rax);
    3588         288 :     __ j(not_zero, &nodeopt);  // Counter still non-zero after the decrement: no forced jump this time.
    3589             : 
    3590         576 :     __ Set(rax, FLAG_deopt_every_n_times);  // Counter hit zero: reset it to the flag value ...
    3591         288 :     __ store_rax(counter);
    3592         288 :     __ popq(rax);
    3593         288 :     __ popfq();
    3594         288 :     __ jmp(tlabel);  // ... and take the true label unconditionally.
    3595             : 
    3596         288 :     __ bind(&nodeopt);
    3597         288 :     __ store_rax(counter);  // Write the decremented counter back, then restore rax and the flags.
    3598         288 :     __ popq(rax);
    3599         288 :     __ popfq();
    3600             :   }
    3601             : 
    3602      332005 :   if (!branch->fallthru) {
    3603           0 :     __ jmp(flabel, flabel_distance);  // No fallthrough: jump to the false label explicitly.
    3604             :   }
    3605      332005 : }
    3606             : 
    3607     5121997 : void CodeGenerator::AssembleArchJump(RpoNumber target) {  // Emits a jmp to |target|'s label unless that block is next in assembly order.
    3608     8350975 :   if (!IsNextInAssemblyOrder(target)) __ jmp(GetLabel(target));
    3609     5122010 : }
    3610             : 
    3611       34500 : void CodeGenerator::AssembleArchTrap(Instruction* instr,
    3612             :                                      FlagsCondition condition) {  // Jumps to an out-of-line trap entry when |condition| holds.
    3613             :   auto ool = new (zone()) WasmOutOfLineTrap(this, instr);  // Placement-new into zone(); no matching delete in this function.
    3614             :   Label* tlabel = ool->entry();
    3615       34447 :   Label end;
    3616       34447 :   if (condition == kUnorderedEqual) {
    3617           0 :     __ j(parity_even, &end);  // Parity set -> skip the trap jump entirely.
    3618       34447 :   } else if (condition == kUnorderedNotEqual) {
    3619         304 :     __ j(parity_even, tlabel);  // Parity set -> go to the trap entry directly.
    3620             :   }
    3621       34447 :   __ j(FlagsConditionToCondition(condition), tlabel);
    3622       34472 :   __ bind(&end);
    3623       34481 : }
    3624             : 
    3625             : // Assembles boolean materializations after this instruction: the last output register receives 1 or 0 for |condition|.
    3626      375714 : void CodeGenerator::AssembleArchBoolean(Instruction* instr,
    3627             :                                         FlagsCondition condition) {
    3628             :   X64OperandConverter i(this, instr);
    3629      375714 :   Label done;
    3630             : 
    3631             :   // Materialize a full 64-bit 1 or 0 value. The result register is always the
    3632             :   // last output of the instruction.
    3633      375714 :   Label check;
    3634             :   DCHECK_NE(0u, instr->OutputCount());
    3635      375714 :   Register reg = i.OutputRegister(instr->OutputCount() - 1);
    3636      375714 :   if (condition == kUnorderedEqual) {
    3637        2726 :     __ j(parity_odd, &check, Label::kNear);  // Parity clear -> evaluate the condition at |check|.
    3638             :     __ movl(reg, Immediate(0));  // Parity set: the result is 0 without consulting the condition.
    3639        2726 :     __ jmp(&done, Label::kNear);
    3640      372988 :   } else if (condition == kUnorderedNotEqual) {
    3641        2496 :     __ j(parity_odd, &check, Label::kNear);  // Parity clear -> evaluate the condition at |check|.
    3642             :     __ movl(reg, Immediate(1));  // Parity set: the result is 1 without consulting the condition.
    3643        2496 :     __ jmp(&done, Label::kNear);
    3644             :   }
    3645      375714 :   __ bind(&check);
    3646      375714 :   __ setcc(FlagsConditionToCondition(condition), reg);
    3647             :   __ movzxbl(reg, reg);  // NOTE(review): presumably setcc writes only the low byte and this zero-extends it — confirm.
    3648      375717 :   __ bind(&done);
    3649      375720 : }
    3650             : 
    3651       33872 : void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {  // Collects the (value, label) cases of |instr| and delegates to AssembleArchBinarySearchSwitchRange.
    3652             :   X64OperandConverter i(this, instr);
    3653       33872 :   Register input = i.InputRegister(0);
    3654             :   std::vector<std::pair<int32_t, Label*>> cases;
    3655      410520 :   for (size_t index = 2; index < instr->InputCount(); index += 2) {  // Inputs from index 2 on come in (int32 value, target block) pairs.
    3656      376648 :     cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
    3657             :   }
    3658             :   AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),  // NOTE(review): input 1 is presumably the default target block — confirm against the helper.
    3659       33873 :                                       cases.data() + cases.size());
    3660       33872 : }
    3661             : 
    3662           0 : void CodeGenerator::AssembleArchLookupSwitch(Instruction* instr) {  // Emits a linear chain of compare-and-jump pairs, one per case.
    3663             :   X64OperandConverter i(this, instr);
    3664             :   Register input = i.InputRegister(0);
    3665           0 :   for (size_t index = 2; index < instr->InputCount(); index += 2) {  // Inputs from index 2 on come in (int32 value, target block) pairs.
    3666           0 :     __ cmpl(input, Immediate(i.InputInt32(index + 0)));
    3667           0 :     __ j(equal, GetLabel(i.InputRpo(index + 1)));
    3668             :   }
    3669           0 :   AssembleArchJump(i.InputRpo(1));  // No case matched: continue at the block given by input 1.
    3670           0 : }
    3671             : 
    3672         307 : void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {  // Emits a bounds check followed by an indirect jump through a jump table.
    3673             :   X64OperandConverter i(this, instr);
    3674             :   Register input = i.InputRegister(0);
    3675         307 :   int32_t const case_count = static_cast<int32_t>(instr->InputCount() - 2);  // Inputs 0 and 1 are not case targets.
    3676         307 :   Label** cases = zone()->NewArray<Label*>(case_count);
    3677      402357 :   for (int32_t index = 0; index < case_count; ++index) {
    3678      402050 :     cases[index] = GetLabel(i.InputRpo(index + 2));  // Case |index| targets the block in input |index + 2|.
    3679             :   }
    3680         307 :   Label* const table = AddJumpTable(cases, case_count);
    3681         307 :   __ cmpl(input, Immediate(case_count));
    3682         307 :   __ j(above_equal, GetLabel(i.InputRpo(1)));  // Unsigned input >= case_count: go to the block given by input 1.
    3683         614 :   __ leaq(kScratchRegister, Operand(table));
    3684         307 :   __ jmp(Operand(kScratchRegister, input, times_8, 0));  // NOTE(review): times_8 presumably matches 8-byte table entries — confirm.
    3685         307 : }
    3686             : 
    3687             : namespace {
    3688             : 
    3689             : static const int kQuadWordSize = 16;  // Size of one saved XMM register; FinishFrame divides it by kSystemPointerSize to get a slot count.
    3690             : 
    3691             : }  // namespace
    3692             : 
    3693     2515398 : void CodeGenerator::FinishFrame(Frame* frame) {  // Reserves frame slots for the callee-saved registers listed by the incoming call descriptor.
    3694             :   auto call_descriptor = linkage()->GetIncomingDescriptor();
    3695             : 
    3696             :   const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
    3697     2515398 :   if (saves_fp != 0) {
    3698             :     frame->AlignSavedCalleeRegisterSlots();
    3699           0 :     if (saves_fp != 0) {  // Reserve slots for callee-saved XMM registers. NOTE(review): repeats the enclosing check, so it is always true here.
    3700             :       const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
    3701           0 :       frame->AllocateSavedCalleeRegisterSlots(
    3702           0 :           saves_fp_count * (kQuadWordSize / kSystemPointerSize));  // kQuadWordSize per register, expressed in pointer-sized slots.
    3703             :     }
    3704             :   }
    3705             :   const RegList saves = call_descriptor->CalleeSavedRegisters();
    3706     2515398 :   if (saves != 0) {  // Reserve slots for callee-saved registers.
    3707             :     int count = 0;
    3708    29846685 :     for (int i = Register::kNumRegisters - 1; i >= 0; i--) {  // Count the bits set in |saves|: one slot per register.
    3709    14471120 :       if (((1 << i) & saves)) {
    3710     4522225 :         ++count;
    3711             :       }
    3712             :     }
    3713             :     frame->AllocateSavedCalleeRegisterSlots(count);
    3714             :   }
    3715     2515398 : }
    3716             : 
    3717     2537961 : void CodeGenerator::AssembleConstructFrame() {
    3718             :   auto call_descriptor = linkage()->GetIncomingDescriptor();
    3719     2537961 :   if (frame_access_state()->has_frame()) {
    3720             :     int pc_base = __ pc_offset();
    3721             : 
    3722     2538176 :     if (call_descriptor->IsCFunctionCall()) {
    3723      904445 :       __ pushq(rbp);
    3724             :       __ movq(rbp, rsp);
    3725     1633731 :     } else if (call_descriptor->IsJSFunctionCall()) {
    3726      644073 :       __ Prologue();
    3727      644075 :       if (call_descriptor->PushArgumentCount()) {
    3728       38448 :         __ pushq(kJavaScriptCallArgCountRegister);
    3729             :       }
    3730             :     } else {
    3731     1979162 :       __ StubPrologue(info()->GetOutputStackFrameType());
    3732      989652 :       if (call_descriptor->IsWasmFunctionCall()) {
    3733      864304 :         __ pushq(kWasmInstanceRegister);
    3734      125348 :       } else if (call_descriptor->IsWasmImportWrapper()) {
    3735             :         // WASM import wrappers are passed a tuple in the place of the instance.
    3736             :         // Unpack the tuple into the instance and the target callable.
    3737             :         // This must be done here in the codegen because it cannot be expressed
    3738             :         // properly in the graph.
    3739             :         __ LoadTaggedPointerField(
    3740             :             kJSFunctionRegister,
    3741        6490 :             FieldOperand(kWasmInstanceRegister, Tuple2::kValue2Offset));
    3742             :         __ LoadTaggedPointerField(
    3743             :             kWasmInstanceRegister,
    3744        6490 :             FieldOperand(kWasmInstanceRegister, Tuple2::kValue1Offset));
    3745        6490 :         __ pushq(kWasmInstanceRegister);
    3746             :       }
    3747             :     }
    3748             : 
    3749     2538344 :     unwinding_info_writer_.MarkFrameConstructed(pc_base);
    3750             :   }
    3751             :   int required_slots = frame()->GetTotalFrameSlotCount() -
    3752     2537639 :                        call_descriptor->CalculateFixedFrameSize();
    3753             : 
    3754     2538138 :   if (info()->is_osr()) {
    3755             :     // TurboFan OSR-compiled functions cannot be entered directly.
    3756        4698 :     __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
    3757             : 
    3758             :     // Unoptimized code jumps directly to this entrypoint while the unoptimized
    3759             :     // frame is still on the stack. Optimized code uses OSR values directly from
    3760             :     // the unoptimized frame. Thus, all that needs to be done is to allocate the
    3761             :     // remaining stack slots.
    3762        4698 :     if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
    3763        4698 :     osr_pc_offset_ = __ pc_offset();
    3764        4698 :     required_slots -= static_cast<int>(osr_helper()->UnoptimizedFrameSlots());
    3765        4698 :     ResetSpeculationPoison();
    3766             :   }
    3767             : 
    3768             :   const RegList saves = call_descriptor->CalleeSavedRegisters();
    3769             :   const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
    3770             : 
    3771     2538009 :   if (required_slots > 0) {
    3772             :     DCHECK(frame_access_state()->has_frame());
    3773     2044763 :     if (info()->IsWasm() && required_slots > 128) {
    3774             :       // For WebAssembly functions with big frames we have to do the stack
    3775             :       // overflow check before we construct the frame. Otherwise we may not
    3776             :       // have enough space on the stack to call the runtime for the stack
    3777             :       // overflow.
    3778           8 :       Label done;
    3779             : 
    3780             :       // If the frame is bigger than the stack, we throw the stack overflow
    3781             :       // exception unconditionally. Thereby we can avoid the integer overflow
    3782             :       // check in the condition code.
    3783           8 :       if (required_slots * kSystemPointerSize < FLAG_stack_size * 1024) {
    3784           8 :         __ movq(kScratchRegister,
    3785             :                 FieldOperand(kWasmInstanceRegister,
    3786             :                              WasmInstanceObject::kRealStackLimitAddressOffset));
    3787          16 :         __ movq(kScratchRegister, Operand(kScratchRegister, 0));
    3788             :         __ addq(kScratchRegister,
    3789             :                 Immediate(required_slots * kSystemPointerSize));
    3790             :         __ cmpq(rsp, kScratchRegister);
    3791           8 :         __ j(above_equal, &done);
    3792             :       }
    3793             : 
    3794           8 :       __ near_call(wasm::WasmCode::kWasmStackOverflow,
    3795           8 :                    RelocInfo::WASM_STUB_CALL);
    3796             :       ReferenceMap* reference_map = new (zone()) ReferenceMap(zone());
    3797             :       RecordSafepoint(reference_map, Safepoint::kSimple,
    3798           8 :                       Safepoint::kNoLazyDeopt);
    3799           8 :       __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
    3800           8 :       __ bind(&done);
    3801             :     }
    3802             : 
    3803             :     // Skip callee-saved and return slots, which are created below.
    3804     2044763 :     required_slots -= base::bits::CountPopulation(saves);
    3805             :     required_slots -= base::bits::CountPopulation(saves_fp) *
    3806     2044763 :                       (kQuadWordSize / kSystemPointerSize);
    3807     2044763 :     required_slots -= frame()->GetReturnSlotCount();
    3808     2044763 :     if (required_slots > 0) {
    3809     1868179 :       __ subq(rsp, Immediate(required_slots * kSystemPointerSize));
    3810             :     }
    3811             :   }
    3812             : 
    3813     2538018 :   if (saves_fp != 0) {  // Save callee-saved XMM registers.
    3814             :     const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
    3815           0 :     const int stack_size = saves_fp_count * kQuadWordSize;
    3816             :     // Adjust the stack pointer.
    3817           0 :     __ subq(rsp, Immediate(stack_size));
    3818             :     // Store the registers on the stack.
    3819             :     int slot_idx = 0;
    3820           0 :     for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
    3821           0 :       if (!((1 << i) & saves_fp)) continue;
    3822           0 :       __ movdqu(Operand(rsp, kQuadWordSize * slot_idx),
    3823           0 :                 XMMRegister::from_code(i));
    3824           0 :       slot_idx++;
    3825             :     }
    3826             :   }
    3827             : 
    3828     2538018 :   if (saves != 0) {  // Save callee-saved registers.
    3829    29846685 :     for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
    3830    14471120 :       if (!((1 << i) & saves)) continue;
    3831     4522225 :       __ pushq(Register::from_code(i));
    3832             :     }
    3833             :   }
    3834             : 
    3835             :   // Allocate return slots (located after callee-saved).
    3836     2538018 :   if (frame()->GetReturnSlotCount() > 0) {
    3837         672 :     __ subq(rsp, Immediate(frame()->GetReturnSlotCount() * kSystemPointerSize));
    3838             :   }
    3839     2538018 : }
    3840             : // Emits the return sequence: restores callee-saved registers, deconstructs the frame if needed, then returns while popping the stack parameters plus |pop| extra slots.
    3841     2787043 : void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
    3842             :   auto call_descriptor = linkage()->GetIncomingDescriptor();
    3843             : 
    3844             :   // Restore callee-saved registers, releasing the return slots first.
    3845             :   const RegList saves = call_descriptor->CalleeSavedRegisters();
    3846     2787043 :   if (saves != 0) {
    3847             :     const int returns = frame()->GetReturnSlotCount();
    3848      913813 :     if (returns != 0) {
    3849         656 :       __ addq(rsp, Immediate(returns * kSystemPointerSize));
    3850             :     }
    3851    30155829 :     for (int i = 0; i < Register::kNumRegisters; i++) {  // Ascending register codes.
    3852    14621008 :       if (!((1 << i) & saves)) continue;  // Not in the callee-saved set.
    3853     4569065 :       __ popq(Register::from_code(i));
    3854             :     }
    3855             :   }
    3856             :   const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
    3857     2787043 :   if (saves_fp != 0) {
    3858             :     const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
    3859           0 :     const int stack_size = saves_fp_count * kQuadWordSize;
    3860             :     // Load the registers from the stack, one kQuadWordSize slot per saved
    3861             :     int slot_idx = 0;  // XMM register, in ascending register-code order.
    3862           0 :     for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
    3863           0 :       if (!((1 << i) & saves_fp)) continue;
    3864           0 :       __ movdqu(XMMRegister::from_code(i),
    3865           0 :                 Operand(rsp, kQuadWordSize * slot_idx));
    3866           0 :       slot_idx++;
    3867             :     }
    3868             :     // Adjust the stack pointer past the whole XMM save area.
    3869           0 :     __ addq(rsp, Immediate(stack_size));
    3870             :   }
    3871             : 
    3872             :   unwinding_info_writer_.MarkBlockWillExit();
    3873             : 
    3874             :   // Might need rcx for scratch if pop_size is too big or if there is a variable
    3875             :   // pop count.
    3876             :   DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rcx.bit());
    3877             :   DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rdx.bit());
    3878     2787011 :   size_t pop_size = call_descriptor->StackParameterCount() * kSystemPointerSize;
    3879             :   X64OperandConverter g(this, nullptr);
    3880     2787011 :   if (call_descriptor->IsCFunctionCall()) {
    3881      913813 :     AssembleDeconstructFrame();
    3882     1873198 :   } else if (frame_access_state()->has_frame()) {
    3883     3618884 :     if (pop->IsImmediate() && g.ToConstant(pop).ToInt32() == 0) {
    3884             :       // Canonicalize JSFunction return sites for now: the first pop-zero return binds return_label_ and emits the epilogue, later ones jump to it.
    3885     1789545 :       if (return_label_.is_bound()) {
    3886      281455 :         __ jmp(&return_label_);
    3887             :         return;  // The shared return site emits the rest.
    3888             :       } else {
    3889     1508090 :         __ bind(&return_label_);
    3890     1508187 :         AssembleDeconstructFrame();
    3891             :       }
    3892             :     } else {
    3893       39788 :       AssembleDeconstructFrame();
    3894             :     }
    3895             :   }
    3896             :   // Emit the actual return; pop_size is a byte count.
    3897     2505971 :   if (pop->IsImmediate()) {
    3898     4932475 :     pop_size += g.ToConstant(pop).ToInt32() * kSystemPointerSize;
    3899     2466268 :     CHECK_LT(pop_size, static_cast<size_t>(std::numeric_limits<int>::max()));
    3900     4932536 :     __ Ret(static_cast<int>(pop_size), rcx);
    3901             :   } else {
    3902             :     Register pop_reg = g.ToRegister(pop);
    3903       39764 :     Register scratch_reg = pop_reg == rcx ? rdx : rcx;  // Must not alias pop_reg.
    3904       39764 :     __ popq(scratch_reg);  // Top of stack becomes the jump target below.
    3905       79528 :     __ leaq(rsp, Operand(rsp, pop_reg, times_8, static_cast<int>(pop_size)));  // rsp += pop_reg * 8 + pop_size.
    3906       39764 :     __ jmp(scratch_reg);
    3907             :   }
    3908             : }
    3909             : // Finishes code generation by having the assembler patch its constant pool.
    3910     2514917 : void CodeGenerator::FinishCode() { tasm()->PatchConstPool(); }
    3911             : // Emits a move from |source| to |destination|, choosing the instruction sequence from the move kind reported by MoveType::InferMove.
    3912    38515432 : void CodeGenerator::AssembleMove(InstructionOperand* source,
    3913             :                                  InstructionOperand* destination) {
    3914             :   X64OperandConverter g(this, nullptr);
    3915             :   // Helper function to write the given constant to the dst register; the sequence depends on the constant's type.
    3916    19141472 :   auto MoveConstantToRegister = [&](Register dst, Constant src) {
    3917    19141472 :     switch (src.type()) {
    3918             :       case Constant::kInt32: {
    3919     3875880 :         if (RelocInfo::IsWasmReference(src.rmode())) {
    3920    27580563 :           __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));  // Keep the reloc mode on the immediate.
    3921             :         } else {
    3922             :           int32_t value = src.ToInt32();
    3923     3875880 :           if (value == 0) {
    3924      992350 :             __ xorl(dst, dst);  // Zero via xor instead of a mov-immediate.
    3925             :           } else {
    3926     2883530 :             __ movl(dst, Immediate(value));
    3927             :           }
    3928             :         }
    3929             :         break;
    3930             :       }
    3931             :       case Constant::kInt64:
    3932     1760941 :         if (RelocInfo::IsWasmReference(src.rmode())) {
    3933           0 :           __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));
    3934             :         } else {
    3935     1760941 :           __ Set(dst, src.ToInt64());
    3936             :         }
    3937             :         break;
    3938             :       case Constant::kFloat32:
    3939         688 :         __ MoveNumber(dst, src.ToFloat32());
    3940         344 :         break;
    3941             :       case Constant::kFloat64:
    3942     1790347 :         __ MoveNumber(dst, src.ToFloat64().value());
    3943     1790341 :         break;
    3944             :       case Constant::kExternalReference:
    3945     3272466 :         __ Move(dst, src.ToExternalReference());
    3946     3272488 :         break;
    3947             :       case Constant::kHeapObject: {
    3948     8439239 :         Handle<HeapObject> src_object = src.ToHeapObject();
    3949             :         RootIndex index;
    3950     8439231 :         if (IsMaterializableFromRoot(src_object, &index)) {  // Use a root load when one is available.
    3951     1920669 :           __ LoadRoot(dst, index);
    3952             :         } else {
    3953     6518560 :           __ Move(dst, src_object);
    3954             :         }
    3955             :         break;
    3956             :       }
    3957             :       case Constant::kDelayedStringConstant: {
    3958        2125 :         const StringConstantBase* src_constant = src.ToDelayedStringConstant();
    3959        2125 :         __ MoveStringConstant(dst, src_constant);
    3960        2125 :         break;
    3961             :       }
    3962             :       case Constant::kRpoNumber:
    3963           0 :         UNREACHABLE();  // TODO(dcarney): load of labels on x64.
    3964             :         break;
    3965             :     }
    3966    57657005 :   };
    3967             :   // Helper function to write the given constant to the stack: non-wasm-reference int32/int64 constants are stored directly, anything else goes through kScratchRegister.
    3968       38057 :   auto MoveConstantToSlot = [&](Operand dst, Constant src) {
    3969       38057 :     if (!RelocInfo::IsWasmReference(src.rmode())) {
    3970       38057 :       switch (src.type()) {
    3971             :         case Constant::kInt32:
    3972       38057 :           __ movq(dst, Immediate(src.ToInt32()));
    3973       19413 :           return;
    3974             :         case Constant::kInt64:
    3975       12727 :           __ Set(dst, src.ToInt64());
    3976       12727 :           return;
    3977             :         default:
    3978             :           break;
    3979             :       }
    3980             :     }
    3981        5917 :     MoveConstantToRegister(kScratchRegister, src);  // Fallback: materialize in the scratch register.
    3982        5917 :     __ movq(dst, kScratchRegister);
    3983    38515432 :   };
    3984             :   // Dispatch on the source and destination operand kinds.
    3985    38515432 :   switch (MoveType::InferMove(source, destination)) {
    3986             :     case MoveType::kRegisterToRegister:
    3987     4149488 :       if (source->IsRegister()) {
    3988     4037106 :         __ movq(g.ToRegister(destination), g.ToRegister(source));
    3989             :       } else {
    3990             :         DCHECK(source->IsFPRegister());
    3991             :         __ Movapd(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
    3992             :       }
    3993             :       return;
    3994             :     case MoveType::kRegisterToStack: {
    3995     5794330 :       Operand dst = g.ToOperand(destination);
    3996     5794330 :       if (source->IsRegister()) {
    3997     5447362 :         __ movq(dst, g.ToRegister(source));
    3998             :       } else {
    3999             :         DCHECK(source->IsFPRegister());
    4000             :         XMMRegister src = g.ToDoubleRegister(source);
    4001             :         MachineRepresentation rep =
    4002             :             LocationOperand::cast(source)->representation();
    4003      346968 :         if (rep != MachineRepresentation::kSimd128) {  // Movsd unless the value is SIMD128, which uses Movups.
    4004             :           __ Movsd(dst, src);
    4005             :         } else {
    4006             :           __ Movups(dst, src);
    4007             :         }
    4008             :       }
    4009             :       return;
    4010             :     }
    4011             :     case MoveType::kStackToRegister: {
    4012     9030184 :       Operand src = g.ToOperand(source);
    4013     9030184 :       if (source->IsStackSlot()) {
    4014     8518159 :         __ movq(g.ToRegister(destination), src);
    4015             :       } else {
    4016             :         DCHECK(source->IsFPStackSlot());
    4017             :         XMMRegister dst = g.ToDoubleRegister(destination);
    4018             :         MachineRepresentation rep =
    4019             :             LocationOperand::cast(source)->representation();
    4020      512025 :         if (rep != MachineRepresentation::kSimd128) {
    4021             :           __ Movsd(dst, src);
    4022             :         } else {
    4023             :           __ Movups(dst, src);
    4024             :         }
    4025             :       }
    4026             :       return;
    4027             :     }
    4028             :     case MoveType::kStackToStack: {
    4029       44958 :       Operand src = g.ToOperand(source);
    4030       44958 :       Operand dst = g.ToOperand(destination);
    4031       44958 :       if (source->IsStackSlot()) {
    4032             :         // Spill on demand to use a temporary register for memory-to-memory
    4033             :         // moves.
    4034       24813 :         __ movq(kScratchRegister, src);
    4035             :         __ movq(dst, kScratchRegister);
    4036             :       } else {
    4037             :         MachineRepresentation rep =
    4038             :             LocationOperand::cast(source)->representation();
    4039       20145 :         if (rep != MachineRepresentation::kSimd128) {  // FP slots are copied through kScratchDoubleReg.
    4040             :           __ Movsd(kScratchDoubleReg, src);
    4041             :           __ Movsd(dst, kScratchDoubleReg);
    4042             :         } else {
    4043             :           DCHECK(source->IsSimd128StackSlot());
    4044             :           __ Movups(kScratchDoubleReg, src);
    4045             :           __ Movups(dst, kScratchDoubleReg);
    4046             :         }
    4047             :       }
    4048             :       return;
    4049             :     }
    4050             :     case MoveType::kConstantToRegister: {
    4051    19451822 :       Constant src = g.ToConstant(source);
    4052    19451982 :       if (destination->IsRegister()) {
    4053    19135581 :         MoveConstantToRegister(g.ToRegister(destination), src);
    4054             :       } else {
    4055             :         DCHECK(destination->IsFPRegister());
    4056      316401 :         XMMRegister dst = g.ToDoubleRegister(destination);
    4057      316401 :         if (src.type() == Constant::kFloat32) {
    4058             :           // TODO(turbofan): Can we do better here?
    4059       12670 :           __ Move(dst, bit_cast<uint32_t>(src.ToFloat32()));  // Pass the float's raw 32-bit pattern.
    4060             :         } else {
    4061             :           DCHECK_EQ(src.type(), Constant::kFloat64);
    4062      303731 :           __ Move(dst, src.ToFloat64().AsUint64());
    4063             :         }
    4064             :       }
    4065             :       return;
    4066             :     }
    4067             :     case MoveType::kConstantToStack: {
    4068       44616 :       Constant src = g.ToConstant(source);
    4069       44616 :       Operand dst = g.ToOperand(destination);
    4070       44616 :       if (destination->IsStackSlot()) {
    4071       38057 :         MoveConstantToSlot(dst, src);
    4072             :       } else {
    4073             :         DCHECK(destination->IsFPStackSlot());
    4074        6559 :         if (src.type() == Constant::kFloat32) {
    4075        3008 :           __ movl(dst, Immediate(bit_cast<uint32_t>(src.ToFloat32())));
    4076             :         } else {
    4077             :           DCHECK_EQ(src.type(), Constant::kFloat64);
    4078        3551 :           __ movq(kScratchRegister, src.ToFloat64().AsUint64());  // Float64 bits go through the scratch register.
    4079             :           __ movq(dst, kScratchRegister);
    4080             :         }
    4081             :       }
    4082             :       return;
    4083             :     }
    4084             :   }
    4085           0 :   UNREACHABLE();  // Every case above returns.
    4086             : }
    4087             : // Emits code that exchanges the contents of |source| and |destination|, choosing the sequence from the kind reported by MoveType::InferSwap.
    4088       66091 : void CodeGenerator::AssembleSwap(InstructionOperand* source,
    4089             :                                  InstructionOperand* destination) {
    4090             :   X64OperandConverter g(this, nullptr);
    4091             :   // Dispatch on the source and destination operand kinds.  Not all
    4092             :   // combinations are possible.
    4093       66091 :   switch (MoveType::InferSwap(source, destination)) {
    4094             :     case MoveType::kRegisterToRegister: {
    4095       55667 :       if (source->IsRegister()) {
    4096             :         Register src = g.ToRegister(source);
    4097             :         Register dst = g.ToRegister(destination);
    4098       53100 :         __ movq(kScratchRegister, src);  // Three-move exchange through kScratchRegister.
    4099             :         __ movq(src, dst);
    4100             :         __ movq(dst, kScratchRegister);
    4101             :       } else {
    4102             :         DCHECK(source->IsFPRegister());
    4103             :         XMMRegister src = g.ToDoubleRegister(source);
    4104             :         XMMRegister dst = g.ToDoubleRegister(destination);
    4105             :         __ Movapd(kScratchDoubleReg, src);  // Same pattern through kScratchDoubleReg.
    4106             :         __ Movapd(src, dst);
    4107             :         __ Movapd(dst, kScratchDoubleReg);
    4108             :       }
    4109             :       return;
    4110             :     }
    4111             :     case MoveType::kRegisterToStack: {
    4112        6767 :       if (source->IsRegister()) {
    4113             :         Register src = g.ToRegister(source);
    4114        1583 :         __ pushq(src);  // Park the register value on the stack.
    4115             :         frame_access_state()->IncreaseSPDelta(1);  // NOTE(review): presumably keeps the ToOperand() below correct while the pushed value is on the stack - confirm.
    4116        1583 :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4117        1583 :                                                          kSystemPointerSize);
    4118             :         __ movq(src, g.ToOperand(destination));
    4119             :         frame_access_state()->IncreaseSPDelta(-1);
    4120        1583 :         __ popq(g.ToOperand(destination));  // Pop the parked value straight into the slot.
    4121             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4122        1583 :                                                          -kSystemPointerSize);
    4123             :       } else {
    4124             :         DCHECK(source->IsFPRegister());
    4125             :         XMMRegister src = g.ToDoubleRegister(source);
    4126        5184 :         Operand dst = g.ToOperand(destination);
    4127             :         MachineRepresentation rep =
    4128             :             LocationOperand::cast(source)->representation();
    4129        5184 :         if (rep != MachineRepresentation::kSimd128) {  // Exchange through kScratchDoubleReg in both branches.
    4130             :           __ Movsd(kScratchDoubleReg, src);
    4131             :           __ Movsd(src, dst);
    4132             :           __ Movsd(dst, kScratchDoubleReg);
    4133             :         } else {
    4134             :           __ Movups(kScratchDoubleReg, src);
    4135             :           __ Movups(src, dst);
    4136             :           __ Movups(dst, kScratchDoubleReg);
    4137             :         }
    4138             :       }
    4139             :       return;
    4140             :     }
    4141             :     case MoveType::kStackToStack: {
    4142        3657 :       Operand src = g.ToOperand(source);
    4143        3657 :       Operand dst = g.ToOperand(destination);
    4144             :       MachineRepresentation rep =
    4145             :           LocationOperand::cast(source)->representation();
    4146        3657 :       if (rep != MachineRepresentation::kSimd128) {
    4147             :         Register tmp = kScratchRegister;
    4148        2789 :         __ movq(tmp, dst);  // Save dst in the scratch register.
    4149        2789 :         __ pushq(src);  // Then use stack to copy src to destination.
    4150        2789 :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4151        2789 :                                                          kSystemPointerSize);
    4152        2789 :         __ popq(dst);
    4153             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4154        2789 :                                                          -kSystemPointerSize);
    4155             :         __ movq(src, tmp);  // Finally store the saved dst value into src.
    4156             :       } else {
    4157             :         // Without AVX, misaligned reads and writes will trap. Move using the
    4158             :         // stack, in two parts.
    4159         868 :         __ movups(kScratchDoubleReg, dst);  // Save dst in scratch register.
    4160         868 :         __ pushq(src);  // Then use stack to copy src to destination.
    4161         868 :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4162         868 :                                                          kSystemPointerSize);
    4163         868 :         __ popq(dst);
    4164             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4165         868 :                                                          -kSystemPointerSize);
    4166         868 :         __ pushq(g.ToOperand(source, kSystemPointerSize));  // Second part; NOTE(review): the extra argument looks like an added offset - confirm.
    4167             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4168         868 :                                                          kSystemPointerSize);
    4169         868 :         __ popq(g.ToOperand(destination, kSystemPointerSize));
    4170             :         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
    4171         868 :                                                          -kSystemPointerSize);
    4172         868 :         __ movups(src, kScratchDoubleReg);  // Store the saved dst value into src.
    4173             :       }
    4174             :       return;
    4175             :     }
    4176             :     default:
    4177           0 :       UNREACHABLE();  // No other move type is handled for swaps.
    4178             :       break;
    4179             :   }
    4180             : }
    4181             : // Emits the jump table: one dq() entry per label in |targets|, in index order.
    4182         307 : void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
    4183      402357 :   for (size_t index = 0; index < target_count; ++index) {
    4184      201025 :     __ dq(targets[index]);
    4185             :   }
    4186         307 : }
    4187             : 
    4188             : #undef __
    4189             : 
    4190             : }  // namespace compiler
    4191             : }  // namespace internal
    4192      120216 : }  // namespace v8

Generated by: LCOV version 1.10