Line data Source code
1 : // Copyright (c) 1994-2006 Sun Microsystems Inc.
2 : // All Rights Reserved.
3 : //
4 : // Redistribution and use in source and binary forms, with or without
5 : // modification, are permitted provided that the following conditions are
6 : // met:
7 : //
8 : // - Redistributions of source code must retain the above copyright notice,
9 : // this list of conditions and the following disclaimer.
10 : //
11 : // - Redistribution in binary form must reproduce the above copyright
12 : // notice, this list of conditions and the following disclaimer in the
13 : // documentation and/or other materials provided with the distribution.
14 : //
15 : // - Neither the name of Sun Microsystems or the names of contributors may
16 : // be used to endorse or promote products derived from this software without
17 : // specific prior written permission.
18 : //
19 : // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
20 : // IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 : // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 : // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23 : // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 : // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 : // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26 : // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27 : // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28 : // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 : // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 :
31 : // The original source code covered by the above license above has been
32 : // modified significantly by Google Inc.
33 : // Copyright 2012 the V8 project authors. All rights reserved.
34 :
35 : // A lightweight X64 Assembler.
36 :
37 : #ifndef V8_X64_ASSEMBLER_X64_H_
38 : #define V8_X64_ASSEMBLER_X64_H_
39 :
40 : #include <deque>
41 : #include <forward_list>
42 : #include <vector>
43 :
44 : #include "src/assembler.h"
45 : #include "src/x64/sse-instr.h"
46 :
47 : namespace v8 {
48 : namespace internal {
49 :
50 : // Utility functions
51 :
52 : #define GENERAL_REGISTERS(V) \
53 : V(rax) \
54 : V(rcx) \
55 : V(rdx) \
56 : V(rbx) \
57 : V(rsp) \
58 : V(rbp) \
59 : V(rsi) \
60 : V(rdi) \
61 : V(r8) \
62 : V(r9) \
63 : V(r10) \
64 : V(r11) \
65 : V(r12) \
66 : V(r13) \
67 : V(r14) \
68 : V(r15)
69 :
70 : #define ALLOCATABLE_GENERAL_REGISTERS(V) \
71 : V(rax) \
72 : V(rbx) \
73 : V(rdx) \
74 : V(rcx) \
75 : V(rsi) \
76 : V(rdi) \
77 : V(r8) \
78 : V(r9) \
79 : V(r11) \
80 : V(r12) \
81 : V(r14) \
82 : V(r15)
83 :
// The length of pushq(rbp), movp(rbp, rsp), Push(rsi) and Push(rdi).
constexpr int kNoCodeAgeSequenceLength = kPointerSize == kInt64Size ? 6 : 17;

const int kNumRegs = 16;
// Caller-saved registers in JavaScript code, as a bit mask over the register
// codes assigned by GENERAL_REGISTERS (rax == bit 0, ..., r15 == bit 15).
const RegList kJSCallerSaved =
    1 << 0 |  // rax
    1 << 1 |  // rcx
    1 << 2 |  // rdx
    1 << 3 |  // rbx - used as a caller-saved register in JavaScript code
    1 << 7;  // rdi - callee function

// Number of bits set in kJSCallerSaved above.
const int kNumJSCallerSaved = 5;

// Number of registers for which space is reserved in safepoints.
const int kNumSafepointRegisters = 16;
99 :
// One code per general-purpose register, in GENERAL_REGISTERS declaration
// order (kRegCode_rax == 0 ... kRegCode_r15 == 15). kRegAfterLast is the
// register count, used as the RegisterBase bound below.
enum RegisterCode {
#define REGISTER_CODE(R) kRegCode_##R,
  GENERAL_REGISTERS(REGISTER_CODE)
#undef REGISTER_CODE
      kRegAfterLast
};
106 :
107 : class Register : public RegisterBase<Register, kRegAfterLast> {
108 : public:
109 527369 : bool is_byte_register() const { return reg_code_ <= 3; }
110 : // Return the high bit of the register code as a 0 or 1. Used often
111 : // when constructing the REX prefix byte.
112 147679239 : int high_bit() const { return reg_code_ >> 3; }
113 : // Return the 3 low bits of the register code. Used when encoding registers
114 : // in modR/M, SIB, and opcode bytes.
115 150789912 : int low_bits() const { return reg_code_ & 0x7; }
116 :
117 : private:
118 : friend class RegisterBase<Register, kRegAfterLast>;
119 : explicit constexpr Register(int code) : RegisterBase(code) {}
120 : };
121 :
122 : static_assert(IS_TRIVIALLY_COPYABLE(Register) &&
123 : sizeof(Register) == sizeof(int),
124 : "Register can efficiently be passed by value");
125 :
126 : #define DECLARE_REGISTER(R) \
127 : constexpr Register R = Register::from_code<kRegCode_##R>();
128 : GENERAL_REGISTERS(DECLARE_REGISTER)
129 : #undef DECLARE_REGISTER
130 : constexpr Register no_reg = Register::no_reg();
131 :
132 : #ifdef _WIN64
133 : // Windows calling convention
134 : constexpr Register arg_reg_1 = rcx;
135 : constexpr Register arg_reg_2 = rdx;
136 : constexpr Register arg_reg_3 = r8;
137 : constexpr Register arg_reg_4 = r9;
138 : #else
139 : // AMD64 calling convention
140 : constexpr Register arg_reg_1 = rdi;
141 : constexpr Register arg_reg_2 = rsi;
142 : constexpr Register arg_reg_3 = rdx;
143 : constexpr Register arg_reg_4 = rcx;
144 : #endif // _WIN64
145 :
146 :
147 : #define DOUBLE_REGISTERS(V) \
148 : V(xmm0) \
149 : V(xmm1) \
150 : V(xmm2) \
151 : V(xmm3) \
152 : V(xmm4) \
153 : V(xmm5) \
154 : V(xmm6) \
155 : V(xmm7) \
156 : V(xmm8) \
157 : V(xmm9) \
158 : V(xmm10) \
159 : V(xmm11) \
160 : V(xmm12) \
161 : V(xmm13) \
162 : V(xmm14) \
163 : V(xmm15)
164 :
165 : #define FLOAT_REGISTERS DOUBLE_REGISTERS
166 : #define SIMD128_REGISTERS DOUBLE_REGISTERS
167 :
168 : #define ALLOCATABLE_DOUBLE_REGISTERS(V) \
169 : V(xmm0) \
170 : V(xmm1) \
171 : V(xmm2) \
172 : V(xmm3) \
173 : V(xmm4) \
174 : V(xmm5) \
175 : V(xmm6) \
176 : V(xmm7) \
177 : V(xmm8) \
178 : V(xmm9) \
179 : V(xmm10) \
180 : V(xmm11) \
181 : V(xmm12) \
182 : V(xmm13) \
183 : V(xmm14)
184 :
185 : constexpr bool kSimpleFPAliasing = true;
186 : constexpr bool kSimdMaskRegisters = false;
187 :
// One code per XMM register, in DOUBLE_REGISTERS declaration order
// (kDoubleCode_xmm0 == 0 ... kDoubleCode_xmm15 == 15). kDoubleAfterLast is
// the register count, used as the RegisterBase bound below.
enum DoubleRegisterCode {
#define REGISTER_CODE(R) kDoubleCode_##R,
  DOUBLE_REGISTERS(REGISTER_CODE)
#undef REGISTER_CODE
      kDoubleAfterLast
};
194 :
195 : class XMMRegister : public RegisterBase<XMMRegister, kDoubleAfterLast> {
196 : public:
197 : // Return the high bit of the register code as a 0 or 1. Used often
198 : // when constructing the REX prefix byte.
199 6943176 : int high_bit() const { return reg_code_ >> 3; }
200 : // Return the 3 low bits of the register code. Used when encoding registers
201 : // in modR/M, SIB, and opcode bytes.
202 3455929 : int low_bits() const { return reg_code_ & 0x7; }
203 :
204 : private:
205 : friend class RegisterBase<XMMRegister, kDoubleAfterLast>;
206 : explicit constexpr XMMRegister(int code) : RegisterBase(code) {}
207 : };
208 :
209 : static_assert(IS_TRIVIALLY_COPYABLE(XMMRegister) &&
210 : sizeof(XMMRegister) == sizeof(int),
211 : "XMMRegister can efficiently be passed by value");
212 :
213 : typedef XMMRegister FloatRegister;
214 :
215 : typedef XMMRegister DoubleRegister;
216 :
217 : typedef XMMRegister Simd128Register;
218 :
219 : #define DECLARE_REGISTER(R) \
220 : constexpr DoubleRegister R = DoubleRegister::from_code<kDoubleCode_##R>();
221 : DOUBLE_REGISTERS(DECLARE_REGISTER)
222 : #undef DECLARE_REGISTER
223 : constexpr DoubleRegister no_double_reg = DoubleRegister::no_reg();
224 :
// Branch/set condition codes. The non-negative values are the hardware
// condition encodings (note the true/false pairs differing only in bit 0,
// e.g. overflow/no_overflow, below/above_equal).
enum Condition {
  // any value < 0 is considered no_condition
  no_condition = -1,

  overflow = 0,
  no_overflow = 1,
  below = 2,
  above_equal = 3,
  equal = 4,
  not_equal = 5,
  below_equal = 6,
  above = 7,
  negative = 8,
  positive = 9,
  parity_even = 10,
  parity_odd = 11,
  less = 12,
  greater_equal = 13,
  less_equal = 14,
  greater = 15,

  // Fake conditions that are handled by the
  // opcodes using them.
  always = 16,
  never = 17,
  // aliases
  carry = below,
  not_carry = above_equal,
  zero = equal,
  not_zero = not_equal,
  sign = negative,
  not_sign = positive,
  last_condition = greater
};

// Returns the equivalent of !cc.
// Each condition and its negation differ only in the lowest bit of their
// encoding, so negation is a single bit flip. Negation of the default
// no_condition (-1) results in a non-default no_condition value (-2); as
// long as tests for no_condition check for condition < 0, this still works
// as expected.
inline Condition NegateCondition(Condition cc) {
  return static_cast<Condition>(cc ^ 1);
}

// Commute a condition such that {a cond b == b cond' a}.
inline Condition CommuteCondition(Condition cc) {
  switch (cc) {
    case below:         return above;
    case above:         return below;
    case above_equal:   return below_equal;
    case below_equal:   return above_equal;
    case less:          return greater;
    case greater:       return less;
    case greater_equal: return less_equal;
    case less_equal:    return greater_equal;
    default:
      // Symmetric conditions (equality, sign, parity, overflow) commute to
      // themselves.
      return cc;
  }
}
293 :
294 :
// Rounding modes, passed as the immediate of the SSE4.1 round instructions.
// NOTE(review): the values look like the x86 2-bit rounding-control
// encoding (nearest/down/up/toward-zero) -- confirm against the emitters.
enum RoundingMode {
  kRoundToNearest = 0x0,
  kRoundDown = 0x1,
  kRoundUp = 0x2,
  kRoundToZero = 0x3
};
301 :
302 :
303 : // -----------------------------------------------------------------------------
304 : // Machine instruction Immediates
305 :
// A 32-bit immediate operand, wrapping the value together with an optional
// relocation mode so the assembler can record RelocInfo when emitting it.
class Immediate BASE_EMBEDDED {
 public:
  explicit Immediate(int32_t value) : value_(value) {}
  explicit Immediate(int32_t value, RelocInfo::Mode rmode)
      : value_(value), rmode_(rmode) {}
  // Wraps a Smi's raw bit pattern, truncated to 32 bits.
  explicit Immediate(Smi* value) {
    DCHECK(SmiValuesAre31Bits());  // Only available for 31-bit SMI.
    value_ = static_cast<int32_t>(reinterpret_cast<intptr_t>(value));
  }

 private:
  int32_t value_;
  RelocInfo::Mode rmode_ = RelocInfo::NONE32;

  // Only the Assembler reads value_/rmode_, when emitting the immediate.
  friend class Assembler;
};
322 :
323 :
324 : // -----------------------------------------------------------------------------
325 : // Machine instruction Operands
326 :
// Scale applied to the index register of a memory operand. The enum value is
// the 2-bit 'scale' field of the SIB byte (see Operand::set_sib), i.e. the
// index is multiplied by 1 << value.
enum ScaleFactor {
  times_1 = 0,
  times_2 = 1,
  times_4 = 2,
  times_8 = 3,
  times_int_size = times_4,
  times_pointer_size = (kPointerSize == 8) ? times_8 : times_4
};
335 :
336 :
// A memory operand: [base + index*scale + disp], [rip + disp], etc.
// The ModR/M byte, optional SIB byte and displacement are encoded eagerly
// into buf_; the 'reg' field of the ModR/M byte is filled in later by the
// Assembler's emit_operand step.
class Operand BASE_EMBEDDED {
 public:
  // [base + disp/r]
  Operand(Register base, int32_t disp);

  // [base + index*scale + disp/r]
  Operand(Register base,
          Register index,
          ScaleFactor scale,
          int32_t disp);

  // [index*scale + disp/r]
  Operand(Register index,
          ScaleFactor scale,
          int32_t disp);

  // Offset from existing memory operand.
  // Offset is added to existing displacement as 32-bit signed values and
  // this must not overflow.
  Operand(const Operand& base, int32_t offset);

  // [rip + disp/r]
  explicit Operand(Label* label);

  // Checks whether either base or index register is the given register.
  // Does not check the "reg" part of the Operand.
  bool AddressUsesRegister(Register reg) const;

  // Queries related to the size of the generated instruction.
  // Whether the generated instruction will have a REX prefix.
  bool requires_rex() const { return rex_ != 0; }
  // Size of the ModR/M, SIB and displacement parts of the generated
  // instruction.
  int operand_size() const { return len_; }

 private:
  // Nonzero iff addressing this operand needs a REX prefix (requires_rex()).
  byte rex_;
  // The encoded ModR/M byte, optional SIB byte and displacement bytes.
  byte buf_[9];
  // The number of bytes of buf_ in use.
  byte len_;

  // Set the ModR/M byte without an encoded 'reg' register. The
  // register is encoded later as part of the emit_operand operation.
  // set_modrm can be called before or after set_sib and set_disp*.
  inline void set_modrm(int mod, Register rm);

  // Set the SIB byte if one is needed. Sets the length to 2 rather than 1.
  inline void set_sib(ScaleFactor scale, Register index, Register base);

  // Adds operand displacement fields (offsets added to the memory address).
  // Needs to be called after set_sib, not before it.
  inline void set_disp8(int disp);
  inline void set_disp32(int disp);
  inline void set_disp64(int64_t disp);  // for labels.

  friend class Assembler;
};
394 :
395 : #define ASSEMBLER_INSTRUCTION_LIST(V) \
396 : V(add) \
397 : V(and) \
398 : V(cmp) \
399 : V(cmpxchg) \
400 : V(dec) \
401 : V(idiv) \
402 : V(div) \
403 : V(imul) \
404 : V(inc) \
405 : V(lea) \
406 : V(mov) \
407 : V(movzxb) \
408 : V(movzxw) \
409 : V(neg) \
410 : V(not) \
411 : V(or) \
412 : V(repmovs) \
413 : V(sbb) \
414 : V(sub) \
415 : V(test) \
416 : V(xchg) \
417 : V(xor)
418 :
419 : // Shift instructions on operands/registers with kPointerSize, kInt32Size and
420 : // kInt64Size.
421 : #define SHIFT_INSTRUCTION_LIST(V) \
422 : V(rol, 0x0) \
423 : V(ror, 0x1) \
424 : V(rcl, 0x2) \
425 : V(rcr, 0x3) \
426 : V(shl, 0x4) \
427 : V(shr, 0x5) \
428 : V(sar, 0x7)
429 :
430 : class Assembler : public AssemblerBase {
431 : private:
432 : // We check before assembling an instruction that there is sufficient
433 : // space to write an instruction and its relocation information.
434 : // The relocation writer's position must be kGap bytes above the end of
435 : // the generated instructions. This leaves enough space for the
436 : // longest possible x64 instruction, 15 bytes, and the longest possible
437 : // relocation information encoding, RelocInfoWriter::kMaxLength == 16.
438 : // (There is a 15 byte limit on x64 instruction length that rules out some
439 : // otherwise valid instructions.)
440 : // This allows for a single, fast space check per instruction.
441 : static constexpr int kGap = 32;
442 :
443 : public:
444 : // Create an assembler. Instructions and relocation information are emitted
445 : // into a buffer, with the instructions starting from the beginning and the
446 : // relocation information starting from the end of the buffer. See CodeDesc
447 : // for a detailed comment on the layout (globals.h).
448 : //
449 : // If the provided buffer is nullptr, the assembler allocates and grows its
450 : // own buffer, and buffer_size determines the initial buffer size. The buffer
451 : // is owned by the assembler and deallocated upon destruction of the
452 : // assembler.
453 : //
454 : // If the provided buffer is not nullptr, the assembler uses the provided
455 : // buffer for code generation and assumes its size to be buffer_size. If the
456 : // buffer is too small, a fatal error occurs. No deallocation of the buffer is
457 : // done upon destruction of the assembler.
458 1708901 : Assembler(Isolate* isolate, void* buffer, int buffer_size)
459 1708901 : : Assembler(IsolateData(isolate), buffer, buffer_size) {}
460 : Assembler(IsolateData isolate_data, void* buffer, int buffer_size);
461 3418049 : virtual ~Assembler() {}
462 :
463 : // GetCode emits any pending (non-emitted) code and fills the descriptor
464 : // desc. GetCode() is idempotent; it returns the same result if no other
465 : // Assembler functions are invoked in between GetCode() calls.
466 : void GetCode(Isolate* isolate, CodeDesc* desc);
467 :
468 : // Read/Modify the code target in the relative branch/call instruction at pc.
469 : // On the x64 architecture, we use relative jumps with a 32-bit displacement
470 : // to jump to other Code objects in the Code space in the heap.
471 : // Jumps to C functions are done indirectly through a 64-bit register holding
472 : // the absolute address of the target.
473 : // These functions convert between absolute Addresses of Code objects and
474 : // the relative displacements stored in the code.
475 : // The isolate argument is unused (and may be nullptr) when skipping flushing.
476 : static inline Address target_address_at(Address pc, Address constant_pool);
477 : static inline void set_target_address_at(
478 : Isolate* isolate, Address pc, Address constant_pool, Address target,
479 : ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED);
480 : static inline Address target_address_at(Address pc, Code* code);
481 : static inline void set_target_address_at(
482 : Isolate* isolate, Address pc, Code* code, Address target,
483 : ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED);
484 :
485 : // Return the code target address at a call site from the return address
486 : // of that call in the instruction stream.
487 : static inline Address target_address_from_return_address(Address pc);
488 :
489 : // This sets the branch destination (which is in the instruction on x64).
490 : // This is for calls and branches within generated code.
491 : inline static void deserialization_set_special_target_at(
492 : Isolate* isolate, Address instruction_payload, Code* code,
493 : Address target);
494 :
495 : // This sets the internal reference at the pc.
496 : inline static void deserialization_set_target_internal_reference_at(
497 : Isolate* isolate, Address pc, Address target,
498 : RelocInfo::Mode mode = RelocInfo::INTERNAL_REFERENCE);
499 :
500 : static inline RelocInfo::Mode RelocInfoNone() {
501 : if (kPointerSize == kInt64Size) {
502 : return RelocInfo::NONE64;
503 : } else {
504 : DCHECK_EQ(kPointerSize, kInt32Size);
505 : return RelocInfo::NONE32;
506 : }
507 : }
508 :
509 : inline Handle<Code> code_target_object_handle_at(Address pc);
510 : inline Address runtime_entry_at(Address pc);
511 : // Number of bytes taken up by the branch target in the code.
512 : static constexpr int kSpecialTargetSize = 4; // 32-bit displacement.
513 : // Distance between the address of the code target in the call instruction
514 : // and the return address pushed on the stack.
515 : static constexpr int kCallTargetAddressOffset = 4; // 32-bit displacement.
516 : // The length of call(kScratchRegister).
517 : static constexpr int kCallScratchRegisterInstructionLength = 3;
518 : // The length of call(Immediate32).
519 : static constexpr int kShortCallInstructionLength = 5;
520 : // The length of movq(kScratchRegister, address).
521 : static constexpr int kMoveAddressIntoScratchRegisterInstructionLength =
522 : 2 + kPointerSize;
523 : // The length of movq(kScratchRegister, address) and call(kScratchRegister).
524 : static constexpr int kCallSequenceLength =
525 : kMoveAddressIntoScratchRegisterInstructionLength +
526 : kCallScratchRegisterInstructionLength;
527 :
528 : // One byte opcode for test eax,0xXXXXXXXX.
529 : static constexpr byte kTestEaxByte = 0xA9;
530 : // One byte opcode for test al, 0xXX.
531 : static constexpr byte kTestAlByte = 0xA8;
532 : // One byte opcode for nop.
533 : static constexpr byte kNopByte = 0x90;
534 :
535 : // One byte prefix for a short conditional jump.
536 : static constexpr byte kJccShortPrefix = 0x70;
537 : static constexpr byte kJncShortOpcode = kJccShortPrefix | not_carry;
538 : static constexpr byte kJcShortOpcode = kJccShortPrefix | carry;
539 : static constexpr byte kJnzShortOpcode = kJccShortPrefix | not_zero;
540 : static constexpr byte kJzShortOpcode = kJccShortPrefix | zero;
541 :
542 : // VEX prefix encodings.
543 : enum SIMDPrefix { kNone = 0x0, k66 = 0x1, kF3 = 0x2, kF2 = 0x3 };
544 : enum VectorLength { kL128 = 0x0, kL256 = 0x4, kLIG = kL128, kLZ = kL128 };
545 : enum VexW { kW0 = 0x0, kW1 = 0x80, kWIG = kW0 };
546 : enum LeadingOpcode { k0F = 0x1, k0F38 = 0x2, k0F3A = 0x3 };
547 :
548 : // ---------------------------------------------------------------------------
549 : // Code generation
550 : //
551 : // Function names correspond one-to-one to x64 instruction mnemonics.
552 : // Unless specified otherwise, instructions operate on 64-bit operands.
553 : //
554 : // If we need versions of an assembly instruction that operate on different
555 : // width arguments, we add a single-letter suffix specifying the width.
556 : // This is done for the following instructions: mov, cmp, inc, dec,
557 : // add, sub, and test.
558 : // There are no versions of these instructions without the suffix.
559 : // - Instructions on 8-bit (byte) operands/registers have a trailing 'b'.
560 : // - Instructions on 16-bit (word) operands/registers have a trailing 'w'.
561 : // - Instructions on 32-bit (doubleword) operands/registers use 'l'.
562 : // - Instructions on 64-bit (quadword) operands/registers use 'q'.
563 : // - Instructions on operands/registers with pointer size use 'p'.
564 :
565 : STATIC_ASSERT(kPointerSize == kInt64Size || kPointerSize == kInt32Size);
566 :
567 : #define DECLARE_INSTRUCTION(instruction) \
568 : template<class P1> \
569 : void instruction##p(P1 p1) { \
570 : emit_##instruction(p1, kPointerSize); \
571 : } \
572 : \
573 : template<class P1> \
574 : void instruction##l(P1 p1) { \
575 : emit_##instruction(p1, kInt32Size); \
576 : } \
577 : \
578 : template<class P1> \
579 : void instruction##q(P1 p1) { \
580 : emit_##instruction(p1, kInt64Size); \
581 : } \
582 : \
583 : template<class P1, class P2> \
584 : void instruction##p(P1 p1, P2 p2) { \
585 : emit_##instruction(p1, p2, kPointerSize); \
586 : } \
587 : \
588 : template<class P1, class P2> \
589 : void instruction##l(P1 p1, P2 p2) { \
590 : emit_##instruction(p1, p2, kInt32Size); \
591 : } \
592 : \
593 : template<class P1, class P2> \
594 : void instruction##q(P1 p1, P2 p2) { \
595 : emit_##instruction(p1, p2, kInt64Size); \
596 : } \
597 : \
598 : template<class P1, class P2, class P3> \
599 : void instruction##p(P1 p1, P2 p2, P3 p3) { \
600 : emit_##instruction(p1, p2, p3, kPointerSize); \
601 : } \
602 : \
603 : template<class P1, class P2, class P3> \
604 : void instruction##l(P1 p1, P2 p2, P3 p3) { \
605 : emit_##instruction(p1, p2, p3, kInt32Size); \
606 : } \
607 : \
608 : template<class P1, class P2, class P3> \
609 : void instruction##q(P1 p1, P2 p2, P3 p3) { \
610 : emit_##instruction(p1, p2, p3, kInt64Size); \
611 : }
612 65618382 : ASSEMBLER_INSTRUCTION_LIST(DECLARE_INSTRUCTION)
613 : #undef DECLARE_INSTRUCTION
614 :
615 : // Insert the smallest number of nop instructions
616 : // possible to align the pc offset to a multiple
617 : // of m, where m must be a power of 2.
618 : void Align(int m);
619 : // Insert the smallest number of zero bytes possible to align the pc offset
620 : // to a mulitple of m. m must be a power of 2 (>= 2).
621 : void DataAlign(int m);
622 : void Nop(int bytes = 1);
623 : // Aligns code to something that's optimal for a jump target for the platform.
624 : void CodeTargetAlign();
625 :
626 : // Stack
627 : void pushfq();
628 : void popfq();
629 :
630 : void pushq(Immediate value);
631 : // Push a 32 bit integer, and guarantee that it is actually pushed as a
632 : // 32 bit value, the normal push will optimize the 8 bit case.
633 : void pushq_imm32(int32_t imm32);
634 : void pushq(Register src);
635 : void pushq(const Operand& src);
636 :
637 : void popq(Register dst);
638 : void popq(const Operand& dst);
639 :
640 : void enter(Immediate size);
641 : void leave();
642 :
643 : // Moves
644 : void movb(Register dst, const Operand& src);
645 : void movb(Register dst, Immediate imm);
646 : void movb(const Operand& dst, Register src);
647 : void movb(const Operand& dst, Immediate imm);
648 :
649 : // Move the low 16 bits of a 64-bit register value to a 16-bit
650 : // memory location.
651 : void movw(Register dst, const Operand& src);
652 : void movw(const Operand& dst, Register src);
653 : void movw(const Operand& dst, Immediate imm);
654 :
655 : // Move the offset of the label location relative to the current
656 : // position (after the move) to the destination.
657 : void movl(const Operand& dst, Label* src);
658 :
659 : // Loads a pointer into a register with a relocation mode.
660 : void movp(Register dst, void* ptr, RelocInfo::Mode rmode);
661 :
662 : // Load a heap number into a register.
663 : // The heap number will not be allocated and embedded into the code right
664 : // away. Instead, we emit the load of a dummy object. Later, when calling
665 : // Assembler::GetCode, the heap number will be allocated and the code will be
666 : // patched by replacing the dummy with the actual object. The RelocInfo for
667 : // the embedded object gets already recorded correctly when emitting the dummy
668 : // move.
669 : void movp_heap_number(Register dst, double value);
670 :
671 : // Loads a 64-bit immediate into a register.
672 : void movq(Register dst, int64_t value,
673 : RelocInfo::Mode rmode = RelocInfo::NONE64);
674 : void movq(Register dst, uint64_t value,
675 : RelocInfo::Mode rmode = RelocInfo::NONE64);
676 :
677 : void movsxbl(Register dst, Register src);
678 : void movsxbl(Register dst, const Operand& src);
679 : void movsxbq(Register dst, Register src);
680 : void movsxbq(Register dst, const Operand& src);
681 : void movsxwl(Register dst, Register src);
682 : void movsxwl(Register dst, const Operand& src);
683 : void movsxwq(Register dst, Register src);
684 : void movsxwq(Register dst, const Operand& src);
685 : void movsxlq(Register dst, Register src);
686 : void movsxlq(Register dst, const Operand& src);
687 :
688 : // Repeated moves.
689 :
690 : void repmovsb();
691 : void repmovsw();
692 : void repmovsp() { emit_repmovs(kPointerSize); }
693 : void repmovsl() { emit_repmovs(kInt32Size); }
694 : void repmovsq() { emit_repmovs(kInt64Size); }
695 :
696 : // Instruction to load from an immediate 64-bit pointer into RAX.
697 : void load_rax(void* ptr, RelocInfo::Mode rmode);
698 : void load_rax(ExternalReference ext);
699 :
700 : // Conditional moves.
701 : void cmovq(Condition cc, Register dst, Register src);
702 : void cmovq(Condition cc, Register dst, const Operand& src);
703 : void cmovl(Condition cc, Register dst, Register src);
704 : void cmovl(Condition cc, Register dst, const Operand& src);
705 :
706 : void cmpb(Register dst, Immediate src) {
707 95341 : immediate_arithmetic_op_8(0x7, dst, src);
708 : }
709 :
710 : void cmpb_al(Immediate src);
711 :
712 : void cmpb(Register dst, Register src) {
713 4188 : arithmetic_op_8(0x3A, dst, src);
714 : }
715 :
716 : void cmpb(Register dst, const Operand& src) {
717 583 : arithmetic_op_8(0x3A, dst, src);
718 : }
719 :
720 : void cmpb(const Operand& dst, Register src) {
721 820 : arithmetic_op_8(0x38, src, dst);
722 : }
723 :
724 : void cmpb(const Operand& dst, Immediate src) {
725 117288 : immediate_arithmetic_op_8(0x7, dst, src);
726 : }
727 :
728 : void cmpw(const Operand& dst, Immediate src) {
729 8 : immediate_arithmetic_op_16(0x7, dst, src);
730 : }
731 :
732 : void cmpw(Register dst, Immediate src) {
733 3488 : immediate_arithmetic_op_16(0x7, dst, src);
734 : }
735 :
736 : void cmpw(Register dst, const Operand& src) {
737 77 : arithmetic_op_16(0x3B, dst, src);
738 : }
739 :
740 : void cmpw(Register dst, Register src) {
741 0 : arithmetic_op_16(0x3B, dst, src);
742 : }
743 :
744 : void cmpw(const Operand& dst, Register src) {
745 8 : arithmetic_op_16(0x39, src, dst);
746 : }
747 :
748 0 : void testb(Register reg, const Operand& op) { testb(op, reg); }
749 :
750 0 : void testw(Register reg, const Operand& op) { testw(op, reg); }
751 :
752 : void andb(Register dst, Immediate src) {
753 : immediate_arithmetic_op_8(0x4, dst, src);
754 : }
755 :
756 : void decb(Register dst);
757 : void decb(const Operand& dst);
758 :
759 : // Lock prefix.
760 : void lock();
761 :
762 : void xchgb(Register reg, const Operand& op);
763 : void xchgw(Register reg, const Operand& op);
764 :
765 : void cmpxchgb(const Operand& dst, Register src);
766 : void cmpxchgw(const Operand& dst, Register src);
767 :
768 : // Sign-extends rax into rdx:rax.
769 : void cqo();
770 : // Sign-extends eax into edx:eax.
771 : void cdq();
772 :
773 : // Multiply eax by src, put the result in edx:eax.
774 : void mull(Register src);
775 : void mull(const Operand& src);
776 : // Multiply rax by src, put the result in rdx:rax.
777 : void mulq(Register src);
778 :
779 : #define DECLARE_SHIFT_INSTRUCTION(instruction, subcode) \
780 : void instruction##p(Register dst, Immediate imm8) { \
781 : shift(dst, imm8, subcode, kPointerSize); \
782 : } \
783 : \
784 : void instruction##l(Register dst, Immediate imm8) { \
785 : shift(dst, imm8, subcode, kInt32Size); \
786 : } \
787 : \
788 : void instruction##q(Register dst, Immediate imm8) { \
789 : shift(dst, imm8, subcode, kInt64Size); \
790 : } \
791 : \
792 : void instruction##p(Operand dst, Immediate imm8) { \
793 : shift(dst, imm8, subcode, kPointerSize); \
794 : } \
795 : \
796 : void instruction##l(Operand dst, Immediate imm8) { \
797 : shift(dst, imm8, subcode, kInt32Size); \
798 : } \
799 : \
800 : void instruction##q(Operand dst, Immediate imm8) { \
801 : shift(dst, imm8, subcode, kInt64Size); \
802 : } \
803 : \
804 : void instruction##p_cl(Register dst) { shift(dst, subcode, kPointerSize); } \
805 : \
806 : void instruction##l_cl(Register dst) { shift(dst, subcode, kInt32Size); } \
807 : \
808 : void instruction##q_cl(Register dst) { shift(dst, subcode, kInt64Size); } \
809 : \
810 : void instruction##p_cl(Operand dst) { shift(dst, subcode, kPointerSize); } \
811 : \
812 : void instruction##l_cl(Operand dst) { shift(dst, subcode, kInt32Size); } \
813 : \
814 : void instruction##q_cl(Operand dst) { shift(dst, subcode, kInt64Size); }
815 830849 : SHIFT_INSTRUCTION_LIST(DECLARE_SHIFT_INSTRUCTION)
816 : #undef DECLARE_SHIFT_INSTRUCTION
817 :
818 : // Shifts dst:src left by cl bits, affecting only dst.
819 : void shld(Register dst, Register src);
820 :
821 : // Shifts src:dst right by cl bits, affecting only dst.
822 : void shrd(Register dst, Register src);
823 :
824 : void store_rax(void* dst, RelocInfo::Mode mode);
825 : void store_rax(ExternalReference ref);
826 :
827 : void subb(Register dst, Immediate src) {
828 3368 : immediate_arithmetic_op_8(0x5, dst, src);
829 : }
830 :
831 : void testb(Register dst, Register src);
832 : void testb(Register reg, Immediate mask);
833 : void testb(const Operand& op, Immediate mask);
834 : void testb(const Operand& op, Register reg);
835 :
836 : void testw(Register dst, Register src);
837 : void testw(Register reg, Immediate mask);
838 : void testw(const Operand& op, Immediate mask);
839 : void testw(const Operand& op, Register reg);
840 :
841 : // Bit operations.
842 : void bt(const Operand& dst, Register src);
843 : void bts(const Operand& dst, Register src);
844 : void bsrq(Register dst, Register src);
845 : void bsrq(Register dst, const Operand& src);
846 : void bsrl(Register dst, Register src);
847 : void bsrl(Register dst, const Operand& src);
848 : void bsfq(Register dst, Register src);
849 : void bsfq(Register dst, const Operand& src);
850 : void bsfl(Register dst, Register src);
851 : void bsfl(Register dst, const Operand& src);
852 :
853 : // Miscellaneous
854 : void clc();
855 : void cld();
856 : void cpuid();
857 : void hlt();
858 : void int3();
859 : void nop();
860 : void ret(int imm16);
861 : void ud2();
862 : void setcc(Condition cc, Register reg);
863 :
864 : void pshufw(XMMRegister dst, XMMRegister src, uint8_t shuffle);
865 : void pshufw(XMMRegister dst, const Operand& src, uint8_t shuffle);
866 :
  // Label operations & relative jumps (PPUM Appendix D)
  //
  // Takes a branch opcode (cc) and a label (L) and generates
  // either a backward branch or a forward branch and links it
  // to the label fixup chain. Usage:
  //
  // Label L;    // unbound label
  // j(cc, &L);  // forward branch to unbound label
  // bind(&L);   // bind label to the current pc
  // j(cc, &L);  // backward branch to bound label
  // bind(&L);   // illegal: a label may be bound only once
  //
  // Note: The same Label can be used for forward and backward branches
  // but it may be bound only once.

  void bind(Label* L);  // binds an unbound label L to the current code position

  // Calls
  // Call near relative 32-bit displacement, relative to next instruction.
  void call(Label* L);
  void call(Address entry, RelocInfo::Mode rmode);
  void call(CodeStub* stub);
  void call(Handle<Code> target,
            RelocInfo::Mode rmode = RelocInfo::CODE_TARGET);

  // Calls directly to the given address using a relative offset.
  // Should only ever be used in Code objects for calls within the
  // same Code object. Should not be used when generating new code (use labels),
  // but only when patching existing code.
  void call(Address target);

  // Call near absolute indirect, address in register
  void call(Register adr);

  // Jumps
  // Jump short or near relative.
  // Use a 32-bit signed displacement.
  // Unconditional jump to L
  void jmp(Label* L, Label::Distance distance = Label::kFar);
  void jmp(Handle<Code> target, RelocInfo::Mode rmode);

  // Jump near absolute indirect (r64)
  void jmp(Register adr);
  void jmp(const Operand& src);

  // Conditional jumps
  void j(Condition cc,
         Label* L,
         Label::Distance distance = Label::kFar);
  void j(Condition cc, Address entry, RelocInfo::Mode rmode);
  void j(Condition cc, Handle<Code> target, RelocInfo::Mode rmode);
918 :
  // Floating-point operations
  // Legacy x87 FPU instructions. An int parameter `i` names the FPU stack
  // register st(i); `_s`/`_d` suffixed forms take a 32-bit (single) or
  // 64-bit (double) memory operand, and `fi*` forms take integer memory
  // operands.
  void fld(int i);

  void fld1();
  void fldz();
  void fldpi();
  void fldln2();

  void fld_s(const Operand& adr);
  void fld_d(const Operand& adr);

  void fstp_s(const Operand& adr);
  void fstp_d(const Operand& adr);
  void fstp(int index);

  void fild_s(const Operand& adr);
  void fild_d(const Operand& adr);

  void fist_s(const Operand& adr);

  void fistp_s(const Operand& adr);
  void fistp_d(const Operand& adr);

  // Store integer with truncation (SSE3-era x87 extension).
  void fisttp_s(const Operand& adr);
  void fisttp_d(const Operand& adr);

  void fabs();
  void fchs();

  void fadd(int i);
  void fsub(int i);
  void fmul(int i);
  void fdiv(int i);

  void fisub_s(const Operand& adr);

  // "p"-suffixed arithmetic pops the FPU stack after the operation.
  void faddp(int i = 1);
  void fsubp(int i = 1);
  void fsubrp(int i = 1);
  void fmulp(int i = 1);
  void fdivp(int i = 1);
  void fprem();
  void fprem1();

  void fxch(int i = 1);
  void fincstp();
  void ffree(int i = 0);

  void ftst();
  void fucomp(int i);
  void fucompp();
  void fucomi(int i);
  void fucomip();

  void fcompp();
  void fnstsw_ax();
  void fwait();
  void fnclex();

  void fsin();
  void fcos();
  void fptan();
  void fyl2x();
  void f2xm1();
  void fscale();
  void fninit();

  void frndint();

  void sahf();
989 :
  // SSE instructions
  // Scalar single-precision ("ss") and packed single-precision ("ps")
  // arithmetic, each with a reg-reg and a reg-mem overload.
  void addss(XMMRegister dst, XMMRegister src);
  void addss(XMMRegister dst, const Operand& src);
  void subss(XMMRegister dst, XMMRegister src);
  void subss(XMMRegister dst, const Operand& src);
  void mulss(XMMRegister dst, XMMRegister src);
  void mulss(XMMRegister dst, const Operand& src);
  void divss(XMMRegister dst, XMMRegister src);
  void divss(XMMRegister dst, const Operand& src);

  void maxss(XMMRegister dst, XMMRegister src);
  void maxss(XMMRegister dst, const Operand& src);
  void minss(XMMRegister dst, XMMRegister src);
  void minss(XMMRegister dst, const Operand& src);

  void sqrtss(XMMRegister dst, XMMRegister src);
  void sqrtss(XMMRegister dst, const Operand& src);

  void ucomiss(XMMRegister dst, XMMRegister src);
  void ucomiss(XMMRegister dst, const Operand& src);
  void movaps(XMMRegister dst, XMMRegister src);

  // Don't use this unless it's important to keep the
  // top half of the destination register unchanged.
  // Use movaps when moving float values and movd for integer
  // values in xmm registers.
  void movss(XMMRegister dst, XMMRegister src);

  void movss(XMMRegister dst, const Operand& src);
  void movss(const Operand& dst, XMMRegister src);
  void shufps(XMMRegister dst, XMMRegister src, byte imm8);

  // "tt" = convert with truncation; "l" marks a 32-bit integer operand.
  void cvttss2si(Register dst, const Operand& src);
  void cvttss2si(Register dst, XMMRegister src);
  void cvtlsi2ss(XMMRegister dst, const Operand& src);
  void cvtlsi2ss(XMMRegister dst, Register src);

  void andps(XMMRegister dst, XMMRegister src);
  void andps(XMMRegister dst, const Operand& src);
  void orps(XMMRegister dst, XMMRegister src);
  void orps(XMMRegister dst, const Operand& src);
  void xorps(XMMRegister dst, XMMRegister src);
  void xorps(XMMRegister dst, const Operand& src);

  void addps(XMMRegister dst, XMMRegister src);
  void addps(XMMRegister dst, const Operand& src);
  void subps(XMMRegister dst, XMMRegister src);
  void subps(XMMRegister dst, const Operand& src);
  void mulps(XMMRegister dst, XMMRegister src);
  void mulps(XMMRegister dst, const Operand& src);
  void divps(XMMRegister dst, XMMRegister src);
  void divps(XMMRegister dst, const Operand& src);

  void movmskps(Register dst, XMMRegister src);

  // Generic VEX-encoded instruction emitter used by the v* helpers below:
  // emits `op` with the given SIMD prefix (pp), leading-opcode map (m) and
  // VEX.W setting (w).
  void vinstr(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
              SIMDPrefix pp, LeadingOpcode m, VexW w);
  void vinstr(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2,
              SIMDPrefix pp, LeadingOpcode m, VexW w);
1049 :
  // SSE2 instructions
  // sse2_instr() emits <prefix> <escape> <opcode> plus a ModR/M-encoded
  // dst/src pair. The DECLARE_* macros below stamp out one reg-reg and one
  // reg-mem overload for every entry of the corresponding *_INSTRUCTION_LIST
  // (the lists are defined elsewhere in this file's includes).
  void sse2_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape,
                  byte opcode);
  void sse2_instr(XMMRegister dst, const Operand& src, byte prefix, byte escape,
                  byte opcode);
#define DECLARE_SSE2_INSTRUCTION(instruction, prefix, escape, opcode) \
  void instruction(XMMRegister dst, XMMRegister src) {                \
    sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode);         \
  }                                                                   \
  void instruction(XMMRegister dst, const Operand& src) {             \
    sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode);         \
  }

  SSE2_INSTRUCTION_LIST(DECLARE_SSE2_INSTRUCTION)
#undef DECLARE_SSE2_INSTRUCTION

  // VEX-encoded (AVX) three-operand forms of the same SSE2 list:
  // v<name>(dst, src1, src2), dispatched through vinstr().
#define DECLARE_SSE2_AVX_INSTRUCTION(instruction, prefix, escape, opcode)    \
  void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0);          \
  }                                                                          \
  void v##instruction(XMMRegister dst, XMMRegister src1,                     \
                      const Operand& src2) {                                 \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0);          \
  }

  SSE2_INSTRUCTION_LIST(DECLARE_SSE2_AVX_INSTRUCTION)
#undef DECLARE_SSE2_AVX_INSTRUCTION

  // SSE3
  void lddqu(XMMRegister dst, const Operand& src);

  // SSSE3
  // Like sse2_instr() but with a two-byte opcode escape (escape1, escape2).
  void ssse3_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape1,
                   byte escape2, byte opcode);
  void ssse3_instr(XMMRegister dst, const Operand& src, byte prefix,
                   byte escape1, byte escape2, byte opcode);

#define DECLARE_SSSE3_INSTRUCTION(instruction, prefix, escape1, escape2,     \
                                  opcode)                                    \
  void instruction(XMMRegister dst, XMMRegister src) {                       \
    ssse3_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
  }                                                                          \
  void instruction(XMMRegister dst, const Operand& src) {                    \
    ssse3_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
  }

  SSSE3_INSTRUCTION_LIST(DECLARE_SSSE3_INSTRUCTION)
#undef DECLARE_SSSE3_INSTRUCTION

  // SSE4
  void sse4_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape1,
                  byte escape2, byte opcode);
  void sse4_instr(XMMRegister dst, const Operand& src, byte prefix,
                  byte escape1, byte escape2, byte opcode);
#define DECLARE_SSE4_INSTRUCTION(instruction, prefix, escape1, escape2,     \
                                 opcode)                                    \
  void instruction(XMMRegister dst, XMMRegister src) {                      \
    sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
  }                                                                         \
  void instruction(XMMRegister dst, const Operand& src) {                   \
    sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
  }

  SSE4_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
#undef DECLARE_SSE4_INSTRUCTION

  // VEX-encoded (AVX) three-operand forms of the SSSE3 and SSE4 lists.
  // k##escape1##escape2 pastes the two escape bytes into a single
  // LeadingOpcode enumerator (e.g. k0F38).
#define DECLARE_SSE34_AVX_INSTRUCTION(instruction, prefix, escape1, escape2,  \
                                      opcode)                                 \
  void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) {  \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \
  }                                                                           \
  void v##instruction(XMMRegister dst, XMMRegister src1,                      \
                      const Operand& src2) {                                  \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \
  }

  SSSE3_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
  SSE4_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
#undef DECLARE_SSE34_AVX_INSTRUCTION
1129 :
  void movd(XMMRegister dst, Register src);
  void movd(XMMRegister dst, const Operand& src);
  void movd(Register dst, XMMRegister src);
  void movq(XMMRegister dst, Register src);
  void movq(Register dst, XMMRegister src);
  void movq(XMMRegister dst, XMMRegister src);

  // Don't use this unless it's important to keep the
  // top half of the destination register unchanged.
  // Use movapd when moving double values and movq for integer
  // values in xmm registers.
  void movsd(XMMRegister dst, XMMRegister src);

  void movsd(const Operand& dst, XMMRegister src);
  void movsd(XMMRegister dst, const Operand& src);

  void movdqa(const Operand& dst, XMMRegister src);
  void movdqa(XMMRegister dst, const Operand& src);

  void movdqu(const Operand& dst, XMMRegister src);
  void movdqu(XMMRegister dst, const Operand& src);

  void movapd(XMMRegister dst, XMMRegister src);
  void movupd(XMMRegister dst, const Operand& src);
  void movupd(const Operand& dst, XMMRegister src);

  // Packed-integer shifts by immediate.
  void psllq(XMMRegister reg, byte imm8);
  void psrlq(XMMRegister reg, byte imm8);
  void psllw(XMMRegister reg, byte imm8);
  void pslld(XMMRegister reg, byte imm8);
  void psrlw(XMMRegister reg, byte imm8);
  void psrld(XMMRegister reg, byte imm8);
  void psraw(XMMRegister reg, byte imm8);
  void psrad(XMMRegister reg, byte imm8);

  // Conversions. Naming: "tt" = truncating; trailing "q"/embedded "q"
  // marks the 64-bit integer operand form, "l" the 32-bit one.
  void cvttsd2si(Register dst, const Operand& src);
  void cvttsd2si(Register dst, XMMRegister src);
  void cvttss2siq(Register dst, XMMRegister src);
  void cvttss2siq(Register dst, const Operand& src);
  void cvttsd2siq(Register dst, XMMRegister src);
  void cvttsd2siq(Register dst, const Operand& src);

  void cvtlsi2sd(XMMRegister dst, const Operand& src);
  void cvtlsi2sd(XMMRegister dst, Register src);

  void cvtqsi2ss(XMMRegister dst, const Operand& src);
  void cvtqsi2ss(XMMRegister dst, Register src);

  void cvtqsi2sd(XMMRegister dst, const Operand& src);
  void cvtqsi2sd(XMMRegister dst, Register src);


  void cvtss2sd(XMMRegister dst, XMMRegister src);
  void cvtss2sd(XMMRegister dst, const Operand& src);
  void cvtsd2ss(XMMRegister dst, XMMRegister src);
  void cvtsd2ss(XMMRegister dst, const Operand& src);

  void cvtsd2si(Register dst, XMMRegister src);
  void cvtsd2siq(Register dst, XMMRegister src);

  // Scalar double-precision arithmetic.
  void addsd(XMMRegister dst, XMMRegister src);
  void addsd(XMMRegister dst, const Operand& src);
  void subsd(XMMRegister dst, XMMRegister src);
  void subsd(XMMRegister dst, const Operand& src);
  void mulsd(XMMRegister dst, XMMRegister src);
  void mulsd(XMMRegister dst, const Operand& src);
  void divsd(XMMRegister dst, XMMRegister src);
  void divsd(XMMRegister dst, const Operand& src);

  void maxsd(XMMRegister dst, XMMRegister src);
  void maxsd(XMMRegister dst, const Operand& src);
  void minsd(XMMRegister dst, XMMRegister src);
  void minsd(XMMRegister dst, const Operand& src);

  void andpd(XMMRegister dst, XMMRegister src);
  void andpd(XMMRegister dst, const Operand& src);
  void orpd(XMMRegister dst, XMMRegister src);
  void orpd(XMMRegister dst, const Operand& src);
  void xorpd(XMMRegister dst, XMMRegister src);
  void xorpd(XMMRegister dst, const Operand& src);
  void sqrtsd(XMMRegister dst, XMMRegister src);
  void sqrtsd(XMMRegister dst, const Operand& src);

  void ucomisd(XMMRegister dst, XMMRegister src);
  void ucomisd(XMMRegister dst, const Operand& src);
  void cmpltsd(XMMRegister dst, XMMRegister src);

  void movmskpd(Register dst, XMMRegister src);

  void punpckldq(XMMRegister dst, XMMRegister src);
  void punpckldq(XMMRegister dst, const Operand& src);
  void punpckhdq(XMMRegister dst, XMMRegister src);

  // SSE 4.1 instructions
  void insertps(XMMRegister dst, XMMRegister src, byte imm8);
  void extractps(Register dst, XMMRegister src, byte imm8);
  void pextrb(Register dst, XMMRegister src, int8_t imm8);
  void pextrb(const Operand& dst, XMMRegister src, int8_t imm8);
  void pextrw(Register dst, XMMRegister src, int8_t imm8);
  void pextrw(const Operand& dst, XMMRegister src, int8_t imm8);
  void pextrd(Register dst, XMMRegister src, int8_t imm8);
  void pextrd(const Operand& dst, XMMRegister src, int8_t imm8);
  void pinsrb(XMMRegister dst, Register src, int8_t imm8);
  void pinsrb(XMMRegister dst, const Operand& src, int8_t imm8);
  void pinsrw(XMMRegister dst, Register src, int8_t imm8);
  void pinsrw(XMMRegister dst, const Operand& src, int8_t imm8);
  void pinsrd(XMMRegister dst, Register src, int8_t imm8);
  void pinsrd(XMMRegister dst, const Operand& src, int8_t imm8);

  void roundss(XMMRegister dst, XMMRegister src, RoundingMode mode);
  void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);

  // Packed compares; `cmp` is the immediate comparison-predicate byte.
  void cmpps(XMMRegister dst, XMMRegister src, int8_t cmp);
  void cmpps(XMMRegister dst, const Operand& src, int8_t cmp);
  void cmppd(XMMRegister dst, XMMRegister src, int8_t cmp);
  void cmppd(XMMRegister dst, const Operand& src, int8_t cmp);

  // Stamps out named ps/pd compare helpers (cmpeqps, cmpltpd, ...) that
  // forward to cmpps/cmppd with a fixed predicate immediate.
#define SSE_CMP_P(instr, imm8)                                                \
  void instr##ps(XMMRegister dst, XMMRegister src) { cmpps(dst, src, imm8); } \
  void instr##ps(XMMRegister dst, const Operand& src) {                       \
    cmpps(dst, src, imm8);                                                    \
  }                                                                           \
  void instr##pd(XMMRegister dst, XMMRegister src) { cmppd(dst, src, imm8); } \
  void instr##pd(XMMRegister dst, const Operand& src) { cmppd(dst, src, imm8); }

  SSE_CMP_P(cmpeq, 0x0);
  SSE_CMP_P(cmplt, 0x1);
  SSE_CMP_P(cmple, 0x2);
  SSE_CMP_P(cmpneq, 0x4);
  SSE_CMP_P(cmpnlt, 0x5);
  SSE_CMP_P(cmpnle, 0x6);

#undef SSE_CMP_P

  void minps(XMMRegister dst, XMMRegister src);
  void minps(XMMRegister dst, const Operand& src);
  void maxps(XMMRegister dst, XMMRegister src);
  void maxps(XMMRegister dst, const Operand& src);
  void rcpps(XMMRegister dst, XMMRegister src);
  void rcpps(XMMRegister dst, const Operand& src);
  void rsqrtps(XMMRegister dst, XMMRegister src);
  void rsqrtps(XMMRegister dst, const Operand& src);
  void sqrtps(XMMRegister dst, XMMRegister src);
  void sqrtps(XMMRegister dst, const Operand& src);
  void movups(XMMRegister dst, XMMRegister src);
  void movups(XMMRegister dst, const Operand& src);
  void movups(const Operand& dst, XMMRegister src);
  void psrldq(XMMRegister dst, uint8_t shift);
  void pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle);
  void pshufd(XMMRegister dst, const Operand& src, uint8_t shuffle);
  void pshufhw(XMMRegister dst, XMMRegister src, uint8_t shuffle);
  void pshuflw(XMMRegister dst, XMMRegister src, uint8_t shuffle);
  void cvtdq2ps(XMMRegister dst, XMMRegister src);
  void cvtdq2ps(XMMRegister dst, const Operand& src);
1284 :
  // AVX FMA instructions, double-precision scalar (sd) forms.
  // The 132/213/231 suffix is Intel's operand-order variant; it is selected
  // purely by the opcode byte passed to vfmasd() (declared at the end of
  // this group), which performs the actual VEX encoding.
  void vfmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0x99, dst, src1, src2);
  }
  void vfmadd213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0xa9, dst, src1, src2);
  }
  void vfmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0xb9, dst, src1, src2);
  }
  void vfmadd132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0x99, dst, src1, src2);
  }
  void vfmadd213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0xa9, dst, src1, src2);
  }
  void vfmadd231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0xb9, dst, src1, src2);
  }
  void vfmsub132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0x9b, dst, src1, src2);
  }
  void vfmsub213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0xab, dst, src1, src2);
  }
  void vfmsub231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0xbb, dst, src1, src2);
  }
  void vfmsub132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0x9b, dst, src1, src2);
  }
  void vfmsub213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0xab, dst, src1, src2);
  }
  void vfmsub231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0xbb, dst, src1, src2);
  }
  void vfnmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0x9d, dst, src1, src2);
  }
  void vfnmadd213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0xad, dst, src1, src2);
  }
  void vfnmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0xbd, dst, src1, src2);
  }
  void vfnmadd132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0x9d, dst, src1, src2);
  }
  void vfnmadd213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0xad, dst, src1, src2);
  }
  void vfnmadd231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0xbd, dst, src1, src2);
  }
  void vfnmsub132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0x9f, dst, src1, src2);
  }
  void vfnmsub213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0xaf, dst, src1, src2);
  }
  void vfnmsub231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmasd(0xbf, dst, src1, src2);
  }
  void vfnmsub132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0x9f, dst, src1, src2);
  }
  void vfnmsub213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0xaf, dst, src1, src2);
  }
  void vfnmsub231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmasd(0xbf, dst, src1, src2);
  }
  // Shared emitters for all vfm*sd forms above.
  void vfmasd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vfmasd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
1360 :
  // AVX FMA instructions, single-precision scalar (ss) forms.
  // Same opcode scheme as the sd group; encoding is delegated to vfmass()
  // (declared at the end of this group).
  void vfmadd132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0x99, dst, src1, src2);
  }
  void vfmadd213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xa9, dst, src1, src2);
  }
  void vfmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xb9, dst, src1, src2);
  }
  void vfmadd132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0x99, dst, src1, src2);
  }
  void vfmadd213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xa9, dst, src1, src2);
  }
  void vfmadd231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xb9, dst, src1, src2);
  }
  void vfmsub132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0x9b, dst, src1, src2);
  }
  void vfmsub213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xab, dst, src1, src2);
  }
  void vfmsub231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xbb, dst, src1, src2);
  }
  void vfmsub132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0x9b, dst, src1, src2);
  }
  void vfmsub213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xab, dst, src1, src2);
  }
  void vfmsub231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xbb, dst, src1, src2);
  }
  void vfnmadd132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0x9d, dst, src1, src2);
  }
  void vfnmadd213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xad, dst, src1, src2);
  }
  void vfnmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xbd, dst, src1, src2);
  }
  void vfnmadd132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0x9d, dst, src1, src2);
  }
  void vfnmadd213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xad, dst, src1, src2);
  }
  void vfnmadd231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xbd, dst, src1, src2);
  }
  void vfnmsub132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0x9f, dst, src1, src2);
  }
  void vfnmsub213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xaf, dst, src1, src2);
  }
  void vfnmsub231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vfmass(0xbf, dst, src1, src2);
  }
  void vfnmsub132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0x9f, dst, src1, src2);
  }
  void vfnmsub213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xaf, dst, src1, src2);
  }
  void vfnmsub231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vfmass(0xbf, dst, src1, src2);
  }
  // Shared emitters for all vfm*ss forms above.
  void vfmass(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vfmass(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
1435 :
  void vmovd(XMMRegister dst, Register src);
  void vmovd(XMMRegister dst, const Operand& src);
  void vmovd(Register dst, XMMRegister src);
  void vmovq(XMMRegister dst, Register src);
  void vmovq(XMMRegister dst, const Operand& src);
  void vmovq(Register dst, XMMRegister src);

  void vmovsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vsd(0x10, dst, src1, src2);
  }
  // Two-operand load/store forms: xmm0 fills the middle (src1) slot of the
  // vsd() helper, which is unused for these encodings.
  void vmovsd(XMMRegister dst, const Operand& src) {
    vsd(0x10, dst, xmm0, src);
  }
  void vmovsd(const Operand& dst, XMMRegister src) {
    vsd(0x11, src, xmm0, dst);
  }
1452 :
  // Generators for three-operand AVX arithmetic:
  //   AVX_S_3  stamps the scalar forms  (<instr>ss via vss, <instr>sd via vsd)
  //   AVX_P_3  stamps the packed forms  (<instr>ps via vps, <instr>pd via vpd)
  //   AVX_SP_3 stamps all four.
  // Each generated method has reg-reg-reg and reg-reg-mem overloads sharing
  // one opcode byte.
#define AVX_SP_3(instr, opcode) \
  AVX_S_3(instr, opcode)        \
  AVX_P_3(instr, opcode)

#define AVX_S_3(instr, opcode)  \
  AVX_3(instr##ss, opcode, vss) \
  AVX_3(instr##sd, opcode, vsd)

#define AVX_P_3(instr, opcode)  \
  AVX_3(instr##ps, opcode, vps) \
  AVX_3(instr##pd, opcode, vpd)

#define AVX_3(instr, opcode, impl)                                     \
  void instr(XMMRegister dst, XMMRegister src1, XMMRegister src2) {    \
    impl(opcode, dst, src1, src2);                                     \
  }                                                                    \
  void instr(XMMRegister dst, XMMRegister src1, const Operand& src2) { \
    impl(opcode, dst, src1, src2);                                     \
  }

  AVX_SP_3(vsqrt, 0x51);
  AVX_SP_3(vadd, 0x58);
  AVX_SP_3(vsub, 0x5c);
  AVX_SP_3(vmul, 0x59);
  AVX_SP_3(vdiv, 0x5e);
  AVX_SP_3(vmin, 0x5d);
  AVX_SP_3(vmax, 0x5f);
  AVX_P_3(vand, 0x54);
  AVX_P_3(vor, 0x56);
  AVX_P_3(vxor, 0x57);
  AVX_3(vcvtsd2ss, 0x5a, vsd);

#undef AVX_3
#undef AVX_S_3
#undef AVX_P_3
#undef AVX_SP_3
1489 :
  // Shift-by-immediate group: the xmmN passed in the dst slot is not an
  // operand — it encodes the opcode extension in the ModR/M reg field
  // (xmm2 -> /2 = logical right shift, xmm6 -> /6 = left shift).
  void vpsrlq(XMMRegister dst, XMMRegister src, byte imm8) {
    vpd(0x73, xmm2, dst, src);
    emit(imm8);
  }
  void vpsllq(XMMRegister dst, XMMRegister src, byte imm8) {
    vpd(0x73, xmm6, dst, src);
    emit(imm8);
  }
  void vcvtss2sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vinstr(0x5a, dst, src1, src2, kF3, k0F, kWIG);
  }
  void vcvtss2sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vinstr(0x5a, dst, src1, src2, kF3, k0F, kWIG);
  }
  // Int <-> float conversions take a general-purpose register operand;
  // XMMRegister::from_code(reg.code()) reinterprets its register number so
  // vinstr() can encode it in the XMM operand slot. kW0/kW1 select the
  // 32-bit ("l") vs. 64-bit ("q") integer width.
  void vcvtlsi2sd(XMMRegister dst, XMMRegister src1, Register src2) {
    XMMRegister isrc2 = XMMRegister::from_code(src2.code());
    vinstr(0x2a, dst, src1, isrc2, kF2, k0F, kW0);
  }
  void vcvtlsi2sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vinstr(0x2a, dst, src1, src2, kF2, k0F, kW0);
  }
  void vcvtlsi2ss(XMMRegister dst, XMMRegister src1, Register src2) {
    XMMRegister isrc2 = XMMRegister::from_code(src2.code());
    vinstr(0x2a, dst, src1, isrc2, kF3, k0F, kW0);
  }
  void vcvtlsi2ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vinstr(0x2a, dst, src1, src2, kF3, k0F, kW0);
  }
  void vcvtqsi2ss(XMMRegister dst, XMMRegister src1, Register src2) {
    XMMRegister isrc2 = XMMRegister::from_code(src2.code());
    vinstr(0x2a, dst, src1, isrc2, kF3, k0F, kW1);
  }
  void vcvtqsi2ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vinstr(0x2a, dst, src1, src2, kF3, k0F, kW1);
  }
  void vcvtqsi2sd(XMMRegister dst, XMMRegister src1, Register src2) {
    XMMRegister isrc2 = XMMRegister::from_code(src2.code());
    vinstr(0x2a, dst, src1, isrc2, kF2, k0F, kW1);
  }
  void vcvtqsi2sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vinstr(0x2a, dst, src1, src2, kF2, k0F, kW1);
  }
  void vcvttss2si(Register dst, XMMRegister src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW0);
  }
  void vcvttss2si(Register dst, const Operand& src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW0);
  }
  void vcvttsd2si(Register dst, XMMRegister src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW0);
  }
  void vcvttsd2si(Register dst, const Operand& src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW0);
  }
  void vcvttss2siq(Register dst, XMMRegister src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW1);
  }
  void vcvttss2siq(Register dst, const Operand& src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW1);
  }
  void vcvttsd2siq(Register dst, XMMRegister src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW1);
  }
  void vcvttsd2siq(Register dst, const Operand& src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW1);
  }
  void vcvtsd2si(Register dst, XMMRegister src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x2d, idst, xmm0, src, kF2, k0F, kW0);
  }
  void vucomisd(XMMRegister dst, XMMRegister src) {
    vinstr(0x2e, dst, xmm0, src, k66, k0F, kWIG);
  }
  void vucomisd(XMMRegister dst, const Operand& src) {
    vinstr(0x2e, dst, xmm0, src, k66, k0F, kWIG);
  }
  void vroundss(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                RoundingMode mode) {
    vinstr(0x0a, dst, src1, src2, k66, k0F3A, kWIG);
    emit(static_cast<byte>(mode) | 0x8);  // Mask precision exception.
  }
  void vroundsd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                RoundingMode mode) {
    vinstr(0x0b, dst, src1, src2, k66, k0F3A, kWIG);
    emit(static_cast<byte>(mode) | 0x8);  // Mask precision exception.
  }

  // Generic scalar-double emitter (VEX.F2.0F map); used by vmovsd and the
  // AVX_3 macro expansions above.
  void vsd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vinstr(op, dst, src1, src2, kF2, k0F, kWIG);
  }
  void vsd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vinstr(op, dst, src1, src2, kF2, k0F, kWIG);
  }

  void vmovss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    vss(0x10, dst, src1, src2);
  }
  // As with vmovsd: xmm0 fills the unused middle slot of the two-operand
  // load/store forms.
  void vmovss(XMMRegister dst, const Operand& src) {
    vss(0x10, dst, xmm0, src);
  }
  void vmovss(const Operand& dst, XMMRegister src) {
    vss(0x11, src, xmm0, dst);
  }
  void vucomiss(XMMRegister dst, XMMRegister src);
  void vucomiss(XMMRegister dst, const Operand& src);
  // Generic scalar-single emitter, analogous to vsd().
  void vss(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vss(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
1605 :
  void vmovaps(XMMRegister dst, XMMRegister src) { vps(0x28, dst, xmm0, src); }
  void vmovups(XMMRegister dst, XMMRegister src) { vps(0x10, dst, xmm0, src); }
  void vmovups(XMMRegister dst, const Operand& src) {
    vps(0x10, dst, xmm0, src);
  }
  void vmovups(const Operand& dst, XMMRegister src) {
    vps(0x11, src, xmm0, dst);
  }
  void vmovapd(XMMRegister dst, XMMRegister src) { vpd(0x28, dst, xmm0, src); }
  void vmovupd(XMMRegister dst, const Operand& src) {
    vpd(0x10, dst, xmm0, src);
  }
  void vmovupd(const Operand& dst, XMMRegister src) {
    vpd(0x11, src, xmm0, dst);
  }
  // The GP destination's register number is re-wrapped as an XMMRegister so
  // the vps/vpd helpers can encode it in the reg field.
  void vmovmskps(Register dst, XMMRegister src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vps(0x50, idst, xmm0, src);
  }
  void vmovmskpd(Register dst, XMMRegister src) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vpd(0x50, idst, xmm0, src);
  }
  // Packed compares; `cmp` is the immediate comparison-predicate byte
  // emitted after the instruction.
  void vcmpps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int8_t cmp) {
    vps(0xC2, dst, src1, src2);
    emit(cmp);
  }
  void vcmpps(XMMRegister dst, XMMRegister src1, const Operand& src2,
              int8_t cmp) {
    vps(0xC2, dst, src1, src2);
    emit(cmp);
  }
  void vcmppd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int8_t cmp) {
    vpd(0xC2, dst, src1, src2);
    emit(cmp);
  }
  void vcmppd(XMMRegister dst, XMMRegister src1, const Operand& src2,
              int8_t cmp) {
    vpd(0xC2, dst, src1, src2);
    emit(cmp);
  }

  // Stamps out named AVX ps/pd compare helpers (vcmpeqps, vcmpltpd, ...)
  // that forward to vcmpps/vcmppd with a fixed predicate immediate.
#define AVX_CMP_P(instr, imm8)                                             \
  void instr##ps(XMMRegister dst, XMMRegister src1, XMMRegister src2) {    \
    vcmpps(dst, src1, src2, imm8);                                         \
  }                                                                        \
  void instr##ps(XMMRegister dst, XMMRegister src1, const Operand& src2) { \
    vcmpps(dst, src1, src2, imm8);                                         \
  }                                                                        \
  void instr##pd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {    \
    vcmppd(dst, src1, src2, imm8);                                         \
  }                                                                        \
  void instr##pd(XMMRegister dst, XMMRegister src1, const Operand& src2) { \
    vcmppd(dst, src1, src2, imm8);                                         \
  }

  AVX_CMP_P(vcmpeq, 0x0);
  AVX_CMP_P(vcmplt, 0x1);
  AVX_CMP_P(vcmple, 0x2);
  AVX_CMP_P(vcmpneq, 0x4);
  AVX_CMP_P(vcmpnlt, 0x5);
  AVX_CMP_P(vcmpnle, 0x6);

#undef AVX_CMP_P
1670 :
  void vlddqu(XMMRegister dst, const Operand& src) {
    vinstr(0xF0, dst, xmm0, src, kF2, k0F, kWIG);
  }
  // Packed shift-by-immediate forms (opcodes 0x71 for words, 0x72 for
  // dwords). The opcode extension (/2 = srl, /4 = sra, /6 = sll) is
  // carried in the ModR/M reg field and is expressed here as a fixed
  // register xmm2/xmm4/xmm6; the real destination travels in the vvvv
  // slot (second vinstr argument). The imm8 shift count follows.
  void vpsllw(XMMRegister dst, XMMRegister src, int8_t imm8) {
    vinstr(0x71, xmm6, dst, src, k66, k0F, kWIG);
    emit(imm8);
  }
  void vpsrlw(XMMRegister dst, XMMRegister src, int8_t imm8) {
    vinstr(0x71, xmm2, dst, src, k66, k0F, kWIG);
    emit(imm8);
  }
  void vpsraw(XMMRegister dst, XMMRegister src, int8_t imm8) {
    vinstr(0x71, xmm4, dst, src, k66, k0F, kWIG);
    emit(imm8);
  }
  void vpslld(XMMRegister dst, XMMRegister src, int8_t imm8) {
    vinstr(0x72, xmm6, dst, src, k66, k0F, kWIG);
    emit(imm8);
  }
  void vpsrld(XMMRegister dst, XMMRegister src, int8_t imm8) {
    vinstr(0x72, xmm2, dst, src, k66, k0F, kWIG);
    emit(imm8);
  }
  void vpsrad(XMMRegister dst, XMMRegister src, int8_t imm8) {
    vinstr(0x72, xmm4, dst, src, k66, k0F, kWIG);
    emit(imm8);
  }
  // Element extract/insert. When an element moves between a general-purpose
  // register and an XMM register, the GP register is re-wrapped as an
  // XMMRegister with the same code so the generic vinstr emitter can
  // encode it. Every instruction is followed by an imm8 lane selector.
  void vpextrb(Register dst, XMMRegister src, int8_t imm8) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x14, src, xmm0, idst, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpextrb(const Operand& dst, XMMRegister src, int8_t imm8) {
    vinstr(0x14, src, xmm0, dst, k66, k0F3A, kW0);
    emit(imm8);
  }
  // Note: the register-destination vpextrw uses the legacy 0F C5 encoding,
  // whose operand order is reversed relative to the 0F 3A 15 memory form
  // below.
  void vpextrw(Register dst, XMMRegister src, int8_t imm8) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0xc5, idst, xmm0, src, k66, k0F, kW0);
    emit(imm8);
  }
  void vpextrw(const Operand& dst, XMMRegister src, int8_t imm8) {
    vinstr(0x15, src, xmm0, dst, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpextrd(Register dst, XMMRegister src, int8_t imm8) {
    XMMRegister idst = XMMRegister::from_code(dst.code());
    vinstr(0x16, src, xmm0, idst, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpextrd(const Operand& dst, XMMRegister src, int8_t imm8) {
    vinstr(0x16, src, xmm0, dst, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpinsrb(XMMRegister dst, XMMRegister src1, Register src2, int8_t imm8) {
    XMMRegister isrc = XMMRegister::from_code(src2.code());
    vinstr(0x20, dst, src1, isrc, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpinsrb(XMMRegister dst, XMMRegister src1, const Operand& src2,
               int8_t imm8) {
    vinstr(0x20, dst, src1, src2, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpinsrw(XMMRegister dst, XMMRegister src1, Register src2, int8_t imm8) {
    XMMRegister isrc = XMMRegister::from_code(src2.code());
    vinstr(0xc4, dst, src1, isrc, k66, k0F, kW0);
    emit(imm8);
  }
  void vpinsrw(XMMRegister dst, XMMRegister src1, const Operand& src2,
               int8_t imm8) {
    vinstr(0xc4, dst, src1, src2, k66, k0F, kW0);
    emit(imm8);
  }
  void vpinsrd(XMMRegister dst, XMMRegister src1, Register src2, int8_t imm8) {
    XMMRegister isrc = XMMRegister::from_code(src2.code());
    vinstr(0x22, dst, src1, isrc, k66, k0F3A, kW0);
    emit(imm8);
  }
  void vpinsrd(XMMRegister dst, XMMRegister src1, const Operand& src2,
               int8_t imm8) {
    vinstr(0x22, dst, src1, src2, k66, k0F3A, kW0);
    emit(imm8);
  }
  // For vpshufd the imm8 is a 4x2-bit permutation control, not a lane index.
  void vpshufd(XMMRegister dst, XMMRegister src, int8_t imm8) {
    vinstr(0x70, dst, xmm0, src, k66, k0F, kWIG);
    emit(imm8);
  }
1759 :
1760 : void vps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
1761 : void vps(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
1762 : void vpd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
1763 : void vpd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
1764 :
  // BMI1 instructions. bmi1q/bmi1l emit a VEX-encoded instruction with
  // 64-bit/32-bit operand size respectively. For the single-source 0xf3
  // group (blsi/blsmsk/blsr) the opcode extension is passed through the
  // "reg" parameter as a fixed register whose code equals the /digit
  // extension: rcx = /1 (blsr), rdx = /2 (blsmsk), rbx = /3 (blsi); the
  // real destination travels in the vvvv (vreg) slot.
  void andnq(Register dst, Register src1, Register src2) {
    bmi1q(0xf2, dst, src1, src2);
  }
  void andnq(Register dst, Register src1, const Operand& src2) {
    bmi1q(0xf2, dst, src1, src2);
  }
  void andnl(Register dst, Register src1, Register src2) {
    bmi1l(0xf2, dst, src1, src2);
  }
  void andnl(Register dst, Register src1, const Operand& src2) {
    bmi1l(0xf2, dst, src1, src2);
  }
  // bextr encodes reg = dst, vvvv = control (src2), rm = source (src1),
  // hence the swapped argument order below.
  void bextrq(Register dst, Register src1, Register src2) {
    bmi1q(0xf7, dst, src2, src1);
  }
  void bextrq(Register dst, const Operand& src1, Register src2) {
    bmi1q(0xf7, dst, src2, src1);
  }
  void bextrl(Register dst, Register src1, Register src2) {
    bmi1l(0xf7, dst, src2, src1);
  }
  void bextrl(Register dst, const Operand& src1, Register src2) {
    bmi1l(0xf7, dst, src2, src1);
  }
  void blsiq(Register dst, Register src) { bmi1q(0xf3, rbx, dst, src); }
  void blsiq(Register dst, const Operand& src) { bmi1q(0xf3, rbx, dst, src); }
  void blsil(Register dst, Register src) { bmi1l(0xf3, rbx, dst, src); }
  void blsil(Register dst, const Operand& src) { bmi1l(0xf3, rbx, dst, src); }
  void blsmskq(Register dst, Register src) { bmi1q(0xf3, rdx, dst, src); }
  void blsmskq(Register dst, const Operand& src) { bmi1q(0xf3, rdx, dst, src); }
  void blsmskl(Register dst, Register src) { bmi1l(0xf3, rdx, dst, src); }
  void blsmskl(Register dst, const Operand& src) { bmi1l(0xf3, rdx, dst, src); }
  void blsrq(Register dst, Register src) { bmi1q(0xf3, rcx, dst, src); }
  void blsrq(Register dst, const Operand& src) { bmi1q(0xf3, rcx, dst, src); }
  void blsrl(Register dst, Register src) { bmi1l(0xf3, rcx, dst, src); }
  void blsrl(Register dst, const Operand& src) { bmi1l(0xf3, rcx, dst, src); }
1802 : void tzcntq(Register dst, Register src);
1803 : void tzcntq(Register dst, const Operand& src);
1804 : void tzcntl(Register dst, Register src);
1805 : void tzcntl(Register dst, const Operand& src);
1806 :
1807 : void lzcntq(Register dst, Register src);
1808 : void lzcntq(Register dst, const Operand& src);
1809 : void lzcntl(Register dst, Register src);
1810 : void lzcntl(Register dst, const Operand& src);
1811 :
1812 : void popcntq(Register dst, Register src);
1813 : void popcntq(Register dst, const Operand& src);
1814 : void popcntl(Register dst, Register src);
1815 : void popcntl(Register dst, const Operand& src);
1816 :
  // BMI2 instructions. bmi2q/bmi2l emit a VEX-encoded instruction; the
  // SIMDPrefix argument (kNone/k66/kF2/kF3) selects the concrete
  // operation for a shared opcode byte. For the shift/zero-high group
  // (bzhi/sarx/shlx/shrx) the count/index register (src2) is encoded in
  // the vvvv slot and the value (src1) in rm, hence the swapped argument
  // order.
  void bzhiq(Register dst, Register src1, Register src2) {
    bmi2q(kNone, 0xf5, dst, src2, src1);
  }
  void bzhiq(Register dst, const Operand& src1, Register src2) {
    bmi2q(kNone, 0xf5, dst, src2, src1);
  }
  void bzhil(Register dst, Register src1, Register src2) {
    bmi2l(kNone, 0xf5, dst, src2, src1);
  }
  void bzhil(Register dst, const Operand& src1, Register src2) {
    bmi2l(kNone, 0xf5, dst, src2, src1);
  }
  // mulx multiplies rdx (implicit) by src; dst1 receives the high half of
  // the product and dst2 the low half, without touching the flags.
  void mulxq(Register dst1, Register dst2, Register src) {
    bmi2q(kF2, 0xf6, dst1, dst2, src);
  }
  void mulxq(Register dst1, Register dst2, const Operand& src) {
    bmi2q(kF2, 0xf6, dst1, dst2, src);
  }
  void mulxl(Register dst1, Register dst2, Register src) {
    bmi2l(kF2, 0xf6, dst1, dst2, src);
  }
  void mulxl(Register dst1, Register dst2, const Operand& src) {
    bmi2l(kF2, 0xf6, dst1, dst2, src);
  }
  void pdepq(Register dst, Register src1, Register src2) {
    bmi2q(kF2, 0xf5, dst, src1, src2);
  }
  void pdepq(Register dst, Register src1, const Operand& src2) {
    bmi2q(kF2, 0xf5, dst, src1, src2);
  }
  void pdepl(Register dst, Register src1, Register src2) {
    bmi2l(kF2, 0xf5, dst, src1, src2);
  }
  void pdepl(Register dst, Register src1, const Operand& src2) {
    bmi2l(kF2, 0xf5, dst, src1, src2);
  }
  void pextq(Register dst, Register src1, Register src2) {
    bmi2q(kF3, 0xf5, dst, src1, src2);
  }
  void pextq(Register dst, Register src1, const Operand& src2) {
    bmi2q(kF3, 0xf5, dst, src1, src2);
  }
  void pextl(Register dst, Register src1, Register src2) {
    bmi2l(kF3, 0xf5, dst, src1, src2);
  }
  void pextl(Register dst, Register src1, const Operand& src2) {
    bmi2l(kF3, 0xf5, dst, src1, src2);
  }
  void sarxq(Register dst, Register src1, Register src2) {
    bmi2q(kF3, 0xf7, dst, src2, src1);
  }
  void sarxq(Register dst, const Operand& src1, Register src2) {
    bmi2q(kF3, 0xf7, dst, src2, src1);
  }
  void sarxl(Register dst, Register src1, Register src2) {
    bmi2l(kF3, 0xf7, dst, src2, src1);
  }
  void sarxl(Register dst, const Operand& src1, Register src2) {
    bmi2l(kF3, 0xf7, dst, src2, src1);
  }
  void shlxq(Register dst, Register src1, Register src2) {
    bmi2q(k66, 0xf7, dst, src2, src1);
  }
  void shlxq(Register dst, const Operand& src1, Register src2) {
    bmi2q(k66, 0xf7, dst, src2, src1);
  }
  void shlxl(Register dst, Register src1, Register src2) {
    bmi2l(k66, 0xf7, dst, src2, src1);
  }
  void shlxl(Register dst, const Operand& src1, Register src2) {
    bmi2l(k66, 0xf7, dst, src2, src1);
  }
  void shrxq(Register dst, Register src1, Register src2) {
    bmi2q(kF2, 0xf7, dst, src2, src1);
  }
  void shrxq(Register dst, const Operand& src1, Register src2) {
    bmi2q(kF2, 0xf7, dst, src2, src1);
  }
  void shrxl(Register dst, Register src1, Register src2) {
    bmi2l(kF2, 0xf7, dst, src2, src1);
  }
  void shrxl(Register dst, const Operand& src1, Register src2) {
    bmi2l(kF2, 0xf7, dst, src2, src1);
  }
  // Rotate-right-without-flags; takes an immediate rotate count.
  void rorxq(Register dst, Register src, byte imm8);
  void rorxq(Register dst, const Operand& src, byte imm8);
  void rorxl(Register dst, Register src, byte imm8);
  void rorxl(Register dst, const Operand& src, byte imm8);
1905 :
  // Check the code size generated from label to here. The label must
  // already be bound (its position fixed) for the difference to be valid.
  int SizeOfCodeGeneratedSince(Label* label) {
    return pc_offset() - label->pos();
  }

  // Record a comment relocation entry that can be used by a disassembler.
  // Use --code-comments to enable.
  void RecordComment(const char* msg);

  // Record a deoptimization reason that can be used by a log or cpu profiler.
  // Use --trace-deopt to enable.
  void RecordDeoptReason(DeoptimizeReason reason, SourcePosition position,
                         int id);

  // X64 does not use an embedded constant pool, so reaching this is a bug.
  void PatchConstantPoolAccessInstruction(int pc_offset, int offset,
                                          ConstantPoolEntry::Access access,
                                          ConstantPoolEntry::Type type) {
    // No embedded constant pool support.
    UNREACHABLE();
  }
1926 :
1927 : void RecordProtectedInstructionLanding(int pc_offset);
1928 :
  // Writes a single word of data in the code stream.
  // Used for inline tables, e.g., jump-tables.
  void db(uint8_t data);
  void dd(uint32_t data);
  void dq(uint64_t data);
  // Pointer-sized datum; pointers are 64 bits wide on x64, so forward to dq.
  void dp(uintptr_t data) { dq(data); }
  void dq(Label* label);

  // Check if there is less than kGap bytes available in the buffer.
  // If this is the case, we need to grow the buffer before emitting
  // an instruction or relocation information.
  // Relocation info is written from the end of the buffer toward pc_, so
  // the free region is the span between pc_ and reloc_info_writer.pos().
  inline bool buffer_overflow() const {
    return pc_ >= reloc_info_writer.pos() - kGap;
  }

  // Get the number of bytes available in the buffer.
  inline int available_space() const {
    return static_cast<int>(reloc_info_writer.pos() - pc_);
  }

  static bool IsNop(Address addr);

  // Avoid overflows for displacements etc.
  static constexpr int kMaximalBufferSize = 512 * MB;

  // Direct read/write access to a byte of already-emitted code.
  byte byte_at(int pos) { return buffer_[pos]; }
  void set_byte_at(int pos, byte value) { buffer_[pos] = value; }

  // Current emission position in the instruction buffer.
  Address pc() const { return pc_; }
1958 :
1959 : protected:
1960 : // Call near indirect
1961 : void call(const Operand& operand);
1962 :
1963 : private:
  // Raw access to the instruction buffer at a byte offset.
  byte* addr_at(int pos) { return buffer_ + pos; }
  uint32_t long_at(int pos) {
    return *reinterpret_cast<uint32_t*>(addr_at(pos));
  }
  void long_at_put(int pos, uint32_t x) {
    *reinterpret_cast<uint32_t*>(addr_at(pos)) = x;
  }

  // code emission
  void GrowBuffer();

  // Emits one byte at pc_ and advances it; callers must have ensured
  // sufficient buffer space (see EnsureSpace / buffer_overflow()).
  void emit(byte x) { *pc_++ = x; }
1976 : inline void emitl(uint32_t x);
1977 : inline void emitp(void* x, RelocInfo::Mode rmode);
1978 : inline void emitq(uint64_t x);
1979 : inline void emitw(uint16_t x);
1980 : inline void emit_code_target(Handle<Code> target, RelocInfo::Mode rmode);
1981 : inline void emit_runtime_entry(Address entry, RelocInfo::Mode rmode);
1982 : inline void emit(Immediate x);
1983 :
1984 : // Emits a REX prefix that encodes a 64-bit operand size and
1985 : // the top bit of both register codes.
1986 : // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
1987 : // REX.W is set.
1988 : inline void emit_rex_64(XMMRegister reg, Register rm_reg);
1989 : inline void emit_rex_64(Register reg, XMMRegister rm_reg);
1990 : inline void emit_rex_64(Register reg, Register rm_reg);
1991 :
1992 : // Emits a REX prefix that encodes a 64-bit operand size and
1993 : // the top bit of the destination, index, and base register codes.
1994 : // The high bit of reg is used for REX.R, the high bit of op's base
1995 : // register is used for REX.B, and the high bit of op's index register
1996 : // is used for REX.X. REX.W is set.
1997 : inline void emit_rex_64(Register reg, const Operand& op);
1998 : inline void emit_rex_64(XMMRegister reg, const Operand& op);
1999 :
2000 : // Emits a REX prefix that encodes a 64-bit operand size and
2001 : // the top bit of the register code.
2002 : // The high bit of register is used for REX.B.
2003 : // REX.W is set and REX.R and REX.X are clear.
2004 : inline void emit_rex_64(Register rm_reg);
2005 :
2006 : // Emits a REX prefix that encodes a 64-bit operand size and
2007 : // the top bit of the index and base register codes.
2008 : // The high bit of op's base register is used for REX.B, and the high
2009 : // bit of op's index register is used for REX.X.
2010 : // REX.W is set and REX.R clear.
2011 : inline void emit_rex_64(const Operand& op);
2012 :
  // Emit a REX prefix that only sets REX.W to choose a 64-bit operand size.
  // 0x48 is the REX base (0x40) with only the W bit set.
  void emit_rex_64() { emit(0x48); }
2015 :
2016 : // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
2017 : // REX.W is clear.
2018 : inline void emit_rex_32(Register reg, Register rm_reg);
2019 :
2020 : // The high bit of reg is used for REX.R, the high bit of op's base
2021 : // register is used for REX.B, and the high bit of op's index register
2022 : // is used for REX.X. REX.W is cleared.
2023 : inline void emit_rex_32(Register reg, const Operand& op);
2024 :
2025 : // High bit of rm_reg goes to REX.B.
2026 : // REX.W, REX.R and REX.X are clear.
2027 : inline void emit_rex_32(Register rm_reg);
2028 :
2029 : // High bit of base goes to REX.B and high bit of index to REX.X.
2030 : // REX.W and REX.R are clear.
2031 : inline void emit_rex_32(const Operand& op);
2032 :
2033 : // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
2034 : // REX.W is cleared. If no REX bits are set, no byte is emitted.
2035 : inline void emit_optional_rex_32(Register reg, Register rm_reg);
2036 :
2037 : // The high bit of reg is used for REX.R, the high bit of op's base
2038 : // register is used for REX.B, and the high bit of op's index register
2039 : // is used for REX.X. REX.W is cleared. If no REX bits are set, nothing
2040 : // is emitted.
2041 : inline void emit_optional_rex_32(Register reg, const Operand& op);
2042 :
2043 : // As for emit_optional_rex_32(Register, Register), except that
2044 : // the registers are XMM registers.
2045 : inline void emit_optional_rex_32(XMMRegister reg, XMMRegister base);
2046 :
2047 : // As for emit_optional_rex_32(Register, Register), except that
2048 : // one of the registers is an XMM registers.
2049 : inline void emit_optional_rex_32(XMMRegister reg, Register base);
2050 :
2051 : // As for emit_optional_rex_32(Register, Register), except that
2052 : // one of the registers is an XMM registers.
2053 : inline void emit_optional_rex_32(Register reg, XMMRegister base);
2054 :
2055 : // As for emit_optional_rex_32(Register, const Operand&), except that
2056 : // the register is an XMM register.
2057 : inline void emit_optional_rex_32(XMMRegister reg, const Operand& op);
2058 :
2059 : // Optionally do as emit_rex_32(Register) if the register number has
2060 : // the high bit set.
2061 : inline void emit_optional_rex_32(Register rm_reg);
2062 : inline void emit_optional_rex_32(XMMRegister rm_reg);
2063 :
2064 : // Optionally do as emit_rex_32(const Operand&) if the operand register
2065 : // numbers have a high bit set.
2066 : inline void emit_optional_rex_32(const Operand& op);
2067 :
  // No-operand form: emits REX.W for a 64-bit operand size, nothing at all
  // for a 32-bit one.
  void emit_rex(int size) {
    if (size == kInt64Size) {
      emit_rex_64();
    } else {
      DCHECK_EQ(size, kInt32Size);
    }
  }

  // Single-operand form: mandatory REX.W prefix for 64-bit sizes; for
  // 32-bit sizes an optional REX prefix that is emitted only when the
  // operand's register bits require it.
  template<class P1>
  void emit_rex(P1 p1, int size) {
    if (size == kInt64Size) {
      emit_rex_64(p1);
    } else {
      DCHECK_EQ(size, kInt32Size);
      emit_optional_rex_32(p1);
    }
  }
2085 :
2086 : template<class P1, class P2>
2087 43070241 : void emit_rex(P1 p1, P2 p2, int size) {
2088 43070241 : if (size == kInt64Size) {
2089 : emit_rex_64(p1, p2);
2090 : } else {
2091 : DCHECK_EQ(size, kInt32Size);
2092 : emit_optional_rex_32(p1, p2);
2093 : }
2094 43070241 : }
2095 :
  // Emit vex prefix. 0xc5 introduces the two-byte VEX form, 0xc4 the
  // three-byte form.
  void emit_vex2_byte0() { emit(0xc5); }
  inline void emit_vex2_byte1(XMMRegister reg, XMMRegister v, VectorLength l,
                              SIMDPrefix pp);
  void emit_vex3_byte0() { emit(0xc4); }
2101 : inline void emit_vex3_byte1(XMMRegister reg, XMMRegister rm, LeadingOpcode m);
2102 : inline void emit_vex3_byte1(XMMRegister reg, const Operand& rm,
2103 : LeadingOpcode m);
2104 : inline void emit_vex3_byte2(VexW w, XMMRegister v, VectorLength l,
2105 : SIMDPrefix pp);
2106 : inline void emit_vex_prefix(XMMRegister reg, XMMRegister v, XMMRegister rm,
2107 : VectorLength l, SIMDPrefix pp, LeadingOpcode m,
2108 : VexW w);
2109 : inline void emit_vex_prefix(Register reg, Register v, Register rm,
2110 : VectorLength l, SIMDPrefix pp, LeadingOpcode m,
2111 : VexW w);
2112 : inline void emit_vex_prefix(XMMRegister reg, XMMRegister v, const Operand& rm,
2113 : VectorLength l, SIMDPrefix pp, LeadingOpcode m,
2114 : VexW w);
2115 : inline void emit_vex_prefix(Register reg, Register v, const Operand& rm,
2116 : VectorLength l, SIMDPrefix pp, LeadingOpcode m,
2117 : VexW w);
2118 :
  // Emit the ModR/M byte, and optionally the SIB byte and
  // 1- or 4-byte offset for a memory operand. Also encodes
  // the second operand of the operation, a register or operation
  // subcode, into the reg field of the ModR/M byte.
  void emit_operand(Register reg, const Operand& adr) {
    emit_operand(reg.low_bits(), adr);
  }

  // Emit the ModR/M byte, and optionally the SIB byte and
  // 1- or 4-byte offset for a memory operand. Also used to encode
  // a three-bit opcode extension into the ModR/M byte.
  void emit_operand(int rm, const Operand& adr);

  // Emit a ModR/M byte with registers coded in the reg and rm_reg fields.
  // 0xC0 sets the mod bits to 11 (register-direct addressing).
  void emit_modrm(Register reg, Register rm_reg) {
    emit(0xC0 | reg.low_bits() << 3 | rm_reg.low_bits());
  }

  // Emit a ModR/M byte with an operation subcode in the reg field and
  // a register in the rm_reg field.
  void emit_modrm(int code, Register rm_reg) {
    DCHECK(is_uint3(code));
    emit(0xC0 | code << 3 | rm_reg.low_bits());
  }
2143 :
2144 : // Emit the code-object-relative offset of the label's position
2145 : inline void emit_code_relative_offset(Label* label);
2146 :
2147 : // The first argument is the reg field, the second argument is the r/m field.
2148 : void emit_sse_operand(XMMRegister dst, XMMRegister src);
2149 : void emit_sse_operand(XMMRegister reg, const Operand& adr);
2150 : void emit_sse_operand(Register reg, const Operand& adr);
2151 : void emit_sse_operand(XMMRegister dst, Register src);
2152 : void emit_sse_operand(Register dst, XMMRegister src);
2153 : void emit_sse_operand(XMMRegister dst);
2154 :
2155 : // Emit machine code for one of the operations ADD, ADC, SUB, SBC,
2156 : // AND, OR, XOR, or CMP. The encodings of these operations are all
2157 : // similar, differing just in the opcode or in the reg field of the
2158 : // ModR/M byte.
2159 : void arithmetic_op_8(byte opcode, Register reg, Register rm_reg);
2160 : void arithmetic_op_8(byte opcode, Register reg, const Operand& rm_reg);
2161 : void arithmetic_op_16(byte opcode, Register reg, Register rm_reg);
2162 : void arithmetic_op_16(byte opcode, Register reg, const Operand& rm_reg);
2163 : // Operate on operands/registers with pointer size, 32-bit or 64-bit size.
2164 : void arithmetic_op(byte opcode, Register reg, Register rm_reg, int size);
2165 : void arithmetic_op(byte opcode,
2166 : Register reg,
2167 : const Operand& rm_reg,
2168 : int size);
2169 : // Operate on a byte in memory or register.
2170 : void immediate_arithmetic_op_8(byte subcode,
2171 : Register dst,
2172 : Immediate src);
2173 : void immediate_arithmetic_op_8(byte subcode,
2174 : const Operand& dst,
2175 : Immediate src);
2176 : // Operate on a word in memory or register.
2177 : void immediate_arithmetic_op_16(byte subcode,
2178 : Register dst,
2179 : Immediate src);
2180 : void immediate_arithmetic_op_16(byte subcode,
2181 : const Operand& dst,
2182 : Immediate src);
2183 : // Operate on operands/registers with pointer size, 32-bit or 64-bit size.
2184 : void immediate_arithmetic_op(byte subcode,
2185 : Register dst,
2186 : Immediate src,
2187 : int size);
2188 : void immediate_arithmetic_op(byte subcode,
2189 : const Operand& dst,
2190 : Immediate src,
2191 : int size);
2192 :
2193 : // Emit machine code for a shift operation.
2194 : void shift(Operand dst, Immediate shift_amount, int subcode, int size);
2195 : void shift(Register dst, Immediate shift_amount, int subcode, int size);
2196 : // Shift dst by cl % 64 bits.
2197 : void shift(Register dst, int subcode, int size);
2198 : void shift(Operand dst, int subcode, int size);
2199 :
2200 : void emit_farith(int b1, int b2, int i);
2201 :
2202 : // labels
2203 : // void print(Label* L);
2204 : void bind_to(Label* L, int pos);
2205 :
2206 : // record reloc info for current pc_
2207 : void RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data = 0);
2208 :
  // Arithmetics
  // Each emit_<op> helper encodes one ALU instruction. The opcode byte
  // selects both operation and direction, e.g. 0x03 is "ADD reg, reg/mem"
  // while 0x1 is "ADD reg/mem, reg". The subcode passed to
  // immediate_arithmetic_op is the /digit opcode extension used by the
  // immediate forms. "size" is kInt32Size or kInt64Size.
  void emit_add(Register dst, Register src, int size) {
    arithmetic_op(0x03, dst, src, size);
  }

  void emit_add(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x0, dst, src, size);
  }

  void emit_add(Register dst, const Operand& src, int size) {
    arithmetic_op(0x03, dst, src, size);
  }

  void emit_add(const Operand& dst, Register src, int size) {
    arithmetic_op(0x1, src, dst, size);
  }

  void emit_add(const Operand& dst, Immediate src, int size) {
    immediate_arithmetic_op(0x0, dst, src, size);
  }

  void emit_and(Register dst, Register src, int size) {
    arithmetic_op(0x23, dst, src, size);
  }

  void emit_and(Register dst, const Operand& src, int size) {
    arithmetic_op(0x23, dst, src, size);
  }

  void emit_and(const Operand& dst, Register src, int size) {
    arithmetic_op(0x21, src, dst, size);
  }

  void emit_and(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x4, dst, src, size);
  }

  void emit_and(const Operand& dst, Immediate src, int size) {
    immediate_arithmetic_op(0x4, dst, src, size);
  }

  void emit_cmp(Register dst, Register src, int size) {
    arithmetic_op(0x3B, dst, src, size);
  }

  void emit_cmp(Register dst, const Operand& src, int size) {
    arithmetic_op(0x3B, dst, src, size);
  }

  void emit_cmp(const Operand& dst, Register src, int size) {
    arithmetic_op(0x39, src, dst, size);
  }

  void emit_cmp(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x7, dst, src, size);
  }

  void emit_cmp(const Operand& dst, Immediate src, int size) {
    immediate_arithmetic_op(0x7, dst, src, size);
  }
2269 :
2270 : // Compare {al,ax,eax,rax} with src. If equal, set ZF and write dst into
2271 : // src. Otherwise clear ZF and write src into {al,ax,eax,rax}. This
2272 : // operation is only atomic if prefixed by the lock instruction.
2273 : void emit_cmpxchg(const Operand& dst, Register src, int size);
2274 :
2275 : void emit_dec(Register dst, int size);
2276 : void emit_dec(const Operand& dst, int size);
2277 :
2278 : // Divide rdx:rax by src. Quotient in rax, remainder in rdx when size is 64.
2279 : // Divide edx:eax by lower 32 bits of src. Quotient in eax, remainder in edx
2280 : // when size is 32.
2281 : void emit_idiv(Register src, int size);
2282 : void emit_div(Register src, int size);
2283 :
2284 : // Signed multiply instructions.
2285 : // rdx:rax = rax * src when size is 64 or edx:eax = eax * src when size is 32.
2286 : void emit_imul(Register src, int size);
2287 : void emit_imul(const Operand& src, int size);
2288 : void emit_imul(Register dst, Register src, int size);
2289 : void emit_imul(Register dst, const Operand& src, int size);
2290 : void emit_imul(Register dst, Register src, Immediate imm, int size);
2291 : void emit_imul(Register dst, const Operand& src, Immediate imm, int size);
2292 :
2293 : void emit_inc(Register dst, int size);
2294 : void emit_inc(const Operand& dst, int size);
2295 :
2296 : void emit_lea(Register dst, const Operand& src, int size);
2297 :
2298 : void emit_mov(Register dst, const Operand& src, int size);
2299 : void emit_mov(Register dst, Register src, int size);
2300 : void emit_mov(const Operand& dst, Register src, int size);
2301 : void emit_mov(Register dst, Immediate value, int size);
2302 : void emit_mov(const Operand& dst, Immediate value, int size);
2303 :
2304 : void emit_movzxb(Register dst, const Operand& src, int size);
2305 : void emit_movzxb(Register dst, Register src, int size);
2306 : void emit_movzxw(Register dst, const Operand& src, int size);
2307 : void emit_movzxw(Register dst, Register src, int size);
2308 :
2309 : void emit_neg(Register dst, int size);
2310 : void emit_neg(const Operand& dst, int size);
2311 :
2312 : void emit_not(Register dst, int size);
2313 : void emit_not(const Operand& dst, int size);
2314 :
  // OR family: 0x0B is "OR reg, reg/mem", 0x9 is "OR reg/mem, reg",
  // /1 is the immediate-form opcode extension.
  void emit_or(Register dst, Register src, int size) {
    arithmetic_op(0x0B, dst, src, size);
  }

  void emit_or(Register dst, const Operand& src, int size) {
    arithmetic_op(0x0B, dst, src, size);
  }

  void emit_or(const Operand& dst, Register src, int size) {
    arithmetic_op(0x9, src, dst, size);
  }

  void emit_or(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x1, dst, src, size);
  }

  void emit_or(const Operand& dst, Immediate src, int size) {
    immediate_arithmetic_op(0x1, dst, src, size);
  }

  void emit_repmovs(int size);

  // Subtract with borrow (SBB), register-register form.
  void emit_sbb(Register dst, Register src, int size) {
    arithmetic_op(0x1b, dst, src, size);
  }

  // SUB family: 0x2B is "SUB reg, reg/mem", 0x29 is "SUB reg/mem, reg",
  // /5 is the immediate-form opcode extension.
  void emit_sub(Register dst, Register src, int size) {
    arithmetic_op(0x2B, dst, src, size);
  }

  void emit_sub(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x5, dst, src, size);
  }

  void emit_sub(Register dst, const Operand& src, int size) {
    arithmetic_op(0x2B, dst, src, size);
  }

  void emit_sub(const Operand& dst, Register src, int size) {
    arithmetic_op(0x29, src, dst, size);
  }

  void emit_sub(const Operand& dst, Immediate src, int size) {
    immediate_arithmetic_op(0x5, dst, src, size);
  }
2360 :
  void emit_test(Register dst, Register src, int size);
  void emit_test(Register reg, Immediate mask, int size);
  void emit_test(const Operand& op, Register reg, int size);
  void emit_test(const Operand& op, Immediate mask, int size);
  // TEST is symmetric in its operands, so the (reg, mem) form reuses the
  // (mem, reg) encoding.
  void emit_test(Register reg, const Operand& op, int size) {
    return emit_test(op, reg, size);
  }

  void emit_xchg(Register dst, Register src, int size);
  void emit_xchg(Register dst, const Operand& src, int size);

  // XOR family: 0x33 is "XOR reg, reg/mem", 0x31 is "XOR reg/mem, reg",
  // /6 is the immediate-form opcode extension.
  void emit_xor(Register dst, Register src, int size) {
    if (size == kInt64Size && dst.code() == src.code()) {
      // 32 bit operations zero the top 32 bits of 64 bit registers. Therefore
      // there is no need to make this a 64 bit operation.
      arithmetic_op(0x33, dst, src, kInt32Size);
    } else {
      arithmetic_op(0x33, dst, src, size);
    }
  }

  void emit_xor(Register dst, const Operand& src, int size) {
    arithmetic_op(0x33, dst, src, size);
  }

  void emit_xor(Register dst, Immediate src, int size) {
    immediate_arithmetic_op(0x6, dst, src, size);
  }

  void emit_xor(const Operand& dst, Immediate src, int size) {
    immediate_arithmetic_op(0x6, dst, src, size);
  }

  void emit_xor(const Operand& dst, Register src, int size) {
    arithmetic_op(0x31, src, dst, size);
  }
2397 :
2398 : // Most BMI instructions are similar.
2399 : void bmi1q(byte op, Register reg, Register vreg, Register rm);
2400 : void bmi1q(byte op, Register reg, Register vreg, const Operand& rm);
2401 : void bmi1l(byte op, Register reg, Register vreg, Register rm);
2402 : void bmi1l(byte op, Register reg, Register vreg, const Operand& rm);
2403 : void bmi2q(SIMDPrefix pp, byte op, Register reg, Register vreg, Register rm);
2404 : void bmi2q(SIMDPrefix pp, byte op, Register reg, Register vreg,
2405 : const Operand& rm);
2406 : void bmi2l(SIMDPrefix pp, byte op, Register reg, Register vreg, Register rm);
2407 : void bmi2l(SIMDPrefix pp, byte op, Register reg, Register vreg,
2408 : const Operand& rm);
2409 :
2410 : // record the position of jmp/jcc instruction
2411 : void record_farjmp_position(Label* L, int pos);
2412 :
2413 : bool is_optimizable_farjmp(int idx);
2414 :
  // EnsureSpace needs buffer_overflow()/GrowBuffer()/available_space().
  friend class EnsureSpace;
  friend class RegExpMacroAssemblerX64;

  // code generation
  RelocInfoWriter reloc_info_writer;

  // Internal reference positions, required for (potential) patching in
  // GrowBuffer(); contains only those internal references whose labels
  // are already bound.
  std::deque<int> internal_reference_positions_;

  // Code objects referenced from emitted code. NOTE(review): presumably
  // indexed by a code-target index embedded in the instruction stream —
  // confirm against the uses of code_targets_.
  std::vector<Handle<Code>> code_targets_;

  // The following functions help with avoiding allocations of embedded heap
  // objects during the code assembly phase. {RequestHeapObject} records the
  // need for a future heap number allocation or code stub generation. After
  // code assembly, {AllocateAndInstallRequestedHeapObjects} will allocate these
  // objects and place them where they are expected (determined by the pc offset
  // associated with each request). That is, for each request, it will patch the
  // dummy heap object handle that we emitted during code assembly with the
  // actual heap object handle.
  void RequestHeapObject(HeapObjectRequest request);
  void AllocateAndInstallRequestedHeapObjects(Isolate* isolate);

  // Pending requests, processed by AllocateAndInstallRequestedHeapObjects.
  std::forward_list<HeapObjectRequest> heap_object_requests_;

  // Variables for this instance of assembler
  // Far-jump bookkeeping: count of recorded jumps, their positions, and the
  // per-label lists of jump sites (filled by record_farjmp_position above).
  int farjmp_num_ = 0;
  std::deque<int> farjmp_positions_;
  std::map<Label*, std::vector<int>> label_farjmp_maps_;
2444 : std::map<Label*, std::vector<int>> label_farjmp_maps_;
2445 : };
2446 :
2447 :
2448 : // Helper class that ensures that there is enough space for generating
2449 : // instructions and relocation information. The constructor makes
2450 : // sure that there is enough space and (in debug mode) the destructor
2451 : // checks that we did not generate too much.
2452 : class EnsureSpace BASE_EMBEDDED {
2453 : public:
2454 : explicit EnsureSpace(Assembler* assembler) : assembler_(assembler) {
2455 1360774231 : if (assembler_->buffer_overflow()) assembler_->GrowBuffer();
2456 : #ifdef DEBUG
2457 : space_before_ = assembler_->available_space();
2458 : #endif
2459 : }
2460 :
2461 : #ifdef DEBUG
2462 : ~EnsureSpace() {
2463 : int bytes_generated = space_before_ - assembler_->available_space();
2464 : DCHECK(bytes_generated < assembler_->kGap);
2465 : }
2466 : #endif
2467 :
2468 : private:
2469 : Assembler* assembler_;
2470 : #ifdef DEBUG
2471 : int space_before_;
2472 : #endif
2473 : };
2474 :
2475 : } // namespace internal
2476 : } // namespace v8
2477 :
2478 : #endif // V8_X64_ASSEMBLER_X64_H_
|