Line data Source code
1 : // Copyright (c) 1994-2006 Sun Microsystems Inc.
2 : // All Rights Reserved.
3 : //
4 : // Redistribution and use in source and binary forms, with or without
5 : // modification, are permitted provided that the following conditions are
6 : // met:
7 : //
8 : // - Redistributions of source code must retain the above copyright notice,
9 : // this list of conditions and the following disclaimer.
10 : //
11 : // - Redistribution in binary form must reproduce the above copyright
12 : // notice, this list of conditions and the following disclaimer in the
13 : // documentation and/or other materials provided with the distribution.
14 : //
15 : // - Neither the name of Sun Microsystems or the names of contributors may
16 : // be used to endorse or promote products derived from this software without
17 : // specific prior written permission.
18 : //
19 : // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
20 : // IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 : // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 : // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23 : // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 : // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 : // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26 : // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27 : // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28 : // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 : // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 :
31 : // The original source code covered by the above license above has been
32 : // modified significantly by Google Inc.
33 : // Copyright 2012 the V8 project authors. All rights reserved.
34 :
35 : // A lightweight X64 Assembler.
36 :
37 : #ifndef V8_X64_ASSEMBLER_X64_H_
38 : #define V8_X64_ASSEMBLER_X64_H_
39 :
40 : #include <deque>
41 :
42 : #include "src/assembler.h"
43 : #include "src/x64/sse-instr.h"
44 :
45 : namespace v8 {
46 : namespace internal {
47 :
48 : // Utility functions
49 : // X-macro: invokes V once per general-purpose register name; the order here fixes each register's numeric code (see Register::Code).
50 : #define GENERAL_REGISTERS(V) \
51 : V(rax) \
52 : V(rcx) \
53 : V(rdx) \
54 : V(rbx) \
55 : V(rsp) \
56 : V(rbp) \
57 : V(rsi) \
58 : V(rdi) \
59 : V(r8) \
60 : V(r9) \
61 : V(r10) \
62 : V(r11) \
63 : V(r12) \
64 : V(r13) \
65 : V(r14) \
66 : V(r15)
67 : // Subset of GENERAL_REGISTERS: rsp, rbp, r10 and r13 are left out. The order differs too (rbx precedes rdx and rcx).
68 : #define ALLOCATABLE_GENERAL_REGISTERS(V) \
69 : V(rax) \
70 : V(rbx) \
71 : V(rdx) \
72 : V(rcx) \
73 : V(rsi) \
74 : V(rdi) \
75 : V(r8) \
76 : V(r9) \
77 : V(r11) \
78 : V(r12) \
79 : V(r14) \
80 : V(r15)
81 :
82 : // The length of pushq(rbp), movp(rbp, rsp), Push(rsi) and Push(rdi).
83 : constexpr int kNoCodeAgeSequenceLength = kPointerSize == kInt64Size ? 6 : 17;
84 :
85 : // CPU Registers.
86 : //
87 : // 1) We would prefer to use an enum, but enum values are assignment-
88 : // compatible with int, which has caused code-generation bugs.
89 : //
90 : // 2) We would prefer to use a class instead of a struct but we don't like
91 : // the register initialization to depend on the particular initialization
92 : // order (which appears to be different on OS X, Linux, and Windows for the
93 : // installed versions of C++ we tried). Using a struct permits C-style
94 : // "initialization". Also, the Register objects cannot be const as this
95 : // forces initialization stubs in MSVC, making us dependent on initialization
96 : // order.
97 : //
98 : // 3) By not using an enum, we are possibly preventing the compiler from
99 : // doing certain constant folds, which may significantly reduce the
100 : // code generated for some assembly instructions (because they boil down
101 : // to a few constants). If this is a problem, we could change the code
102 : // such that we use an enum in optimized mode, and the struct in debug
103 : // mode. This way we get the compile-time error checking in debug mode
104 : // and best performance in optimized code.
105 : //
106 : struct Register {  // Value type identifying one general-purpose register by its numeric code.
107 : enum Code {
108 : #define REGISTER_CODE(R) kCode_##R,
109 : GENERAL_REGISTERS(REGISTER_CODE)
110 : #undef REGISTER_CODE
111 : kAfterLast,
112 : kCode_no_reg = -1
113 : };
114 : // kAfterLast follows the last GENERAL_REGISTERS entry, so it equals the number of registers.
115 : static constexpr int kNumRegisters = Code::kAfterLast;
116 : // Wraps a raw code in a Register; DCHECKs that 0 <= code < kNumRegisters.
117 : static Register from_code(int code) {
118 : DCHECK(code >= 0);
119 : DCHECK(code < kNumRegisters);
120 : Register r = {code};
121 : return r;
122 : }
123 239542 : bool is_valid() const { return 0 <= reg_code && reg_code < kNumRegisters; }  // False for kCode_no_reg (-1).
124 12245981 : bool is(Register reg) const { return reg_code == reg.reg_code; }  // Equality on the register code.
125 : int code() const {
126 : DCHECK(is_valid());
127 16246837 : return reg_code;
128 : }
129 : int bit() const {
130 : DCHECK(is_valid());
131 168 : return 1 << reg_code;  // Mask with only bit number reg_code set.
132 : }
133 : // True when the code is <= 3 (rax, rcx, rdx, rbx). NOTE(review): no is_valid() guard, so kCode_no_reg (-1) also yields true.
134 648713 : bool is_byte_register() const { return reg_code <= 3; }
135 : // Return the high bit of the register code as a 0 or 1. Used often
136 : // when constructing the REX prefix byte.
137 330724440 : int high_bit() const { return reg_code >> 3; }
138 : // Return the 3 low bits of the register code. Used when encoding registers
139 : // in modR/M, SIB, and opcode bytes.
140 337705515 : int low_bits() const { return reg_code & 0x7; }
141 :
142 : // Unfortunately we can't make this private in a struct when initializing
143 : // by assignment.
144 : int reg_code;
145 : };
146 : // One constexpr Register constant per GENERAL_REGISTERS entry, plus no_reg as the invalid sentinel (code -1).
147 : #define DECLARE_REGISTER(R) constexpr Register R = {Register::kCode_##R};
148 : GENERAL_REGISTERS(DECLARE_REGISTER)
149 : #undef DECLARE_REGISTER
150 : constexpr Register no_reg = {Register::kCode_no_reg};
151 : // arg_reg_1..arg_reg_4 name registers by argument position; the mapping depends on whether _WIN64 is defined.
152 : #ifdef _WIN64
153 : // Windows calling convention
154 : constexpr Register arg_reg_1 = {Register::kCode_rcx};
155 : constexpr Register arg_reg_2 = {Register::kCode_rdx};
156 : constexpr Register arg_reg_3 = {Register::kCode_r8};
157 : constexpr Register arg_reg_4 = {Register::kCode_r9};
158 : #else
159 : // AMD64 calling convention
160 : constexpr Register arg_reg_1 = {Register::kCode_rdi};
161 : constexpr Register arg_reg_2 = {Register::kCode_rsi};
162 : constexpr Register arg_reg_3 = {Register::kCode_rdx};
163 : constexpr Register arg_reg_4 = {Register::kCode_rcx};
164 : #endif // _WIN64
165 :
166 : // X-macro: invokes V once per XMM register name, xmm0 through xmm15; the order fixes XMMRegister::Code.
167 : #define DOUBLE_REGISTERS(V) \
168 : V(xmm0) \
169 : V(xmm1) \
170 : V(xmm2) \
171 : V(xmm3) \
172 : V(xmm4) \
173 : V(xmm5) \
174 : V(xmm6) \
175 : V(xmm7) \
176 : V(xmm8) \
177 : V(xmm9) \
178 : V(xmm10) \
179 : V(xmm11) \
180 : V(xmm12) \
181 : V(xmm13) \
182 : V(xmm14) \
183 : V(xmm15)
184 : // The float and 128-bit SIMD register lists are aliases of the double register list.
185 : #define FLOAT_REGISTERS DOUBLE_REGISTERS
186 : #define SIMD128_REGISTERS DOUBLE_REGISTERS
187 : // Same list as DOUBLE_REGISTERS except that xmm15 is omitted.
188 : #define ALLOCATABLE_DOUBLE_REGISTERS(V) \
189 : V(xmm0) \
190 : V(xmm1) \
191 : V(xmm2) \
192 : V(xmm3) \
193 : V(xmm4) \
194 : V(xmm5) \
195 : V(xmm6) \
196 : V(xmm7) \
197 : V(xmm8) \
198 : V(xmm9) \
199 : V(xmm10) \
200 : V(xmm11) \
201 : V(xmm12) \
202 : V(xmm13) \
203 : V(xmm14)
204 :
205 : constexpr bool kSimpleFPAliasing = true;
206 : constexpr bool kSimdMaskRegisters = false;
207 :
208 : struct XMMRegister {  // Value type identifying one XMM register by its numeric code.
209 : enum Code {
210 : #define REGISTER_CODE(R) kCode_##R,
211 : DOUBLE_REGISTERS(REGISTER_CODE)
212 : #undef REGISTER_CODE
213 : kAfterLast,
214 : kCode_no_reg = -1
215 : };
216 : // kAfterLast follows the last DOUBLE_REGISTERS entry, so it equals the number of registers.
217 : static constexpr int kMaxNumRegisters = Code::kAfterLast;
218 : // Wraps a raw code in an XMMRegister. NOTE(review): unlike Register::from_code, the code is not range-checked here.
219 : static XMMRegister from_code(int code) {
220 : XMMRegister result = {code};
221 : return result;
222 : }
223 : // Validity and equality are defined purely on the register code; kCode_no_reg (-1) is not valid.
224 : bool is_valid() const { return 0 <= reg_code && reg_code < kMaxNumRegisters; }
225 : bool is(XMMRegister reg) const { return reg_code == reg.reg_code; }
226 : int code() const {
227 : DCHECK(is_valid());
228 164248 : return reg_code;
229 : }
230 :
231 : // Return the high bit of the register code as a 0 or 1. Used often
232 : // when constructing the REX prefix byte.
233 11911342 : int high_bit() const { return reg_code >> 3; }
234 : // Return the 3 low bits of the register code. Used when encoding registers
235 : // in modR/M, SIB, and opcode bytes.
236 4512864 : int low_bits() const { return reg_code & 0x7; }
237 :
238 : // Unfortunately we can't make this private in a struct when initializing
239 : // by assignment.
240 : int reg_code;
241 : };
242 : // Float, double and 128-bit SIMD registers are all the same XMMRegister type.
243 : typedef XMMRegister FloatRegister;
244 :
245 : typedef XMMRegister DoubleRegister;
246 :
247 : typedef XMMRegister Simd128Register;
248 : // One constexpr constant per DOUBLE_REGISTERS entry, plus no_double_reg as the invalid sentinel (code -1).
249 : #define DECLARE_REGISTER(R) \
250 : constexpr DoubleRegister R = {DoubleRegister::kCode_##R};
251 : DOUBLE_REGISTERS(DECLARE_REGISTER)
252 : #undef DECLARE_REGISTER
253 : constexpr DoubleRegister no_double_reg = {DoubleRegister::kCode_no_reg};
254 :
255 : enum Condition {  // Condition codes; Assembler ORs them into opcode bytes (e.g. kJccShortPrefix | carry).
256 : // any value < 0 is considered no_condition
257 : no_condition = -1,
258 : // Real conditions occupy 0..15 in complementary even/odd pairs, so flipping bit 0 negates one (see NegateCondition).
259 : overflow = 0,
260 : no_overflow = 1,
261 : below = 2,
262 : above_equal = 3,
263 : equal = 4,
264 : not_equal = 5,
265 : below_equal = 6,
266 : above = 7,
267 : negative = 8,
268 : positive = 9,
269 : parity_even = 10,
270 : parity_odd = 11,
271 : less = 12,
272 : greater_equal = 13,
273 : less_equal = 14,
274 : greater = 15,
275 :
276 : // Fake conditions that are handled by the
277 : // opcodes using them.
278 : always = 16,
279 : never = 17,
280 : // aliases
281 : carry = below,
282 : not_carry = above_equal,
283 : zero = equal,
284 : not_zero = not_equal,
285 : sign = negative,
286 : not_sign = positive,
287 : last_condition = greater  // Largest real condition value (15); always/never lie above it.
288 : };
289 :
290 :
291 : // Returns the equivalent of !cc.
292 : // Negation of the default no_condition (-1) results in a non-default
293 : // no_condition value (-2). As long as tests for no_condition check
294 : // for condition < 0, this will work as expected.
295 : inline Condition NegateCondition(Condition cc) {
296 690713 : return static_cast<Condition>(cc ^ 1);  // Flip bit 0: complementary conditions differ only in that bit (e.g. equal = 4, not_equal = 5).
297 : }
298 :
299 :
300 : // Commute a condition such that {a cond b == b cond' a}. Ordering conditions are mirrored; all others are returned as is.
301 6780 : inline Condition CommuteCondition(Condition cc) {
302 6780 : switch (cc) {
303 : case below:
304 : return above;
305 : case above:
306 0 : return below;
307 : case above_equal:
308 0 : return below_equal;
309 : case below_equal:
310 6384 : return above_equal;
311 : case less:
312 71 : return greater;
313 : case greater:
314 15 : return less;
315 : case greater_equal:
316 49 : return less_equal;
317 : case less_equal:
318 26 : return greater_equal;
319 : default:
320 235 : return cc;  // Not one of the ordering comparisons above (e.g. equal, not_equal): returned unchanged.
321 : }
322 : }
323 :
324 : // NOTE(review): values look like the SSE4.1 ROUNDSS/ROUNDSD rounding-control immediate encoding — confirm against the emitting code.
325 : enum RoundingMode {
326 : kRoundToNearest = 0x0,
327 : kRoundDown = 0x1,
328 : kRoundUp = 0x2,
329 : kRoundToZero = 0x3
330 : };
331 :
332 :
333 : // -----------------------------------------------------------------------------
334 : // Machine instruction Immediates
335 :
336 : class Immediate BASE_EMBEDDED {  // A 32-bit immediate value together with its relocation mode.
337 : public:
338 17855898 : explicit Immediate(int32_t value) : value_(value) {}  // rmode_ keeps its default, RelocInfo::NONE32.
339 : explicit Immediate(int32_t value, RelocInfo::Mode rmode)
340 254150 : : value_(value), rmode_(rmode) {}
341 : explicit Immediate(Smi* value) {  // Uses the Smi pointer's bits, narrowed to int32_t, as the value.
342 : DCHECK(SmiValuesAre31Bits()); // Only available for 31-bit SMI.
343 : value_ = static_cast<int32_t>(reinterpret_cast<intptr_t>(value));
344 : }
345 :
346 : private:
347 : int32_t value_;
348 : RelocInfo::Mode rmode_ = RelocInfo::NONE32;
349 : // Grants Assembler access to the private fields; this class declares no public accessors.
350 : friend class Assembler;
351 : };
352 :
353 :
354 : // -----------------------------------------------------------------------------
355 : // Machine instruction Operands
356 : // Index scale of a memory operand. Each enumerator's value is log2 of its multiplier (times_8 == 3).
357 : enum ScaleFactor {
358 : times_1 = 0,
359 : times_2 = 1,
360 : times_4 = 2,
361 : times_8 = 3,
362 : times_int_size = times_4,
363 : times_pointer_size = (kPointerSize == 8) ? times_8 : times_4
364 : };
365 :
366 :
367 : class Operand BASE_EMBEDDED {  // Pre-encoded memory operand of an instruction (addressing form, registers, displacement).
368 : public:
369 : // [base + disp/r]
370 : Operand(Register base, int32_t disp);
371 :
372 : // [base + index*scale + disp/r]
373 : Operand(Register base,
374 : Register index,
375 : ScaleFactor scale,
376 : int32_t disp);
377 :
378 : // [index*scale + disp/r]
379 : Operand(Register index,
380 : ScaleFactor scale,
381 : int32_t disp);
382 :
383 : // Offset from existing memory operand.
384 : // Offset is added to existing displacement as 32-bit signed values and
385 : // this must not overflow.
386 : Operand(const Operand& base, int32_t offset);
387 :
388 : // [rip + disp/r]
389 : explicit Operand(Label* label);
390 :
391 : // Checks whether either base or index register is the given register.
392 : // Does not check the "reg" part of the Operand.
393 : bool AddressUsesRegister(Register reg) const;
394 :
395 : // Queries related to the size of the generated instruction.
396 : // Whether the generated instruction will have a REX prefix.
397 : bool requires_rex() const { return rex_ != 0; }
398 : // Size of the ModR/M, SIB and displacement parts of the generated
399 : // instruction.
400 4851 : int operand_size() const { return len_; }
401 :
402 : private:
403 : byte rex_;  // Zero when the operand needs no REX prefix (see requires_rex()).
404 : byte buf_[9];  // NOTE(review): presumably the ModR/M, SIB and displacement bytes counted by operand_size() — confirm.
405 : // The number of bytes of buf_ in use.
406 : byte len_;
407 :
408 : // Set the ModR/M byte without an encoded 'reg' register. The
409 : // register is encoded later as part of the emit_operand operation.
410 : // set_modrm can be called before or after set_sib and set_disp*.
411 : inline void set_modrm(int mod, Register rm);
412 :
413 : // Set the SIB byte if one is needed. Sets the length to 2 rather than 1.
414 : inline void set_sib(ScaleFactor scale, Register index, Register base);
415 :
416 : // Adds operand displacement fields (offsets added to the memory address).
417 : // Needs to be called after set_sib, not before it.
418 : inline void set_disp8(int disp);
419 : inline void set_disp32(int disp);
420 : inline void set_disp64(int64_t disp); // for labels.
421 : // Grants Assembler access to the private encoding state.
422 : friend class Assembler;
423 : };
424 : // Base names for which DECLARE_INSTRUCTION (in Assembler) generates p/l/q-suffixed wrappers around emit_<name>.
425 : #define ASSEMBLER_INSTRUCTION_LIST(V) \
426 : V(add) \
427 : V(and) \
428 : V(cmp) \
429 : V(cmpxchg) \
430 : V(dec) \
431 : V(idiv) \
432 : V(div) \
433 : V(imul) \
434 : V(inc) \
435 : V(lea) \
436 : V(mov) \
437 : V(movzxb) \
438 : V(movzxw) \
439 : V(neg) \
440 : V(not) \
441 : V(or) \
442 : V(repmovs) \
443 : V(sbb) \
444 : V(sub) \
445 : V(test) \
446 : V(xchg) \
447 : V(xor)
448 :
449 : // Shift instructions on operands/registers with kPointerSize, kInt32Size and
450 : // kInt64Size. Each entry is V(name, subcode); DECLARE_SHIFT_INSTRUCTION forwards the subcode to shift().
451 : #define SHIFT_INSTRUCTION_LIST(V) \
452 : V(rol, 0x0) \
453 : V(ror, 0x1) \
454 : V(rcl, 0x2) \
455 : V(rcr, 0x3) \
456 : V(shl, 0x4) \
457 : V(shr, 0x5) \
458 : V(sar, 0x7) \
459 :
460 :
461 : class Assembler : public AssemblerBase {
462 : private:
463 : // We check before assembling an instruction that there is sufficient
464 : // space to write an instruction and its relocation information.
465 : // The relocation writer's position must be kGap bytes above the end of
466 : // the generated instructions. This leaves enough space for the
467 : // longest possible x64 instruction, 15 bytes, and the longest possible
468 : // relocation information encoding, RelocInfoWriter::kMaxLength == 16.
469 : // (There is a 15 byte limit on x64 instruction length that rules out some
470 : // otherwise valid instructions.)
471 : // This allows for a single, fast space check per instruction.
472 : static constexpr int kGap = 32;
473 :
474 : public:
475 : // Create an assembler. Instructions and relocation information are emitted
476 : // into a buffer, with the instructions starting from the beginning and the
477 : // relocation information starting from the end of the buffer. See CodeDesc
478 : // for a detailed comment on the layout (globals.h).
479 : //
480 : // If the provided buffer is NULL, the assembler allocates and grows its own
481 : // buffer, and buffer_size determines the initial buffer size. The buffer is
482 : // owned by the assembler and deallocated upon destruction of the assembler.
483 : //
484 : // If the provided buffer is not NULL, the assembler uses the provided buffer
485 : // for code generation and assumes its size to be buffer_size. If the buffer
486 : // is too small, a fatal error occurs. No deallocation of the buffer is done
487 : // upon destruction of the assembler.
488 13459112 : Assembler(Isolate* isolate, void* buffer, int buffer_size)
489 13459112 : : Assembler(IsolateData(isolate), buffer, buffer_size) {}
490 : Assembler(IsolateData isolate_data, void* buffer, int buffer_size);
491 26916261 : virtual ~Assembler() { }
492 :
493 : // GetCode emits any pending (non-emitted) code and fills the descriptor
494 : // desc. GetCode() is idempotent; it returns the same result if no other
495 : // Assembler functions are invoked in between GetCode() calls.
496 : void GetCode(CodeDesc* desc);
497 :
498 : // Read/Modify the code target in the relative branch/call instruction at pc.
499 : // On the x64 architecture, we use relative jumps with a 32-bit displacement
500 : // to jump to other Code objects in the Code space in the heap.
501 : // Jumps to C functions are done indirectly through a 64-bit register holding
502 : // the absolute address of the target.
503 : // These functions convert between absolute Addresses of Code objects and
504 : // the relative displacements stored in the code.
505 : // The isolate argument is unused (and may be nullptr) when skipping flushing.
506 : static inline Address target_address_at(Address pc, Address constant_pool);
507 : static inline void set_target_address_at(
508 : Isolate* isolate, Address pc, Address constant_pool, Address target,
509 : ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED);
510 : static inline Address target_address_at(Address pc, Code* code);
511 : static inline void set_target_address_at(
512 : Isolate* isolate, Address pc, Code* code, Address target,
513 : ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED);
514 :
515 : // Return the code target address at a call site from the return address
516 : // of that call in the instruction stream.
517 : static inline Address target_address_from_return_address(Address pc);
518 :
519 : // This sets the branch destination (which is in the instruction on x64).
520 : // This is for calls and branches within generated code.
521 : inline static void deserialization_set_special_target_at(
522 : Isolate* isolate, Address instruction_payload, Code* code,
523 : Address target);
524 :
525 : // This sets the internal reference at the pc.
526 : inline static void deserialization_set_target_internal_reference_at(
527 : Isolate* isolate, Address pc, Address target,
528 : RelocInfo::Mode mode = RelocInfo::INTERNAL_REFERENCE);
529 :
530 : static inline RelocInfo::Mode RelocInfoNone() {
531 : if (kPointerSize == kInt64Size) {
532 : return RelocInfo::NONE64;
533 : } else {
534 : DCHECK(kPointerSize == kInt32Size);
535 : return RelocInfo::NONE32;
536 : }
537 : }
538 :
539 : inline Handle<Code> code_target_object_handle_at(Address pc);
540 : inline Address runtime_entry_at(Address pc);
541 : // Number of bytes taken up by the branch target in the code.
542 : static constexpr int kSpecialTargetSize = 4; // 32-bit displacement.
543 : // Distance between the address of the code target in the call instruction
544 : // and the return address pushed on the stack.
545 : static constexpr int kCallTargetAddressOffset = 4; // 32-bit displacement.
546 : // The length of call(kScratchRegister).
547 : static constexpr int kCallScratchRegisterInstructionLength = 3;
548 : // The length of call(Immediate32).
549 : static constexpr int kShortCallInstructionLength = 5;
550 : // The length of movq(kScratchRegister, address).
551 : static constexpr int kMoveAddressIntoScratchRegisterInstructionLength =
552 : 2 + kPointerSize;
553 : // The length of movq(kScratchRegister, address) and call(kScratchRegister).
554 : static constexpr int kCallSequenceLength =
555 : kMoveAddressIntoScratchRegisterInstructionLength +
556 : kCallScratchRegisterInstructionLength;
557 :
558 : // The debug break slot must be able to contain an indirect call sequence.
559 : static constexpr int kDebugBreakSlotLength = kCallSequenceLength;
560 : // Distance between start of patched debug break slot and the emitted address
561 : // to jump to.
562 : static constexpr int kPatchDebugBreakSlotAddressOffset =
563 : kMoveAddressIntoScratchRegisterInstructionLength - kPointerSize;
564 :
565 : // One byte opcode for test eax,0xXXXXXXXX.
566 : static constexpr byte kTestEaxByte = 0xA9;
567 : // One byte opcode for test al, 0xXX.
568 : static constexpr byte kTestAlByte = 0xA8;
569 : // One byte opcode for nop.
570 : static constexpr byte kNopByte = 0x90;
571 :
572 : // One byte prefix for a short conditional jump.
573 : static constexpr byte kJccShortPrefix = 0x70;
574 : static constexpr byte kJncShortOpcode = kJccShortPrefix | not_carry;
575 : static constexpr byte kJcShortOpcode = kJccShortPrefix | carry;
576 : static constexpr byte kJnzShortOpcode = kJccShortPrefix | not_zero;
577 : static constexpr byte kJzShortOpcode = kJccShortPrefix | zero;
578 :
579 : // VEX prefix encodings.
580 : enum SIMDPrefix { kNone = 0x0, k66 = 0x1, kF3 = 0x2, kF2 = 0x3 };
581 : enum VectorLength { kL128 = 0x0, kL256 = 0x4, kLIG = kL128, kLZ = kL128 };
582 : enum VexW { kW0 = 0x0, kW1 = 0x80, kWIG = kW0 };
583 : enum LeadingOpcode { k0F = 0x1, k0F38 = 0x2, k0F3A = 0x3 };
584 :
585 : // ---------------------------------------------------------------------------
586 : // Code generation
587 : //
588 : // Function names correspond one-to-one to x64 instruction mnemonics.
589 : // Unless specified otherwise, instructions operate on 64-bit operands.
590 : //
591 : // If we need versions of an assembly instruction that operate on different
592 : // width arguments, we add a single-letter suffix specifying the width.
593 : // This is done for the following instructions: mov, cmp, inc, dec,
594 : // add, sub, and test.
595 : // There are no versions of these instructions without the suffix.
596 : // - Instructions on 8-bit (byte) operands/registers have a trailing 'b'.
597 : // - Instructions on 16-bit (word) operands/registers have a trailing 'w'.
598 : // - Instructions on 32-bit (doubleword) operands/registers use 'l'.
599 : // - Instructions on 64-bit (quadword) operands/registers use 'q'.
600 : // - Instructions on operands/registers with pointer size use 'p'.
601 :
602 : STATIC_ASSERT(kPointerSize == kInt64Size || kPointerSize == kInt32Size);
603 :
604 : #define DECLARE_INSTRUCTION(instruction) \
605 : template<class P1> \
606 : void instruction##p(P1 p1) { \
607 : emit_##instruction(p1, kPointerSize); \
608 : } \
609 : \
610 : template<class P1> \
611 : void instruction##l(P1 p1) { \
612 : emit_##instruction(p1, kInt32Size); \
613 : } \
614 : \
615 : template<class P1> \
616 : void instruction##q(P1 p1) { \
617 : emit_##instruction(p1, kInt64Size); \
618 : } \
619 : \
620 : template<class P1, class P2> \
621 : void instruction##p(P1 p1, P2 p2) { \
622 : emit_##instruction(p1, p2, kPointerSize); \
623 : } \
624 : \
625 : template<class P1, class P2> \
626 : void instruction##l(P1 p1, P2 p2) { \
627 : emit_##instruction(p1, p2, kInt32Size); \
628 : } \
629 : \
630 : template<class P1, class P2> \
631 : void instruction##q(P1 p1, P2 p2) { \
632 : emit_##instruction(p1, p2, kInt64Size); \
633 : } \
634 : \
635 : template<class P1, class P2, class P3> \
636 : void instruction##p(P1 p1, P2 p2, P3 p3) { \
637 : emit_##instruction(p1, p2, p3, kPointerSize); \
638 : } \
639 : \
640 : template<class P1, class P2, class P3> \
641 : void instruction##l(P1 p1, P2 p2, P3 p3) { \
642 : emit_##instruction(p1, p2, p3, kInt32Size); \
643 : } \
644 : \
645 : template<class P1, class P2, class P3> \
646 : void instruction##q(P1 p1, P2 p2, P3 p3) { \
647 : emit_##instruction(p1, p2, p3, kInt64Size); \
648 : }
649 126247213 : ASSEMBLER_INSTRUCTION_LIST(DECLARE_INSTRUCTION)
650 : #undef DECLARE_INSTRUCTION
651 :
652 : // Insert the smallest number of nop instructions
653 : // possible to align the pc offset to a multiple
654 : // of m, where m must be a power of 2.
655 : void Align(int m);
656 : // Insert the smallest number of zero bytes possible to align the pc offset
657 : // to a multiple of m. m must be a power of 2 (>= 2).
658 : void DataAlign(int m);
659 : void Nop(int bytes = 1);
660 : // Aligns code to something that's optimal for a jump target for the platform.
661 : void CodeTargetAlign();
662 :
663 : // Stack
664 : void pushfq();
665 : void popfq();
666 :
667 : void pushq(Immediate value);
668 : // Push a 32 bit integer, and guarantee that it is actually pushed as a
669 : // 32 bit value, the normal push will optimize the 8 bit case.
670 : void pushq_imm32(int32_t imm32);
671 : void pushq(Register src);
672 : void pushq(const Operand& src);
673 :
674 : void popq(Register dst);
675 : void popq(const Operand& dst);
676 :
677 : void enter(Immediate size);
678 : void leave();
679 :
680 : // Moves
681 : void movb(Register dst, const Operand& src);
682 : void movb(Register dst, Immediate imm);
683 : void movb(const Operand& dst, Register src);
684 : void movb(const Operand& dst, Immediate imm);
685 :
686 : // Move the low 16 bits of a 64-bit register value to a 16-bit
687 : // memory location.
688 : void movw(Register dst, const Operand& src);
689 : void movw(const Operand& dst, Register src);
690 : void movw(const Operand& dst, Immediate imm);
691 :
692 : // Move the offset of the label location relative to the current
693 : // position (after the move) to the destination.
694 : void movl(const Operand& dst, Label* src);
695 :
696 : // Loads a pointer into a register with a relocation mode.
697 : void movp(Register dst, void* ptr, RelocInfo::Mode rmode);
698 :
699 : // Loads a 64-bit immediate into a register.
700 : void movq(Register dst, int64_t value,
701 : RelocInfo::Mode rmode = RelocInfo::NONE64);
702 : void movq(Register dst, uint64_t value,
703 : RelocInfo::Mode rmode = RelocInfo::NONE64);
704 :
705 : void movsxbl(Register dst, Register src);
706 : void movsxbl(Register dst, const Operand& src);
707 : void movsxbq(Register dst, Register src);
708 : void movsxbq(Register dst, const Operand& src);
709 : void movsxwl(Register dst, Register src);
710 : void movsxwl(Register dst, const Operand& src);
711 : void movsxwq(Register dst, Register src);
712 : void movsxwq(Register dst, const Operand& src);
713 : void movsxlq(Register dst, Register src);
714 : void movsxlq(Register dst, const Operand& src);
715 :
716 : // Repeated moves.
717 :
718 : void repmovsb();
719 : void repmovsw();
720 : void repmovsp() { emit_repmovs(kPointerSize); }
721 : void repmovsl() { emit_repmovs(kInt32Size); }
722 : void repmovsq() { emit_repmovs(kInt64Size); }
723 :
724 : // Instruction to load from an immediate 64-bit pointer into RAX.
725 : void load_rax(void* ptr, RelocInfo::Mode rmode);
726 : void load_rax(ExternalReference ext);
727 :
728 : // Conditional moves.
729 : void cmovq(Condition cc, Register dst, Register src);
730 : void cmovq(Condition cc, Register dst, const Operand& src);
731 : void cmovl(Condition cc, Register dst, Register src);
732 : void cmovl(Condition cc, Register dst, const Operand& src);
733 :
734 : void cmpb(Register dst, Immediate src) {
735 64580 : immediate_arithmetic_op_8(0x7, dst, src);
736 : }
737 :
738 : void cmpb_al(Immediate src);
739 :
740 : void cmpb(Register dst, Register src) {
741 4454 : arithmetic_op_8(0x3A, dst, src);
742 : }
743 :
744 : void cmpb(Register dst, const Operand& src) {
745 17427 : arithmetic_op_8(0x3A, dst, src);
746 : }
747 :
748 : void cmpb(const Operand& dst, Register src) {
749 101 : arithmetic_op_8(0x38, src, dst);
750 : }
751 :
752 : void cmpb(const Operand& dst, Immediate src) {
753 632585 : immediate_arithmetic_op_8(0x7, dst, src);
754 : }
755 :
756 : void cmpw(const Operand& dst, Immediate src) {
757 9 : immediate_arithmetic_op_16(0x7, dst, src);
758 : }
759 :
760 : void cmpw(Register dst, Immediate src) {
761 0 : immediate_arithmetic_op_16(0x7, dst, src);
762 : }
763 :
764 : void cmpw(Register dst, const Operand& src) {
765 92 : arithmetic_op_16(0x3B, dst, src);
766 : }
767 :
768 : void cmpw(Register dst, Register src) {
769 0 : arithmetic_op_16(0x3B, dst, src);
770 : }
771 :
772 : void cmpw(const Operand& dst, Register src) {
773 15 : arithmetic_op_16(0x39, src, dst);
774 : }
775 :
776 0 : void testb(Register reg, const Operand& op) { testb(op, reg); }
777 :
778 0 : void testw(Register reg, const Operand& op) { testw(op, reg); }
779 :
780 : void andb(Register dst, Immediate src) {
781 36713 : immediate_arithmetic_op_8(0x4, dst, src);
782 : }
783 :
784 : void decb(Register dst);
785 : void decb(const Operand& dst);
786 :
787 : // Lock prefix.
788 : void lock();
789 :
790 : void xchgb(Register reg, const Operand& op);
791 : void xchgw(Register reg, const Operand& op);
792 :
793 : void cmpxchgb(const Operand& dst, Register src);
794 : void cmpxchgw(const Operand& dst, Register src);
795 :
796 : // Sign-extends rax into rdx:rax.
797 : void cqo();
798 : // Sign-extends eax into edx:eax.
799 : void cdq();
800 :
801 : // Multiply eax by src, put the result in edx:eax.
802 : void mull(Register src);
803 : void mull(const Operand& src);
804 : // Multiply rax by src, put the result in rdx:rax.
805 : void mulq(Register src);
806 :
807 : #define DECLARE_SHIFT_INSTRUCTION(instruction, subcode) \
808 : void instruction##p(Register dst, Immediate imm8) { \
809 : shift(dst, imm8, subcode, kPointerSize); \
810 : } \
811 : \
812 : void instruction##l(Register dst, Immediate imm8) { \
813 : shift(dst, imm8, subcode, kInt32Size); \
814 : } \
815 : \
816 : void instruction##q(Register dst, Immediate imm8) { \
817 : shift(dst, imm8, subcode, kInt64Size); \
818 : } \
819 : \
820 : void instruction##p(Operand dst, Immediate imm8) { \
821 : shift(dst, imm8, subcode, kPointerSize); \
822 : } \
823 : \
824 : void instruction##l(Operand dst, Immediate imm8) { \
825 : shift(dst, imm8, subcode, kInt32Size); \
826 : } \
827 : \
828 : void instruction##q(Operand dst, Immediate imm8) { \
829 : shift(dst, imm8, subcode, kInt64Size); \
830 : } \
831 : \
832 : void instruction##p_cl(Register dst) { shift(dst, subcode, kPointerSize); } \
833 : \
834 : void instruction##l_cl(Register dst) { shift(dst, subcode, kInt32Size); } \
835 : \
836 : void instruction##q_cl(Register dst) { shift(dst, subcode, kInt64Size); } \
837 : \
838 : void instruction##p_cl(Operand dst) { shift(dst, subcode, kPointerSize); } \
839 : \
840 : void instruction##l_cl(Operand dst) { shift(dst, subcode, kInt32Size); } \
841 : \
842 : void instruction##q_cl(Operand dst) { shift(dst, subcode, kInt64Size); }
843 1947090 : SHIFT_INSTRUCTION_LIST(DECLARE_SHIFT_INSTRUCTION)
844 : #undef DECLARE_SHIFT_INSTRUCTION
845 :
846 : // Shifts dst:src left by cl bits, affecting only dst.
847 : void shld(Register dst, Register src);
848 :
849 : // Shifts src:dst right by cl bits, affecting only dst.
850 : void shrd(Register dst, Register src);
851 :
852 : void store_rax(void* dst, RelocInfo::Mode mode);
853 : void store_rax(ExternalReference ref);
854 :
855 : void subb(Register dst, Immediate src) {
856 3930 : immediate_arithmetic_op_8(0x5, dst, src);
857 : }
858 :
859 : void testb(Register dst, Register src);
860 : void testb(Register reg, Immediate mask);
861 : void testb(const Operand& op, Immediate mask);
862 : void testb(const Operand& op, Register reg);
863 :
864 : void testw(Register dst, Register src);
865 : void testw(Register reg, Immediate mask);
866 : void testw(const Operand& op, Immediate mask);
867 : void testw(const Operand& op, Register reg);
868 :
869 : // Bit operations.
870 : void bt(const Operand& dst, Register src);
871 : void bts(const Operand& dst, Register src);
872 : void bsrq(Register dst, Register src);
873 : void bsrq(Register dst, const Operand& src);
874 : void bsrl(Register dst, Register src);
875 : void bsrl(Register dst, const Operand& src);
876 : void bsfq(Register dst, Register src);
877 : void bsfq(Register dst, const Operand& src);
878 : void bsfl(Register dst, Register src);
879 : void bsfl(Register dst, const Operand& src);
880 :
881 : // Miscellaneous
// All of these take no operands except ret (an int named imm16) and setcc
// (a condition code plus a register).
882 : void clc();
883 : void cld();
884 : void cpuid();
885 : void hlt();
886 : void int3();
887 : void nop();
888 : void ret(int imm16);
889 : void ud2();
890 : void setcc(Condition cc, Register reg);
891 :
// Register-source and memory-source overloads; the shuffle selector is passed
// as an 8-bit unsigned value.
892 : void pshufw(XMMRegister dst, XMMRegister src, uint8_t shuffle);
893 : void pshufw(XMMRegister dst, const Operand& src, uint8_t shuffle);
894 :
895 : // Label operations & relative jumps (PPUM Appendix D)
896 : //
897 : // Takes a branch opcode (cc) and a label (L) and generates
898 : // either a backward branch or a forward branch and links it
899 : // to the label fixup chain. Usage:
900 : //
901 : // Label L; // unbound label
902 : // j(cc, &L); // forward branch to unbound label
903 : // bind(&L); // bind label to the current pc
904 : // j(cc, &L); // backward branch to bound label
905 : // bind(&L); // illegal: a label may be bound only once
906 : //
907 : // Note: The same Label can be used for forward and backward branches
908 : // but it may be bound only once.
909 :
// bind is the only label operation declared at this point; the call, jmp and
// j overloads that accept a Label* are declared further down.
910 : void bind(Label* L); // binds an unbound label L to the current code position
911 :
912 : // Calls
// Five overloads follow: label target, address with relocation mode, code
// handle (rmode defaults to RelocInfo::CODE_TARGET, ast_id defaults to
// TypeFeedbackId::None()), bare address, and register.
913 : // Call near relative 32-bit displacement, relative to next instruction.
914 : void call(Label* L);
915 : void call(Address entry, RelocInfo::Mode rmode);
916 : void call(Handle<Code> target,
917 : RelocInfo::Mode rmode = RelocInfo::CODE_TARGET,
918 : TypeFeedbackId ast_id = TypeFeedbackId::None());
919 :
920 : // Calls directly to the given address using a relative offset.
921 : // Should only ever be used in Code objects for calls within the
922 : // same Code object. Should not be used when generating new code (use labels),
923 : // but only when patching existing code.
924 : void call(Address target);
925 :
926 : // Call near absolute indirect, address in register
927 : void call(Register adr);
928 :
929 : // Jumps
930 : // Jump short or near relative.
931 : // Use a 32-bit signed displacement.
932 : // Unconditional jump to L
// The distance argument defaults to Label::kFar when omitted.
933 : void jmp(Label* L, Label::Distance distance = Label::kFar);
934 : void jmp(Handle<Code> target, RelocInfo::Mode rmode);
935 :
936 : // Jump near absolute indirect (r64)
// The target is given either in a register or through a memory operand.
937 : void jmp(Register adr);
938 : void jmp(const Operand& src);
939 :
940 : // Conditional jumps
// Three target kinds: a Label (distance defaults to Label::kFar), a raw
// address with relocation mode, or a code handle with relocation mode.
941 : void j(Condition cc,
942 : Label* L,
943 : Label::Distance distance = Label::kFar);
944 : void j(Condition cc, Address entry, RelocInfo::Mode rmode);
945 : void j(Condition cc, Handle<Code> target, RelocInfo::Mode rmode);
946 :
947 : // Floating-point operations
// Declarations only; nothing in this group is defined inline.
// Overloads taking `const Operand& adr` work on a memory operand; overloads
// taking an int presumably select an FPU stack slot (st(i)) -- TODO confirm
// against the definitions. The *_s / *_d suffixes distinguish two operand
// widths of the same operation.
// Defaults visible here: faddp/fsubp/fsubrp/fmulp/fdivp/fxch default to i = 1,
// ffree defaults to i = 0.
948 : void fld(int i);
949 :
950 : void fld1();
951 : void fldz();
952 : void fldpi();
953 : void fldln2();
954 :
955 : void fld_s(const Operand& adr);
956 : void fld_d(const Operand& adr);
957 :
958 : void fstp_s(const Operand& adr);
959 : void fstp_d(const Operand& adr);
960 : void fstp(int index);
961 :
962 : void fild_s(const Operand& adr);
963 : void fild_d(const Operand& adr);
964 :
965 : void fist_s(const Operand& adr);
966 :
967 : void fistp_s(const Operand& adr);
968 : void fistp_d(const Operand& adr);
969 :
970 : void fisttp_s(const Operand& adr);
971 : void fisttp_d(const Operand& adr);
972 :
973 : void fabs();
974 : void fchs();
975 :
976 : void fadd(int i);
977 : void fsub(int i);
978 : void fmul(int i);
979 : void fdiv(int i);
980 :
981 : void fisub_s(const Operand& adr);
982 :
983 : void faddp(int i = 1);
984 : void fsubp(int i = 1);
985 : void fsubrp(int i = 1);
986 : void fmulp(int i = 1);
987 : void fdivp(int i = 1);
988 : void fprem();
989 : void fprem1();
990 :
991 : void fxch(int i = 1);
992 : void fincstp();
993 : void ffree(int i = 0);
994 :
995 : void ftst();
996 : void fucomp(int i);
997 : void fucompp();
998 : void fucomi(int i);
999 : void fucomip();
1000 :
1001 : void fcompp();
1002 : void fnstsw_ax();
1003 : void fwait();
1004 : void fnclex();
1005 :
1006 : void fsin();
1007 : void fcos();
1008 : void fptan();
1009 : void fyl2x();
1010 : void f2xm1();
1011 : void fscale();
1012 : void fninit();
1013 :
1014 : void frndint();
1015 :
1016 : void sahf();
1017 :
1018 : // SSE instructions
// Most operations below are declared twice: once with an XMM register source
// and once with a memory-operand source. Exceptions visible here: movaps and
// movss(XMMRegister, XMMRegister) style register moves, shufps and movmskps,
// which only have register-source declarations in this group.
1019 : void addss(XMMRegister dst, XMMRegister src);
1020 : void addss(XMMRegister dst, const Operand& src);
1021 : void subss(XMMRegister dst, XMMRegister src);
1022 : void subss(XMMRegister dst, const Operand& src);
1023 : void mulss(XMMRegister dst, XMMRegister src);
1024 : void mulss(XMMRegister dst, const Operand& src);
1025 : void divss(XMMRegister dst, XMMRegister src);
1026 : void divss(XMMRegister dst, const Operand& src);
1027 :
1028 : void maxss(XMMRegister dst, XMMRegister src);
1029 : void maxss(XMMRegister dst, const Operand& src);
1030 : void minss(XMMRegister dst, XMMRegister src);
1031 : void minss(XMMRegister dst, const Operand& src);
1032 :
1033 : void sqrtss(XMMRegister dst, XMMRegister src);
1034 : void sqrtss(XMMRegister dst, const Operand& src);
1035 :
1036 : void ucomiss(XMMRegister dst, XMMRegister src);
1037 : void ucomiss(XMMRegister dst, const Operand& src);
1038 : void movaps(XMMRegister dst, XMMRegister src);
1039 :
1040 : // Don't use this unless it's important to keep the
1041 : // top half of the destination register unchanged.
1042 : // Use movaps when moving float values and movd for integer
1043 : // values in xmm registers.
1044 : void movss(XMMRegister dst, XMMRegister src);
1045 :
// Load (memory -> register) and store (register -> memory) forms of movss.
1046 : void movss(XMMRegister dst, const Operand& src);
1047 : void movss(const Operand& dst, XMMRegister src);
1048 : void shufps(XMMRegister dst, XMMRegister src, byte imm8);
1049 :
// Conversions between a general-purpose register/memory operand and an XMM
// register.
1050 : void cvttss2si(Register dst, const Operand& src);
1051 : void cvttss2si(Register dst, XMMRegister src);
1052 : void cvtlsi2ss(XMMRegister dst, const Operand& src);
1053 : void cvtlsi2ss(XMMRegister dst, Register src);
1054 :
1055 : void andps(XMMRegister dst, XMMRegister src);
1056 : void andps(XMMRegister dst, const Operand& src);
1057 : void orps(XMMRegister dst, XMMRegister src);
1058 : void orps(XMMRegister dst, const Operand& src);
1059 : void xorps(XMMRegister dst, XMMRegister src);
1060 : void xorps(XMMRegister dst, const Operand& src);
1061 :
1062 : void addps(XMMRegister dst, XMMRegister src);
1063 : void addps(XMMRegister dst, const Operand& src);
1064 : void subps(XMMRegister dst, XMMRegister src);
1065 : void subps(XMMRegister dst, const Operand& src);
1066 : void mulps(XMMRegister dst, XMMRegister src);
1067 : void mulps(XMMRegister dst, const Operand& src);
1068 : void divps(XMMRegister dst, XMMRegister src);
1069 : void divps(XMMRegister dst, const Operand& src);
1070 :
1071 : void movmskps(Register dst, XMMRegister src);
1072 :
// Generic three-operand emitter used by the inline v* helpers in this class.
// Parameters: op is the opcode byte, dst/src1/src2 the operands (src2 is
// either a register or a memory operand), followed by a SIMDPrefix, a
// LeadingOpcode and a VexW selector.
1073 : void vinstr(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
1074 : SIMDPrefix pp, LeadingOpcode m, VexW w);
1075 : void vinstr(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2,
1076 : SIMDPrefix pp, LeadingOpcode m, VexW w);
1077 :
1078 : // SSE2 instructions
// sse2_instr is the shared emitter: it receives the operands plus three raw
// bytes (prefix, escape, opcode). Register-source and memory-source overloads.
1079 : void sse2_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape,
1080 : byte opcode);
1081 : void sse2_instr(XMMRegister dst, const Operand& src, byte prefix, byte escape,
1082 : byte opcode);
// For every entry of SSE2_INSTRUCTION_LIST this macro defines two inline
// methods named after the instruction (register and memory source). The list
// supplies prefix/escape/opcode as bare hex digits; the macro pastes "0x" in
// front of each to form the byte literals handed to sse2_instr.
1083 : #define DECLARE_SSE2_INSTRUCTION(instruction, prefix, escape, opcode) \
1084 : void instruction(XMMRegister dst, XMMRegister src) { \
1085 : sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode); \
1086 : } \
1087 : void instruction(XMMRegister dst, const Operand& src) { \
1088 : sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode); \
1089 : }
1090 :
1091 323 : SSE2_INSTRUCTION_LIST(DECLARE_SSE2_INSTRUCTION)
1092 : #undef DECLARE_SSE2_INSTRUCTION
1093 :
// Second expansion of SSE2_INSTRUCTION_LIST: defines v<instruction> in
// three-operand form (dst, src1, src2 as register or memory). Here the list's
// prefix/escape tokens are pasted after "k" to name the enum constants passed
// to vinstr, the opcode is pasted after "0x", and the last argument is always
// kW0.
1094 : #define DECLARE_SSE2_AVX_INSTRUCTION(instruction, prefix, escape, opcode) \
1095 : void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
1096 : vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0); \
1097 : } \
1098 : void v##instruction(XMMRegister dst, XMMRegister src1, \
1099 : const Operand& src2) { \
1100 : vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0); \
1101 : }
1102 :
1103 36135 : SSE2_INSTRUCTION_LIST(DECLARE_SSE2_AVX_INSTRUCTION)
1104 : #undef DECLARE_SSE2_AVX_INSTRUCTION
1105 :
1106 : // SSE3
// Only a memory-source overload is declared.
1107 : void lddqu(XMMRegister dst, const Operand& src);
1108 :
1109 : // SSSE3
// Shared emitter taking four raw bytes (prefix, two escape bytes, opcode), in
// register-source and memory-source overloads.
1110 : void ssse3_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape1,
1111 : byte escape2, byte opcode);
1112 : void ssse3_instr(XMMRegister dst, const Operand& src, byte prefix,
1113 : byte escape1, byte escape2, byte opcode);
1114 :
// Expanded once per SSSE3_INSTRUCTION_LIST entry: defines two inline methods
// named after the instruction that forward to ssse3_instr, turning the list's
// bare hex digits into byte literals by pasting "0x" in front.
1115 : #define DECLARE_SSSE3_INSTRUCTION(instruction, prefix, escape1, escape2, \
1116 : opcode) \
1117 : void instruction(XMMRegister dst, XMMRegister src) { \
1118 : ssse3_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
1119 : } \
1120 : void instruction(XMMRegister dst, const Operand& src) { \
1121 : ssse3_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
1122 : }
1123 :
1124 231 : SSSE3_INSTRUCTION_LIST(DECLARE_SSSE3_INSTRUCTION)
1125 : #undef DECLARE_SSSE3_INSTRUCTION
1126 :
1127 : // SSE4
// Same shape as the SSSE3 emitter: operands plus prefix, two escape bytes and
// the opcode; register-source and memory-source overloads.
1128 : void sse4_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape1,
1129 : byte escape2, byte opcode);
1130 : void sse4_instr(XMMRegister dst, const Operand& src, byte prefix,
1131 : byte escape1, byte escape2, byte opcode);
// Expanded once per SSE4_INSTRUCTION_LIST entry: defines two inline methods
// named after the instruction that forward to sse4_instr with "0x"-pasted
// byte literals.
1132 : #define DECLARE_SSE4_INSTRUCTION(instruction, prefix, escape1, escape2, \
1133 : opcode) \
1134 : void instruction(XMMRegister dst, XMMRegister src) { \
1135 : sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
1136 : } \
1137 : void instruction(XMMRegister dst, const Operand& src) { \
1138 : sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
1139 : }
1140 :
1141 63 : SSE4_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
1142 : #undef DECLARE_SSE4_INSTRUCTION
1143 :
// Applied to both the SSSE3 and the SSE4 instruction lists: defines
// v<instruction> in three-operand form. The two escape tokens are pasted
// together after "k" to name a single LeadingOpcode constant
// (k<escape1><escape2>), the prefix is pasted after "k", the opcode after
// "0x", and the last vinstr argument is always kW0.
1144 : #define DECLARE_SSE34_AVX_INSTRUCTION(instruction, prefix, escape1, escape2, \
1145 : opcode) \
1146 : void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
1147 : vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \
1148 : } \
1149 : void v##instruction(XMMRegister dst, XMMRegister src1, \
1150 : const Operand& src2) { \
1151 : vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \
1152 : }
1153 :
1154 : SSSE3_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
1155 : SSE4_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
1156 : #undef DECLARE_SSE34_AVX_INSTRUCTION
1157 :
// Moves between general-purpose registers / memory and XMM registers.
// movd: GPR -> XMM, memory -> XMM, XMM -> GPR.
// movq: GPR -> XMM, XMM -> GPR, XMM -> XMM (no memory overload declared here).
1158 : void movd(XMMRegister dst, Register src);
1159 : void movd(XMMRegister dst, const Operand& src);
1160 : void movd(Register dst, XMMRegister src);
1161 : void movq(XMMRegister dst, Register src);
1162 : void movq(Register dst, XMMRegister src);
1163 : void movq(XMMRegister dst, XMMRegister src);
1164 :
1165 : // Don't use this unless it's important to keep the
1166 : // top half of the destination register unchanged.
1167 : // Use movapd when moving double values and movq for integer
1168 : // values in xmm registers.
1169 : void movsd(XMMRegister dst, XMMRegister src);
1170 :
// Store (register -> memory) and load (memory -> register) forms of movsd.
1171 : void movsd(const Operand& dst, XMMRegister src);
1172 : void movsd(XMMRegister dst, const Operand& src);
1173 :
// movdqa, movdqu and movupd are declared only as memory <-> register moves
// (store and load overloads); movapd is declared only register -> register.
1174 : void movdqa(const Operand& dst, XMMRegister src);
1175 : void movdqa(XMMRegister dst, const Operand& src);
1176 :
1177 : void movdqu(const Operand& dst, XMMRegister src);
1178 : void movdqu(XMMRegister dst, const Operand& src);
1179 :
1180 : void movapd(XMMRegister dst, XMMRegister src);
1181 : void movupd(XMMRegister dst, const Operand& src);
1182 : void movupd(const Operand& dst, XMMRegister src);
1183 :
// Immediate-count shifts: each takes a single XMM register (used as both
// source and destination, presumably) and an 8-bit immediate.
1184 : void psllq(XMMRegister reg, byte imm8);
1185 : void psrlq(XMMRegister reg, byte imm8);
1186 : void psllw(XMMRegister reg, byte imm8);
1187 : void pslld(XMMRegister reg, byte imm8);
1188 : void psrlw(XMMRegister reg, byte imm8);
1189 : void psrld(XMMRegister reg, byte imm8);
1190 : void psraw(XMMRegister reg, byte imm8);
1191 : void psrad(XMMRegister reg, byte imm8);
1192 :
// Conversion declarations. Float -> integer forms write a general-purpose
// register; integer -> float forms read a general-purpose register or memory.
// A trailing "q" (cvtt*2siq, cvtsd2siq) or a "q" in cvtqsi2* marks the
// variants named separately from the l / unsuffixed ones.
1193 : void cvttsd2si(Register dst, const Operand& src);
1194 : void cvttsd2si(Register dst, XMMRegister src);
1195 : void cvttss2siq(Register dst, XMMRegister src);
1196 : void cvttss2siq(Register dst, const Operand& src);
1197 : void cvttsd2siq(Register dst, XMMRegister src);
1198 : void cvttsd2siq(Register dst, const Operand& src);
1199 :
1200 : void cvtlsi2sd(XMMRegister dst, const Operand& src);
1201 : void cvtlsi2sd(XMMRegister dst, Register src);
1202 :
1203 : void cvtqsi2ss(XMMRegister dst, const Operand& src);
1204 : void cvtqsi2ss(XMMRegister dst, Register src);
1205 :
1206 : void cvtqsi2sd(XMMRegister dst, const Operand& src);
1207 : void cvtqsi2sd(XMMRegister dst, Register src);
1208 :
1209 :
// Float <-> double conversions between XMM registers / memory.
1210 : void cvtss2sd(XMMRegister dst, XMMRegister src);
1211 : void cvtss2sd(XMMRegister dst, const Operand& src);
1212 : void cvtsd2ss(XMMRegister dst, XMMRegister src);
1213 : void cvtsd2ss(XMMRegister dst, const Operand& src);
1214 :
// Only register-source overloads are declared for these two.
1215 : void cvtsd2si(Register dst, XMMRegister src);
1216 : void cvtsd2siq(Register dst, XMMRegister src);
1217 :
// Every operation in this group is declared with both a register source and
// a memory-operand source.
1218 : void addsd(XMMRegister dst, XMMRegister src);
1219 : void addsd(XMMRegister dst, const Operand& src);
1220 : void subsd(XMMRegister dst, XMMRegister src);
1221 : void subsd(XMMRegister dst, const Operand& src);
1222 : void mulsd(XMMRegister dst, XMMRegister src);
1223 : void mulsd(XMMRegister dst, const Operand& src);
1224 : void divsd(XMMRegister dst, XMMRegister src);
1225 : void divsd(XMMRegister dst, const Operand& src);
1226 :
1227 : void maxsd(XMMRegister dst, XMMRegister src);
1228 : void maxsd(XMMRegister dst, const Operand& src);
1229 : void minsd(XMMRegister dst, XMMRegister src);
1230 : void minsd(XMMRegister dst, const Operand& src);
1231 :
1232 : void andpd(XMMRegister dst, XMMRegister src);
1233 : void andpd(XMMRegister dst, const Operand& src);
1234 : void orpd(XMMRegister dst, XMMRegister src);
1235 : void orpd(XMMRegister dst, const Operand& src);
1236 : void xorpd(XMMRegister dst, XMMRegister src);
1237 : void xorpd(XMMRegister dst, const Operand& src);
1238 : void sqrtsd(XMMRegister dst, XMMRegister src);
1239 : void sqrtsd(XMMRegister dst, const Operand& src);
1240 :
// ucomisd and punpckldq have register and memory source overloads; cmpltsd,
// movmskpd and punpckhdq are declared with a register source only.
1241 : void ucomisd(XMMRegister dst, XMMRegister src);
1242 : void ucomisd(XMMRegister dst, const Operand& src);
1243 : void cmpltsd(XMMRegister dst, XMMRegister src);
1244 :
1245 : void movmskpd(Register dst, XMMRegister src);
1246 :
1247 : void punpckldq(XMMRegister dst, XMMRegister src);
1248 : void punpckldq(XMMRegister dst, const Operand& src);
1249 : void punpckhdq(XMMRegister dst, XMMRegister src);
1250 :
1251 : // SSE 4.1 instruction
// NOTE(review): this heading covers the whole insert/extract group below;
// some of these mnemonics may belong to earlier extensions in their register
// forms -- confirm before relying on the heading for feature checks.
// insertps/extractps take the immediate as `byte`; the pextr*/pinsr* families
// take it as int8_t and each offer a register and a memory-operand overload.
1252 : void insertps(XMMRegister dst, XMMRegister src, byte imm8);
1253 : void extractps(Register dst, XMMRegister src, byte imm8);
1254 : void pextrb(Register dst, XMMRegister src, int8_t imm8);
1255 : void pextrb(const Operand& dst, XMMRegister src, int8_t imm8);
1256 : void pextrw(Register dst, XMMRegister src, int8_t imm8);
1257 : void pextrw(const Operand& dst, XMMRegister src, int8_t imm8);
1258 : void pextrd(Register dst, XMMRegister src, int8_t imm8);
1259 : void pextrd(const Operand& dst, XMMRegister src, int8_t imm8);
1260 : void pinsrb(XMMRegister dst, Register src, int8_t imm8);
1261 : void pinsrb(XMMRegister dst, const Operand& src, int8_t imm8);
1262 : void pinsrw(XMMRegister dst, Register src, int8_t imm8);
1263 : void pinsrw(XMMRegister dst, const Operand& src, int8_t imm8);
1264 : void pinsrd(XMMRegister dst, Register src, int8_t imm8);
1265 : void pinsrd(XMMRegister dst, const Operand& src, int8_t imm8);
1266 :
// Rounding with an explicit RoundingMode; register source only.
1267 : void roundss(XMMRegister dst, XMMRegister src, RoundingMode mode);
1268 : void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);
1269 :
// Generic packed compares: the int8_t argument selects the comparison.
1270 : void cmpps(XMMRegister dst, XMMRegister src, int8_t cmp);
1271 : void cmpps(XMMRegister dst, const Operand& src, int8_t cmp);
1272 : void cmppd(XMMRegister dst, XMMRegister src, int8_t cmp);
1273 : void cmppd(XMMRegister dst, const Operand& src, int8_t cmp);
1274 :
// SSE_CMP_P(instr, imm8) defines four inline wrappers -- instr##ps and
// instr##pd, each with register and memory source -- that call cmpps/cmppd
// with the fixed selector imm8. The uses below bind selectors 0x0, 0x1, 0x2,
// 0x4, 0x5 and 0x6 to the eq/lt/le/neq/nlt/nle names; 0x3 is not wrapped.
1275 : #define SSE_CMP_P(instr, imm8) \
1276 : void instr##ps(XMMRegister dst, XMMRegister src) { cmpps(dst, src, imm8); } \
1277 : void instr##ps(XMMRegister dst, const Operand& src) { \
1278 : cmpps(dst, src, imm8); \
1279 : } \
1280 : void instr##pd(XMMRegister dst, XMMRegister src) { cmppd(dst, src, imm8); } \
1281 : void instr##pd(XMMRegister dst, const Operand& src) { cmppd(dst, src, imm8); }
1282 :
1283 : SSE_CMP_P(cmpeq, 0x0);
1284 : SSE_CMP_P(cmplt, 0x1);
1285 : SSE_CMP_P(cmple, 0x2);
1286 : SSE_CMP_P(cmpneq, 0x4);
1287 : SSE_CMP_P(cmpnlt, 0x5);
1288 : SSE_CMP_P(cmpnle, 0x6);
1289 :
1290 : #undef SSE_CMP_P
1291 :
// Further packed declarations. movups has register, load and store overloads;
// psrldq takes only a register and a shift amount; pshufhw/pshuflw are
// declared with a register source only; the rest have register and memory
// source overloads.
1292 : void minps(XMMRegister dst, XMMRegister src);
1293 : void minps(XMMRegister dst, const Operand& src);
1294 : void maxps(XMMRegister dst, XMMRegister src);
1295 : void maxps(XMMRegister dst, const Operand& src);
1296 : void rcpps(XMMRegister dst, XMMRegister src);
1297 : void rcpps(XMMRegister dst, const Operand& src);
1298 : void rsqrtps(XMMRegister dst, XMMRegister src);
1299 : void rsqrtps(XMMRegister dst, const Operand& src);
1300 : void sqrtps(XMMRegister dst, XMMRegister src);
1301 : void sqrtps(XMMRegister dst, const Operand& src);
1302 : void movups(XMMRegister dst, XMMRegister src);
1303 : void movups(XMMRegister dst, const Operand& src);
1304 : void movups(const Operand& dst, XMMRegister src);
1305 : void psrldq(XMMRegister dst, uint8_t shift);
1306 : void pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle);
1307 : void pshufd(XMMRegister dst, const Operand& src, uint8_t shuffle);
1308 : void pshufhw(XMMRegister dst, XMMRegister src, uint8_t shuffle);
1309 : void pshuflw(XMMRegister dst, XMMRegister src, uint8_t shuffle);
1310 : void cvtdq2ps(XMMRegister dst, XMMRegister src);
1311 : void cvtdq2ps(XMMRegister dst, const Operand& src);
1312 :
1313 : // AVX instruction
// NOTE(review): the vfm*/vfnm* helpers that follow look like FMA3 encodings
// rather than base AVX -- confirm how callers gate them on CPU features.
//
// Scalar-double fused multiply helpers. Every wrapper is a one-line forward
// to vfmasd (declared at the end of this group) and differs only in the
// opcode byte it passes. Opcode table as written below:
//            132   213   231
//   vfmadd   0x99  0xa9  0xb9
//   vfmsub   0x9b  0xab  0xbb
//   vfnmadd  0x9d  0xad  0xbd
//   vfnmsub  0x9f  0xaf  0xbf
// Each name has a register-src2 and a memory-src2 overload.
1314 : void vfmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1315 : vfmasd(0x99, dst, src1, src2);
1316 : }
1317 : void vfmadd213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1318 : vfmasd(0xa9, dst, src1, src2);
1319 : }
1320 : void vfmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1321 : vfmasd(0xb9, dst, src1, src2);
1322 : }
1323 : void vfmadd132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1324 : vfmasd(0x99, dst, src1, src2);
1325 : }
1326 : void vfmadd213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1327 : vfmasd(0xa9, dst, src1, src2);
1328 : }
1329 : void vfmadd231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1330 : vfmasd(0xb9, dst, src1, src2);
1331 : }
1332 : void vfmsub132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1333 : vfmasd(0x9b, dst, src1, src2);
1334 : }
1335 : void vfmsub213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1336 : vfmasd(0xab, dst, src1, src2);
1337 : }
1338 : void vfmsub231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1339 : vfmasd(0xbb, dst, src1, src2);
1340 : }
1341 : void vfmsub132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1342 : vfmasd(0x9b, dst, src1, src2);
1343 : }
1344 : void vfmsub213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1345 : vfmasd(0xab, dst, src1, src2);
1346 : }
1347 : void vfmsub231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1348 : vfmasd(0xbb, dst, src1, src2);
1349 : }
1350 : void vfnmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1351 : vfmasd(0x9d, dst, src1, src2);
1352 : }
1353 : void vfnmadd213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1354 : vfmasd(0xad, dst, src1, src2);
1355 : }
1356 : void vfnmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1357 : vfmasd(0xbd, dst, src1, src2);
1358 : }
1359 : void vfnmadd132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1360 : vfmasd(0x9d, dst, src1, src2);
1361 : }
1362 : void vfnmadd213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1363 : vfmasd(0xad, dst, src1, src2);
1364 : }
1365 : void vfnmadd231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1366 : vfmasd(0xbd, dst, src1, src2);
1367 : }
1368 : void vfnmsub132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1369 : vfmasd(0x9f, dst, src1, src2);
1370 : }
1371 : void vfnmsub213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1372 : vfmasd(0xaf, dst, src1, src2);
1373 : }
1374 : void vfnmsub231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1375 : vfmasd(0xbf, dst, src1, src2);
1376 : }
1377 : void vfnmsub132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1378 : vfmasd(0x9f, dst, src1, src2);
1379 : }
1380 : void vfnmsub213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1381 : vfmasd(0xaf, dst, src1, src2);
1382 : }
1383 : void vfnmsub231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1384 : vfmasd(0xbf, dst, src1, src2);
1385 : }
// Shared emitter for all of the wrappers above (defined out of line).
1386 : void vfmasd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
1387 : void vfmasd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
1388 :
// Scalar-single counterparts of the *sd fused multiply helpers. Every wrapper
// is a one-line forward to vfmass (declared at the end of this group) and
// differs only in the opcode byte. Opcode table as written below:
//            132   213   231
//   vfmadd   0x99  0xa9  0xb9
//   vfmsub   0x9b  0xab  0xbb
//   vfnmadd  0x9d  0xad  0xbd
//   vfnmsub  0x9f  0xaf  0xbf
// Each name has a register-src2 and a memory-src2 overload.
1389 : void vfmadd132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1390 : vfmass(0x99, dst, src1, src2);
1391 : }
1392 : void vfmadd213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1393 : vfmass(0xa9, dst, src1, src2);
1394 : }
1395 : void vfmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1396 : vfmass(0xb9, dst, src1, src2);
1397 : }
1398 : void vfmadd132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1399 : vfmass(0x99, dst, src1, src2);
1400 : }
1401 : void vfmadd213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1402 : vfmass(0xa9, dst, src1, src2);
1403 : }
1404 : void vfmadd231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1405 : vfmass(0xb9, dst, src1, src2);
1406 : }
1407 : void vfmsub132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1408 : vfmass(0x9b, dst, src1, src2);
1409 : }
1410 : void vfmsub213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1411 : vfmass(0xab, dst, src1, src2);
1412 : }
1413 : void vfmsub231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1414 : vfmass(0xbb, dst, src1, src2);
1415 : }
1416 : void vfmsub132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1417 : vfmass(0x9b, dst, src1, src2);
1418 : }
1419 : void vfmsub213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1420 : vfmass(0xab, dst, src1, src2);
1421 : }
1422 : void vfmsub231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1423 : vfmass(0xbb, dst, src1, src2);
1424 : }
1425 : void vfnmadd132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1426 : vfmass(0x9d, dst, src1, src2);
1427 : }
1428 : void vfnmadd213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1429 : vfmass(0xad, dst, src1, src2);
1430 : }
1431 : void vfnmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1432 : vfmass(0xbd, dst, src1, src2);
1433 : }
1434 : void vfnmadd132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1435 : vfmass(0x9d, dst, src1, src2);
1436 : }
1437 : void vfnmadd213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1438 : vfmass(0xad, dst, src1, src2);
1439 : }
1440 : void vfnmadd231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1441 : vfmass(0xbd, dst, src1, src2);
1442 : }
1443 : void vfnmsub132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1444 : vfmass(0x9f, dst, src1, src2);
1445 : }
1446 : void vfnmsub213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1447 : vfmass(0xaf, dst, src1, src2);
1448 : }
1449 : void vfnmsub231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1450 : vfmass(0xbf, dst, src1, src2);
1451 : }
1452 : void vfnmsub132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1453 : vfmass(0x9f, dst, src1, src2);
1454 : }
1455 : void vfnmsub213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1456 : vfmass(0xaf, dst, src1, src2);
1457 : }
1458 : void vfnmsub231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1459 : vfmass(0xbf, dst, src1, src2);
1460 : }
// Shared emitter for all of the wrappers above (defined out of line).
1461 : void vfmass(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
1462 : void vfmass(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
1463 :
// v-prefixed moves between general-purpose registers / memory and XMM
// registers. Both vmovd and vmovq offer GPR -> XMM, memory -> XMM and
// XMM -> GPR overloads.
1464 : void vmovd(XMMRegister dst, Register src);
1465 : void vmovd(XMMRegister dst, const Operand& src);
1466 : void vmovd(Register dst, XMMRegister src);
1467 : void vmovq(XMMRegister dst, Register src);
1468 : void vmovq(XMMRegister dst, const Operand& src);
1469 : void vmovq(Register dst, XMMRegister src);
1470 :
// vmovsd in three forms, all forwarding to vsd:
//  - register form: opcode 0x10 with dst, src1, src2 passed through;
//  - load form: opcode 0x10, xmm0 is passed in the src1 position;
//  - store form: opcode 0x11, the source register is passed in the dst
//    position, xmm0 in the src1 position and the memory operand last.
1471 1122 : void vmovsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1472 : vsd(0x10, dst, src1, src2);
1473 1122 : }
1474 3042962 : void vmovsd(XMMRegister dst, const Operand& src) {
1475 : vsd(0x10, dst, xmm0, src);
1476 3042962 : }
1477 2714252 : void vmovsd(const Operand& dst, XMMRegister src) {
1478 : vsd(0x11, src, xmm0, dst);
1479 2714252 : }
1480 :
// Macro family that stamps out three-operand v* wrappers.
// AVX_SP_3(instr, opcode): both the scalar pair and the packed pair, i.e.
// instr##ss, instr##sd, instr##ps and instr##pd.
1481 : #define AVX_SP_3(instr, opcode) \
1482 : AVX_S_3(instr, opcode) \
1483 : AVX_P_3(instr, opcode)
1484 :
// AVX_S_3: the scalar pair -- instr##ss routed through vss, instr##sd through
// vsd.
1485 : #define AVX_S_3(instr, opcode) \
1486 : AVX_3(instr##ss, opcode, vss) \
1487 : AVX_3(instr##sd, opcode, vsd)
1488 :
// AVX_P_3: the packed pair -- instr##ps routed through vps, instr##pd through
// vpd.
1489 : #define AVX_P_3(instr, opcode) \
1490 : AVX_3(instr##ps, opcode, vps) \
1491 : AVX_3(instr##pd, opcode, vpd)
1492 :
// AVX_3: the leaf macro. Defines two inline overloads of `instr` (register
// src2 and memory src2) that call impl(opcode, dst, src1, src2).
1493 : #define AVX_3(instr, opcode, impl) \
1494 : void instr(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
1495 : impl(opcode, dst, src1, src2); \
1496 : } \
1497 : void instr(XMMRegister dst, XMMRegister src1, const Operand& src2) { \
1498 : impl(opcode, dst, src1, src2); \
1499 : }
1500 :
// Instantiations. vsqrt..vmax get all four suffixes; vand/vor/vxor get only
// the packed ps/pd forms; vcvtsd2ss is a single name routed through vsd.
1501 722 : AVX_SP_3(vsqrt, 0x51);
1502 203662 : AVX_SP_3(vadd, 0x58);
1503 64858 : AVX_SP_3(vsub, 0x5c);
1504 57143 : AVX_SP_3(vmul, 0x59);
1505 156601 : AVX_SP_3(vdiv, 0x5e);
1506 : AVX_SP_3(vmin, 0x5d);
1507 : AVX_SP_3(vmax, 0x5f);
1508 1131 : AVX_P_3(vand, 0x54);
1509 714 : AVX_P_3(vor, 0x56);
1510 656962 : AVX_P_3(vxor, 0x57);
1511 17696 : AVX_3(vcvtsd2ss, 0x5a, vsd);
1512 :
1513 : #undef AVX_3
1514 : #undef AVX_S_3
1515 : #undef AVX_P_3
1516 : #undef AVX_SP_3
1517 :
// Immediate-count quadword shifts. Both call vpd with opcode 0x73 and then
// emit the immediate byte. A synthetic XMMRegister built from a literal (2
// for vpsrlq, 6 for vpsllq) is passed in vpd's first register slot --
// presumably the opcode-extension field of the encoding; the caller's dst
// and src occupy the remaining two slots.
1518 : void vpsrlq(XMMRegister dst, XMMRegister src, byte imm8) {
1519 322 : XMMRegister iop = {2};
1520 322 : vpd(0x73, iop, dst, src);
1521 : emit(imm8);
1522 : }
1523 : void vpsllq(XMMRegister dst, XMMRegister src, byte imm8) {
1524 26324 : XMMRegister iop = {6};
1525 26324 : vpd(0x73, iop, dst, src);
1526 : emit(imm8);
1527 : }
// Forwards to vinstr with opcode 0x5a and the kF3 / k0F / kWIG selectors;
// register-src2 and memory-src2 overloads.
1528 : void vcvtss2sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1529 10295 : vinstr(0x5a, dst, src1, src2, kF3, k0F, kWIG);
1530 : }
1531 : void vcvtss2sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1532 2110 : vinstr(0x5a, dst, src1, src2, kF3, k0F, kWIG);
1533 : }
// Integer -> floating-point conversions, all using opcode 0x2a with k0F.
// The selectors vary by name as written below:
//   vcvtlsi2sd: kF2, kW0      vcvtlsi2ss: kF3, kW0
//   vcvtqsi2sd: kF2, kW1      vcvtqsi2ss: kF3, kW1
// In the Register overloads the general-purpose src2 is re-wrapped as an
// XMMRegister carrying the same code so that it fits vinstr's register
// overload; the Operand overloads pass the memory operand straight through.
1534 : void vcvtlsi2sd(XMMRegister dst, XMMRegister src1, Register src2) {
1535 405034 : XMMRegister isrc2 = {src2.code()};
1536 405034 : vinstr(0x2a, dst, src1, isrc2, kF2, k0F, kW0);
1537 : }
1538 : void vcvtlsi2sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1539 8408 : vinstr(0x2a, dst, src1, src2, kF2, k0F, kW0);
1540 : }
1541 : void vcvtlsi2ss(XMMRegister dst, XMMRegister src1, Register src2) {
1542 309 : XMMRegister isrc2 = {src2.code()};
1543 309 : vinstr(0x2a, dst, src1, isrc2, kF3, k0F, kW0);
1544 : }
1545 : void vcvtlsi2ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1546 0 : vinstr(0x2a, dst, src1, src2, kF3, k0F, kW0);
1547 : }
1548 : void vcvtqsi2ss(XMMRegister dst, XMMRegister src1, Register src2) {
1549 76 : XMMRegister isrc2 = {src2.code()};
1550 76 : vinstr(0x2a, dst, src1, isrc2, kF3, k0F, kW1);
1551 : }
1552 : void vcvtqsi2ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1553 0 : vinstr(0x2a, dst, src1, src2, kF3, k0F, kW1);
1554 : }
1555 : void vcvtqsi2sd(XMMRegister dst, XMMRegister src1, Register src2) {
1556 4172 : XMMRegister isrc2 = {src2.code()};
1557 4172 : vinstr(0x2a, dst, src1, isrc2, kF2, k0F, kW1);
1558 : }
1559 : void vcvtqsi2sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
1560 0 : vinstr(0x2a, dst, src1, src2, kF2, k0F, kW1);
1561 : }
// Floating-point -> integer conversions. Each wrapper re-wraps the
// general-purpose dst as an XMMRegister with the same code, passes xmm0 in
// the src1 position, and forwards to vinstr with k0F. Opcode and selectors
// as written below:
//   vcvttss2si : 0x2c, kF3, kW0      vcvttsd2si : 0x2c, kF2, kW0
//   vcvttss2siq: 0x2c, kF3, kW1      vcvttsd2siq: 0x2c, kF2, kW1
//   vcvtsd2si  : 0x2d, kF2, kW0 (register source only)
1562 132 : void vcvttss2si(Register dst, XMMRegister src) {
1563 132 : XMMRegister idst = {dst.code()};
1564 132 : vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW0);
1565 132 : }
1566 0 : void vcvttss2si(Register dst, const Operand& src) {
1567 0 : XMMRegister idst = {dst.code()};
1568 0 : vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW0);
1569 0 : }
1570 148862 : void vcvttsd2si(Register dst, XMMRegister src) {
1571 148862 : XMMRegister idst = {dst.code()};
1572 148862 : vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW0);
1573 148862 : }
1574 8336 : void vcvttsd2si(Register dst, const Operand& src) {
1575 8336 : XMMRegister idst = {dst.code()};
1576 8336 : vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW0);
1577 8336 : }
1578 91 : void vcvttss2siq(Register dst, XMMRegister src) {
1579 91 : XMMRegister idst = {dst.code()};
1580 91 : vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW1);
1581 91 : }
1582 0 : void vcvttss2siq(Register dst, const Operand& src) {
1583 0 : XMMRegister idst = {dst.code()};
1584 0 : vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW1);
1585 0 : }
1586 103201 : void vcvttsd2siq(Register dst, XMMRegister src) {
1587 103201 : XMMRegister idst = {dst.code()};
1588 103201 : vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW1);
1589 103201 : }
1590 0 : void vcvttsd2siq(Register dst, const Operand& src) {
1591 0 : XMMRegister idst = {dst.code()};
1592 0 : vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW1);
1593 0 : }
1594 244 : void vcvtsd2si(Register dst, XMMRegister src) {
1595 244 : XMMRegister idst = {dst.code()};
1596 244 : vinstr(0x2d, idst, xmm0, src, kF2, k0F, kW0);
1597 244 : }
// Forwards to vinstr with opcode 0x2e and k66 / k0F / kWIG; xmm0 is passed
// in the src1 position. Register-source and memory-source overloads.
1598 : void vucomisd(XMMRegister dst, XMMRegister src) {
1599 299236 : vinstr(0x2e, dst, xmm0, src, k66, k0F, kWIG);
1600 : }
1601 : void vucomisd(XMMRegister dst, const Operand& src) {
1602 79244 : vinstr(0x2e, dst, xmm0, src, k66, k0F, kWIG);
1603 : }
// Rounding helpers: vinstr with k66 / k0F3A / kWIG (opcode 0x0a for the ss
// form, 0x0b for the sd form), followed by one immediate byte made of the
// RoundingMode value OR-ed with 0x8.
1604 239 : void vroundss(XMMRegister dst, XMMRegister src1, XMMRegister src2,
1605 : RoundingMode mode) {
1606 239 : vinstr(0x0a, dst, src1, src2, k66, k0F3A, kWIG);
1607 239 : emit(static_cast<byte>(mode) | 0x8); // Mask precision exception.
1608 239 : }
1609 41881 : void vroundsd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
1610 : RoundingMode mode) {
1611 41881 : vinstr(0x0b, dst, src1, src2, k66, k0F3A, kWIG);
1612 41881 : emit(static_cast<byte>(mode) | 0x8); // Mask precision exception.
1613 41881 : }
1614 :
// Scalar-double emitter used by vmovsd and the *sd wrappers generated by the
// AVX_3 macro family: forwards the caller's opcode and operands to vinstr
// with the fixed selectors kF2 / k0F / kWIG.
1615 : void vsd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1616 245955 : vinstr(op, dst, src1, src2, kF2, k0F, kWIG);
1617 : }
1618 : void vsd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2) {
1619 5761829 : vinstr(op, dst, src1, src2, kF2, k0F, kWIG);
1620 : }
1621 :
// vmovss mirrors vmovsd but routes through vss: opcode 0x10 for the register
// and load forms, 0x11 for the store form (source register passed in the dst
// position, xmm0 in the src1 position, memory operand last).
1622 : void vmovss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1623 58 : vss(0x10, dst, src1, src2);
1624 : }
1625 : void vmovss(XMMRegister dst, const Operand& src) {
1626 17969 : vss(0x10, dst, xmm0, src);
1627 : }
1628 : void vmovss(const Operand& dst, XMMRegister src) {
1629 1288863 : vss(0x11, src, xmm0, dst);
1630 : }
// Unlike vucomisd and vsd, vucomiss and vss are only declared here; their
// definitions are out of line.
1631 : void vucomiss(XMMRegister dst, XMMRegister src);
1632 : void vucomiss(XMMRegister dst, const Operand& src);
1633 : void vss(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
1634 : void vss(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
1635 :
// Packed moves. The *ps names forward to vps and the *pd names to vpd, always
// with xmm0 in the src1 position. Opcodes as written below: 0x28 for the
// aligned register moves (vmovaps / vmovapd), 0x10 for unaligned
// register/load forms, 0x11 for unaligned stores (source register passed in
// the dst position, memory operand last).
1636 44 : void vmovaps(XMMRegister dst, XMMRegister src) { vps(0x28, dst, xmm0, src); }
1637 0 : void vmovups(XMMRegister dst, XMMRegister src) { vps(0x10, dst, xmm0, src); }
1638 : void vmovups(XMMRegister dst, const Operand& src) {
1639 14 : vps(0x10, dst, xmm0, src);
1640 : }
1641 : void vmovups(const Operand& dst, XMMRegister src) {
1642 14 : vps(0x11, src, xmm0, dst);
1643 : }
1644 116980 : void vmovapd(XMMRegister dst, XMMRegister src) { vpd(0x28, dst, xmm0, src); }
1645 : void vmovupd(XMMRegister dst, const Operand& src) {
1646 14 : vpd(0x10, dst, xmm0, src);
1647 : }
1648 : void vmovupd(const Operand& dst, XMMRegister src) {
1649 14 : vpd(0x11, src, xmm0, dst);
1650 : }
// Both wrappers re-wrap the general-purpose dst as an XMMRegister with the
// same code and call vps / vpd with opcode 0x50, passing xmm0 in the src1
// position.
1651 : void vmovmskps(Register dst, XMMRegister src) {
1652 58 : XMMRegister idst = {dst.code()};
1653 58 : vps(0x50, idst, xmm0, src);
1654 : }
1655 : void vmovmskpd(Register dst, XMMRegister src) {
1656 7148 : XMMRegister idst = {dst.code()};
1657 7148 : vpd(0x50, idst, xmm0, src);
1658 : }
// Forwards to vps() with opcode 0xC2 and then emits `cmp` as one trailing
// byte. `cmp` is the comparison selector; the AVX_CMP_P-generated helpers
// further down pass the values 0x0-0x6 here. Register-source form.
1659 : void vcmpps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int8_t cmp) {
1660 : vps(0xC2, dst, src1, src2);
1661 : emit(cmp);
1662 : }
// Memory-source form; otherwise identical.
1663 : void vcmpps(XMMRegister dst, XMMRegister src1, const Operand& src2,
1664 : int8_t cmp) {
1665 : vps(0xC2, dst, src1, src2);
1666 : emit(cmp);
1667 : }
// Forwards to vpd() with opcode 0xC2 and then emits `cmp` as one trailing
// byte. `cmp` is the comparison selector; the AVX_CMP_P-generated helpers
// further down pass the values 0x0-0x6 here. Register-source form.
1668 : void vcmppd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int8_t cmp) {
1669 : vpd(0xC2, dst, src1, src2);
1670 : emit(cmp);
1671 : }
// Memory-source form; otherwise identical.
1672 : void vcmppd(XMMRegister dst, XMMRegister src1, const Operand& src2,
1673 : int8_t cmp) {
1674 : vpd(0xC2, dst, src1, src2);
1675 : emit(cmp);
1676 : }
1677 :
1678 : #define AVX_CMP_P(instr, imm8) \
1679 : void instr##ps(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
1680 : vcmpps(dst, src1, src2, imm8); \
1681 : } \
1682 : void instr##ps(XMMRegister dst, XMMRegister src1, const Operand& src2) { \
1683 : vcmpps(dst, src1, src2, imm8); \
1684 : } \
1685 : void instr##pd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
1686 : vcmppd(dst, src1, src2, imm8); \
1687 : } \
1688 : void instr##pd(XMMRegister dst, XMMRegister src1, const Operand& src2) { \
1689 : vcmppd(dst, src1, src2, imm8); \
1690 : }
1691 :
1692 : AVX_CMP_P(vcmpeq, 0x0);
1693 : AVX_CMP_P(vcmplt, 0x1);
1694 : AVX_CMP_P(vcmple, 0x2);
1695 : AVX_CMP_P(vcmpneq, 0x4);
1696 : AVX_CMP_P(vcmpnlt, 0x5);
1697 : AVX_CMP_P(vcmpnle, 0x6);
1698 :
1699 : #undef AVX_CMP_P
1700 :
// Load from memory: forwards to vinstr() with opcode 0xF0 and the selectors
// kF2, k0F and kWIG. xmm0 is passed in the src1 position, which this form has
// no operand for.
1701 : void vlddqu(XMMRegister dst, const Operand& src) {
1702 : vinstr(0xF0, dst, xmm0, src, kF2, k0F, kWIG);
1703 : }
1704 : void vpsllw(XMMRegister dst, XMMRegister src, int8_t imm8) {
1705 : XMMRegister iop = {6};
1706 : vinstr(0x71, iop, dst, src, k66, k0F, kWIG);
1707 : emit(imm8);
1708 : }
1709 : void vpsrlw(XMMRegister dst, XMMRegister src, int8_t imm8) {
1710 : XMMRegister iop = {2};
1711 : vinstr(0x71, iop, dst, src, k66, k0F, kWIG);
1712 : emit(imm8);
1713 : }
1714 : void vpsraw(XMMRegister dst, XMMRegister src, int8_t imm8) {
1715 : XMMRegister iop = {4};
1716 : vinstr(0x71, iop, dst, src, k66, k0F, kWIG);
1717 : emit(imm8);
1718 : }
1719 : void vpslld(XMMRegister dst, XMMRegister src, int8_t imm8) {
1720 : XMMRegister iop = {6};
1721 : vinstr(0x72, iop, dst, src, k66, k0F, kWIG);
1722 : emit(imm8);
1723 : }
1724 : void vpsrld(XMMRegister dst, XMMRegister src, int8_t imm8) {
1725 : XMMRegister iop = {2};
1726 : vinstr(0x72, iop, dst, src, k66, k0F, kWIG);
1727 : emit(imm8);
1728 : }
1729 : void vpsrad(XMMRegister dst, XMMRegister src, int8_t imm8) {
1730 : XMMRegister iop = {4};
1731 : vinstr(0x72, iop, dst, src, k66, k0F, kWIG);
1732 : emit(imm8);
1733 : }
1734 : void vpextrb(Register dst, XMMRegister src, int8_t imm8) {
1735 : XMMRegister idst = {dst.code()};
1736 : vinstr(0x14, src, xmm0, idst, k66, k0F3A, kW0);
1737 : emit(imm8);
1738 : }
1739 : void vpextrb(const Operand& dst, XMMRegister src, int8_t imm8) {
1740 : vinstr(0x14, src, xmm0, dst, k66, k0F3A, kW0);
1741 : emit(imm8);
1742 : }
1743 : void vpextrw(Register dst, XMMRegister src, int8_t imm8) {
1744 : XMMRegister idst = {dst.code()};
1745 : vinstr(0xc5, idst, xmm0, src, k66, k0F, kW0);
1746 : emit(imm8);
1747 : }
1748 : void vpextrw(const Operand& dst, XMMRegister src, int8_t imm8) {
1749 : vinstr(0x15, src, xmm0, dst, k66, k0F3A, kW0);
1750 : emit(imm8);
1751 : }
1752 : void vpextrd(Register dst, XMMRegister src, int8_t imm8) {
1753 : XMMRegister idst = {dst.code()};
1754 : vinstr(0x16, src, xmm0, idst, k66, k0F3A, kW0);
1755 : emit(imm8);
1756 : }
1757 : void vpextrd(const Operand& dst, XMMRegister src, int8_t imm8) {
1758 : vinstr(0x16, src, xmm0, dst, k66, k0F3A, kW0);
1759 : emit(imm8);
1760 : }
1761 : void vpinsrb(XMMRegister dst, XMMRegister src1, Register src2, int8_t imm8) {
1762 : XMMRegister isrc = {src2.code()};
1763 : vinstr(0x20, dst, src1, isrc, k66, k0F3A, kW0);
1764 : emit(imm8);
1765 : }
1766 : void vpinsrb(XMMRegister dst, XMMRegister src1, const Operand& src2,
1767 : int8_t imm8) {
1768 : vinstr(0x20, dst, src1, src2, k66, k0F3A, kW0);
1769 : emit(imm8);
1770 : }
1771 : void vpinsrw(XMMRegister dst, XMMRegister src1, Register src2, int8_t imm8) {
1772 : XMMRegister isrc = {src2.code()};
1773 : vinstr(0xc4, dst, src1, isrc, k66, k0F, kW0);
1774 : emit(imm8);
1775 : }
1776 : void vpinsrw(XMMRegister dst, XMMRegister src1, const Operand& src2,
1777 : int8_t imm8) {
1778 : vinstr(0xc4, dst, src1, src2, k66, k0F, kW0);
1779 : emit(imm8);
1780 : }
1781 : void vpinsrd(XMMRegister dst, XMMRegister src1, Register src2, int8_t imm8) {
1782 : XMMRegister isrc = {src2.code()};
1783 : vinstr(0x22, dst, src1, isrc, k66, k0F3A, kW0);
1784 : emit(imm8);
1785 : }
1786 : void vpinsrd(XMMRegister dst, XMMRegister src1, const Operand& src2,
1787 : int8_t imm8) {
1788 : vinstr(0x22, dst, src1, src2, k66, k0F3A, kW0);
1789 : emit(imm8);
1790 : }
// Forwards to vinstr() with opcode 0x70 and the selectors k66, k0F and kWIG
// (xmm0 fills the src1 position), then emits `imm8` as one trailing byte.
1791 : void vpshufd(XMMRegister dst, XMMRegister src, int8_t imm8) {
1792 : vinstr(0x70, dst, xmm0, src, k66, k0F, kWIG);
1793 : emit(imm8);
1794 : }
1795 :
// Shared emitters behind the inline vmov*ps / vmov*pd / vcmp* helpers above.
// Declarations only; both take the opcode byte first, then dst, src1 and src2
// (register or memory operand).
1796 : void vps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
1797 : void vps(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
1798 : void vpd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
1799 : void vpd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
1800 :
1801 : // BMI instruction
// Forwards to bmi1q() with opcode 0xf2; dst, src1 and src2 map onto bmi1q()'s
// reg, vreg and rm parameters in that order.
1802 : void andnq(Register dst, Register src1, Register src2) {
1803 : bmi1q(0xf2, dst, src1, src2);
1804 : }
// Memory form: src2 is an Operand.
1805 : void andnq(Register dst, Register src1, const Operand& src2) {
1806 : bmi1q(0xf2, dst, src1, src2);
1807 : }
// Forwards to bmi1l() with opcode 0xf2; dst, src1 and src2 map onto bmi1l()'s
// reg, vreg and rm parameters in that order.
1808 : void andnl(Register dst, Register src1, Register src2) {
1809 : bmi1l(0xf2, dst, src1, src2);
1810 : }
// Memory form: src2 is an Operand.
1811 : void andnl(Register dst, Register src1, const Operand& src2) {
1812 : bmi1l(0xf2, dst, src1, src2);
1813 : }
// Forwards to bmi1q() with opcode 0xf7. Note the swapped order: src2 is passed
// as bmi1q()'s vreg and src1 as its rm.
1814 : void bextrq(Register dst, Register src1, Register src2) {
1815 : bmi1q(0xf7, dst, src2, src1);
1816 : }
// Memory form: src1 (the rm operand) is an Operand.
1817 : void bextrq(Register dst, const Operand& src1, Register src2) {
1818 : bmi1q(0xf7, dst, src2, src1);
1819 : }
// Forwards to bmi1l() with opcode 0xf7. Note the swapped order: src2 is passed
// as bmi1l()'s vreg and src1 as its rm.
1820 : void bextrl(Register dst, Register src1, Register src2) {
1821 : bmi1l(0xf7, dst, src2, src1);
1822 : }
// Memory form: src1 (the rm operand) is an Operand.
1823 : void bextrl(Register dst, const Operand& src1, Register src2) {
1824 : bmi1l(0xf7, dst, src2, src1);
1825 : }
1826 : void blsiq(Register dst, Register src) {
1827 : Register ireg = {3};
1828 : bmi1q(0xf3, ireg, dst, src);
1829 : }
1830 : void blsiq(Register dst, const Operand& src) {
1831 : Register ireg = {3};
1832 : bmi1q(0xf3, ireg, dst, src);
1833 : }
1834 : void blsil(Register dst, Register src) {
1835 : Register ireg = {3};
1836 : bmi1l(0xf3, ireg, dst, src);
1837 : }
1838 : void blsil(Register dst, const Operand& src) {
1839 : Register ireg = {3};
1840 : bmi1l(0xf3, ireg, dst, src);
1841 : }
1842 : void blsmskq(Register dst, Register src) {
1843 : Register ireg = {2};
1844 : bmi1q(0xf3, ireg, dst, src);
1845 : }
1846 : void blsmskq(Register dst, const Operand& src) {
1847 : Register ireg = {2};
1848 : bmi1q(0xf3, ireg, dst, src);
1849 : }
1850 : void blsmskl(Register dst, Register src) {
1851 : Register ireg = {2};
1852 : bmi1l(0xf3, ireg, dst, src);
1853 : }
1854 : void blsmskl(Register dst, const Operand& src) {
1855 : Register ireg = {2};
1856 : bmi1l(0xf3, ireg, dst, src);
1857 : }
1858 : void blsrq(Register dst, Register src) {
1859 : Register ireg = {1};
1860 : bmi1q(0xf3, ireg, dst, src);
1861 : }
1862 : void blsrq(Register dst, const Operand& src) {
1863 : Register ireg = {1};
1864 : bmi1q(0xf3, ireg, dst, src);
1865 : }
1866 : void blsrl(Register dst, Register src) {
1867 : Register ireg = {1};
1868 : bmi1l(0xf3, ireg, dst, src);
1869 : }
1870 : void blsrl(Register dst, const Operand& src) {
1871 : Register ireg = {1};
1872 : bmi1l(0xf3, ireg, dst, src);
1873 : }
// Bit-counting instructions. Declarations only; each comes in a 64-bit (q) and
// a 32-bit (l) flavour with a register or memory source, and is defined
// outside this part of the file.
// NOTE(review): going by the names these emit TZCNT / LZCNT / POPCNT; whether
// callers must check CPU feature support first is not visible here - confirm.
1874 : void tzcntq(Register dst, Register src);
1875 : void tzcntq(Register dst, const Operand& src);
1876 : void tzcntl(Register dst, Register src);
1877 : void tzcntl(Register dst, const Operand& src);
1878 :
1879 : void lzcntq(Register dst, Register src);
1880 : void lzcntq(Register dst, const Operand& src);
1881 : void lzcntl(Register dst, Register src);
1882 : void lzcntl(Register dst, const Operand& src);
1883 :
1884 : void popcntq(Register dst, Register src);
1885 : void popcntq(Register dst, const Operand& src);
1886 : void popcntl(Register dst, Register src);
1887 : void popcntl(Register dst, const Operand& src);
1888 :
// Forwards to bmi2q() with prefix kNone and opcode 0xf5. Note the swapped
// order: src2 is passed as bmi2q()'s vreg and src1 as its rm.
1889 : void bzhiq(Register dst, Register src1, Register src2) {
1890 : bmi2q(kNone, 0xf5, dst, src2, src1);
1891 : }
// Memory form: src1 (the rm operand) is an Operand.
1892 : void bzhiq(Register dst, const Operand& src1, Register src2) {
1893 : bmi2q(kNone, 0xf5, dst, src2, src1);
1894 : }
// Forwards to bmi2l() with prefix kNone and opcode 0xf5. Note the swapped
// order: src2 is passed as bmi2l()'s vreg and src1 as its rm.
1895 : void bzhil(Register dst, Register src1, Register src2) {
1896 : bmi2l(kNone, 0xf5, dst, src2, src1);
1897 : }
// Memory form: src1 (the rm operand) is an Operand.
1898 : void bzhil(Register dst, const Operand& src1, Register src2) {
1899 : bmi2l(kNone, 0xf5, dst, src2, src1);
1900 : }
// Two-destination instruction: forwards to bmi2q() with prefix kF2 and opcode
// 0xf6; dst1, dst2 and src map onto bmi2q()'s reg, vreg and rm in that order.
1901 : void mulxq(Register dst1, Register dst2, Register src) {
1902 : bmi2q(kF2, 0xf6, dst1, dst2, src);
1903 : }
// Memory-source form.
1904 : void mulxq(Register dst1, Register dst2, const Operand& src) {
1905 : bmi2q(kF2, 0xf6, dst1, dst2, src);
1906 : }
// Two-destination instruction: forwards to bmi2l() with prefix kF2 and opcode
// 0xf6; dst1, dst2 and src map onto bmi2l()'s reg, vreg and rm in that order.
1907 : void mulxl(Register dst1, Register dst2, Register src) {
1908 : bmi2l(kF2, 0xf6, dst1, dst2, src);
1909 : }
// Memory-source form.
1910 : void mulxl(Register dst1, Register dst2, const Operand& src) {
1911 : bmi2l(kF2, 0xf6, dst1, dst2, src);
1912 : }
// Forwards to bmi2q() with prefix kF2 and opcode 0xf5; dst, src1 and src2 map
// onto bmi2q()'s reg, vreg and rm in that order. (pext* shares the opcode and
// differs only in the prefix.)
1913 : void pdepq(Register dst, Register src1, Register src2) {
1914 : bmi2q(kF2, 0xf5, dst, src1, src2);
1915 : }
// Memory form: src2 is an Operand.
1916 : void pdepq(Register dst, Register src1, const Operand& src2) {
1917 : bmi2q(kF2, 0xf5, dst, src1, src2);
1918 : }
// Forwards to bmi2l() with prefix kF2 and opcode 0xf5; dst, src1 and src2 map
// onto bmi2l()'s reg, vreg and rm in that order. (pext* shares the opcode and
// differs only in the prefix.)
1919 : void pdepl(Register dst, Register src1, Register src2) {
1920 : bmi2l(kF2, 0xf5, dst, src1, src2);
1921 : }
// Memory form: src2 is an Operand.
1922 : void pdepl(Register dst, Register src1, const Operand& src2) {
1923 : bmi2l(kF2, 0xf5, dst, src1, src2);
1924 : }
// Forwards to bmi2q() with prefix kF3 and opcode 0xf5; dst, src1 and src2 map
// onto bmi2q()'s reg, vreg and rm in that order. (pdep* shares the opcode and
// differs only in the prefix.)
1925 : void pextq(Register dst, Register src1, Register src2) {
1926 : bmi2q(kF3, 0xf5, dst, src1, src2);
1927 : }
// Memory form: src2 is an Operand.
1928 : void pextq(Register dst, Register src1, const Operand& src2) {
1929 : bmi2q(kF3, 0xf5, dst, src1, src2);
1930 : }
// Forwards to bmi2l() with prefix kF3 and opcode 0xf5; dst, src1 and src2 map
// onto bmi2l()'s reg, vreg and rm in that order. (pdep* shares the opcode and
// differs only in the prefix.)
1931 : void pextl(Register dst, Register src1, Register src2) {
1932 : bmi2l(kF3, 0xf5, dst, src1, src2);
1933 : }
// Memory form: src2 is an Operand.
1934 : void pextl(Register dst, Register src1, const Operand& src2) {
1935 : bmi2l(kF3, 0xf5, dst, src1, src2);
1936 : }
// Forwards to bmi2q() with prefix kF3 and opcode 0xf7 (shlx* / shrx* share the
// opcode and differ in the prefix). Note the swapped order: src2 is passed as
// bmi2q()'s vreg and src1 as its rm.
1937 : void sarxq(Register dst, Register src1, Register src2) {
1938 : bmi2q(kF3, 0xf7, dst, src2, src1);
1939 : }
// Memory form: src1 (the rm operand) is an Operand.
1940 : void sarxq(Register dst, const Operand& src1, Register src2) {
1941 : bmi2q(kF3, 0xf7, dst, src2, src1);
1942 : }
// Forwards to bmi2l() with prefix kF3 and opcode 0xf7 (shlx* / shrx* share the
// opcode and differ in the prefix). Note the swapped order: src2 is passed as
// bmi2l()'s vreg and src1 as its rm.
1943 : void sarxl(Register dst, Register src1, Register src2) {
1944 : bmi2l(kF3, 0xf7, dst, src2, src1);
1945 : }
// Memory form: src1 (the rm operand) is an Operand.
1946 : void sarxl(Register dst, const Operand& src1, Register src2) {
1947 : bmi2l(kF3, 0xf7, dst, src2, src1);
1948 : }
// Forwards to bmi2q() with prefix k66 and opcode 0xf7 (sarx* / shrx* share the
// opcode and differ in the prefix). Note the swapped order: src2 is passed as
// bmi2q()'s vreg and src1 as its rm.
1949 : void shlxq(Register dst, Register src1, Register src2) {
1950 : bmi2q(k66, 0xf7, dst, src2, src1);
1951 : }
// Memory form: src1 (the rm operand) is an Operand.
1952 : void shlxq(Register dst, const Operand& src1, Register src2) {
1953 : bmi2q(k66, 0xf7, dst, src2, src1);
1954 : }
// Forwards to bmi2l() with prefix k66 and opcode 0xf7 (sarx* / shrx* share the
// opcode and differ in the prefix). Note the swapped order: src2 is passed as
// bmi2l()'s vreg and src1 as its rm.
1955 : void shlxl(Register dst, Register src1, Register src2) {
1956 : bmi2l(k66, 0xf7, dst, src2, src1);
1957 : }
// Memory form: src1 (the rm operand) is an Operand.
1958 : void shlxl(Register dst, const Operand& src1, Register src2) {
1959 : bmi2l(k66, 0xf7, dst, src2, src1);
1960 : }
// Forwards to bmi2q() with prefix kF2 and opcode 0xf7 (sarx* / shlx* share the
// opcode and differ in the prefix). Note the swapped order: src2 is passed as
// bmi2q()'s vreg and src1 as its rm.
1961 : void shrxq(Register dst, Register src1, Register src2) {
1962 : bmi2q(kF2, 0xf7, dst, src2, src1);
1963 : }
// Memory form: src1 (the rm operand) is an Operand.
1964 : void shrxq(Register dst, const Operand& src1, Register src2) {
1965 : bmi2q(kF2, 0xf7, dst, src2, src1);
1966 : }
// Forwards to bmi2l() with prefix kF2 and opcode 0xf7 (sarx* / shlx* share the
// opcode and differ in the prefix). Note the swapped order: src2 is passed as
// bmi2l()'s vreg and src1 as its rm.
1967 : void shrxl(Register dst, Register src1, Register src2) {
1968 : bmi2l(kF2, 0xf7, dst, src2, src1);
1969 : }
// Memory form: src1 (the rm operand) is an Operand.
1970 : void shrxl(Register dst, const Operand& src1, Register src2) {
1971 : bmi2l(kF2, 0xf7, dst, src2, src1);
1972 : }
// Rotate-by-immediate helpers. Declarations only (defined outside this part
// of the file); unlike the other BMI2 wrappers above they take an immediate
// byte rather than a second register.
1973 : void rorxq(Register dst, Register src, byte imm8);
1974 : void rorxq(Register dst, const Operand& src, byte imm8);
1975 : void rorxl(Register dst, Register src, byte imm8);
1976 : void rorxl(Register dst, const Operand& src, byte imm8);
1977 :
1978 : // Check the code size generated from label to here.
1979 96490 : int SizeOfCodeGeneratedSince(Label* label) {
1980 1119314 : return pc_offset() - label->pos();
1981 : }
1982 :
1983 : // Mark address of a debug break slot.
1984 : void RecordDebugBreakSlot(RelocInfo::Mode mode);
1985 :
1986 : // Record a comment relocation entry that can be used by a disassembler.
1987 : // Use --code-comments to enable.
// NOTE(review): `msg` is a raw pointer; whether the text is copied or must
// outlive the assembler is not visible here - confirm at the definition.
1988 : void RecordComment(const char* msg);
1989 :
1990 : // Record a deoptimization reason that can be used by a log or cpu profiler.
1991 : // Use --trace-deopt to enable.
1992 : void RecordDeoptReason(DeoptimizeReason reason, SourcePosition position,
1993 : int id);
1994 :
// Stub: there is no embedded constant pool support here, so the body is just
// UNREACHABLE() and all four parameters are ignored.
1995 0 : void PatchConstantPoolAccessInstruction(int pc_offset, int offset,
1996 : ConstantPoolEntry::Access access,
1997 : ConstantPoolEntry::Type type) {
1998 : // No embedded constant pool support.
1999 0 : UNREACHABLE();
2000 : }
2001 :
// Declaration only; defined outside this part of the file.
2002 : void RecordProtectedInstructionLanding(int pc_offset);
2003 :
2004 : // Writes a single word of data in the code stream.
2005 : // Used for inline tables, e.g., jump-tables.
// db / dd / dq take 8-, 32- and 64-bit values respectively.
2006 : void db(uint8_t data);
2007 : void dd(uint32_t data);
2008 : void dq(uint64_t data);
// Pointer-sized data: simply forwards the value to dq().
2009 0 : void dp(uintptr_t data) { dq(data); }
// Label flavour of dq(); declaration only.
2010 : void dq(Label* label);
2011 :
2012 : // Check if there is less than kGap bytes available in the buffer.
2013 : // If this is the case, we need to grow the buffer before emitting
2014 : // an instruction or relocation information.
2015 : inline bool buffer_overflow() const {
2016 367791688 : return pc_ >= reloc_info_writer.pos() - kGap;
2017 : }
2018 :
2019 : // Get the number of bytes available in the buffer.
2020 : inline int available_space() const {
2021 : return static_cast<int>(reloc_info_writer.pos() - pc_);
2022 : }
2023 :
// Static predicate over a code address; declaration only.
// NOTE(review): which nop encodings it recognises is not visible here.
2024 : static bool IsNop(Address addr);
2025 :
2026 : // Avoid overflows for displacements etc.
// Upper bound on the buffer size: 512 * MB.
2027 : static constexpr int kMaximalBufferSize = 512 * MB;
2028 :
// Reads the byte at index `pos` of buffer_. No bounds check is performed.
2029 : byte byte_at(int pos) { return buffer_[pos]; }
// Overwrites the byte at index `pos` of buffer_ with `value`. No bounds check
// is performed.
2030 7726749 : void set_byte_at(int pos, byte value) { buffer_[pos] = value; }
2031 :
// Returns the current emission pointer pc_.
2032 : Address pc() const { return pc_; }
2033 :
2034 : protected:
2035 : // Call near indirect
// Takes the call target as an Operand. Declared in the protected section, so
// it is not part of the public interface; declaration only.
2036 : void call(const Operand& operand);
2037 :
2038 : private:
// Returns a pointer to the byte at index `pos` of buffer_ (buffer_ + pos).
// No bounds check is performed.
2039 86757982 : byte* addr_at(int pos) { return buffer_ + pos; }
2040 52908719 : uint32_t long_at(int pos) {
2041 52908719 : return *reinterpret_cast<uint32_t*>(addr_at(pos));
2042 : }
2043 26164544 : void long_at_put(int pos, uint32_t x) {
2044 26852087 : *reinterpret_cast<uint32_t*>(addr_at(pos)) = x;
2045 : }
2046 :
2047 : // code emission
2048 : void GrowBuffer();
2049 :
// Writes one byte at pc_ and advances pc_ past it. No space check happens
// here; see buffer_overflow() / GrowBuffer().
2050 599361105 : void emit(byte x) { *pc_++ = x; }
// Wider and typed emitters; declared inline here, defined outside this part
// of the file. emitl / emitq / emitw take 32-, 64- and 16-bit values.
2051 : inline void emitl(uint32_t x);
2052 : inline void emitp(void* x, RelocInfo::Mode rmode);
2053 : inline void emitq(uint64_t x);
2054 : inline void emitw(uint16_t x);
2055 : inline void emit_code_target(Handle<Code> target,
2056 : RelocInfo::Mode rmode,
2057 : TypeFeedbackId ast_id = TypeFeedbackId::None());
2058 : inline void emit_runtime_entry(Address entry, RelocInfo::Mode rmode);
2059 : inline void emit(Immediate x);
2060 :
// emit_rex_64 family: every overload emits a REX prefix with REX.W set; they
// differ in which operands supply the R, X and B bits.
2061 : // Emits a REX prefix that encodes a 64-bit operand size and
2062 : // the top bit of both register codes.
2063 : // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
2064 : // REX.W is set.
2065 : inline void emit_rex_64(XMMRegister reg, Register rm_reg);
2066 : inline void emit_rex_64(Register reg, XMMRegister rm_reg);
2067 : inline void emit_rex_64(Register reg, Register rm_reg);
2068 :
2069 : // Emits a REX prefix that encodes a 64-bit operand size and
2070 : // the top bit of the destination, index, and base register codes.
2071 : // The high bit of reg is used for REX.R, the high bit of op's base
2072 : // register is used for REX.B, and the high bit of op's index register
2073 : // is used for REX.X. REX.W is set.
2074 : inline void emit_rex_64(Register reg, const Operand& op);
2075 : inline void emit_rex_64(XMMRegister reg, const Operand& op);
2076 :
2077 : // Emits a REX prefix that encodes a 64-bit operand size and
2078 : // the top bit of the register code.
2079 : // The high bit of register is used for REX.B.
2080 : // REX.W is set and REX.R and REX.X are clear.
2081 : inline void emit_rex_64(Register rm_reg);
2082 :
2083 : // Emits a REX prefix that encodes a 64-bit operand size and
2084 : // the top bit of the index and base register codes.
2085 : // The high bit of op's base register is used for REX.B, and the high
2086 : // bit of op's index register is used for REX.X.
2087 : // REX.W is set and REX.R clear.
2088 : inline void emit_rex_64(const Operand& op);
2089 :
2090 : // Emit a REX prefix that only sets REX.W to choose a 64-bit operand size.
// The emitted byte is the constant 0x48.
2091 : void emit_rex_64() { emit(0x48); }
2092 :
// emit_rex_32 / emit_optional_rex_32 family: REX prefixes with REX.W clear.
// The "optional" overloads emit nothing when no REX bit would be set.
2093 : // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
2094 : // REX.W is clear.
2095 : inline void emit_rex_32(Register reg, Register rm_reg);
2096 :
2097 : // The high bit of reg is used for REX.R, the high bit of op's base
2098 : // register is used for REX.B, and the high bit of op's index register
2099 : // is used for REX.X. REX.W is cleared.
2100 : inline void emit_rex_32(Register reg, const Operand& op);
2101 :
2102 : // High bit of rm_reg goes to REX.B.
2103 : // REX.W, REX.R and REX.X are clear.
2104 : inline void emit_rex_32(Register rm_reg);
2105 :
2106 : // High bit of base goes to REX.B and high bit of index to REX.X.
2107 : // REX.W and REX.R are clear.
2108 : inline void emit_rex_32(const Operand& op);
2109 :
2110 : // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
2111 : // REX.W is cleared. If no REX bits are set, no byte is emitted.
2112 : inline void emit_optional_rex_32(Register reg, Register rm_reg);
2113 :
2114 : // The high bit of reg is used for REX.R, the high bit of op's base
2115 : // register is used for REX.B, and the high bit of op's index register
2116 : // is used for REX.X. REX.W is cleared. If no REX bits are set, nothing
2117 : // is emitted.
2118 : inline void emit_optional_rex_32(Register reg, const Operand& op);
2119 :
2120 : // As for emit_optional_rex_32(Register, Register), except that
2121 : // the registers are XMM registers.
2122 : inline void emit_optional_rex_32(XMMRegister reg, XMMRegister base);
2123 :
2124 : // As for emit_optional_rex_32(Register, Register), except that
2125 : // one of the registers is an XMM register.
2126 : inline void emit_optional_rex_32(XMMRegister reg, Register base);
2127 :
2128 : // As for emit_optional_rex_32(Register, Register), except that
2129 : // one of the registers is an XMM register.
2130 : inline void emit_optional_rex_32(Register reg, XMMRegister base);
2131 :
2132 : // As for emit_optional_rex_32(Register, const Operand&), except that
2133 : // the register is an XMM register.
2134 : inline void emit_optional_rex_32(XMMRegister reg, const Operand& op);
2135 :
2136 : // Optionally do as emit_rex_32(Register) if the register number has
2137 : // the high bit set.
2138 : inline void emit_optional_rex_32(Register rm_reg);
2139 : inline void emit_optional_rex_32(XMMRegister rm_reg);
2140 :
2141 : // Optionally do as emit_rex_32(const Operand&) if the operand register
2142 : // numbers have a high bit set.
2143 : inline void emit_optional_rex_32(const Operand& op);
2144 :
2145 : void emit_rex(int size) {
2146 0 : if (size == kInt64Size) {
2147 : emit_rex_64();
2148 : } else {
2149 : DCHECK(size == kInt32Size);
2150 : }
2151 : }
2152 :
2153 : template<class P1>
2154 : void emit_rex(P1 p1, int size) {
2155 27245002 : if (size == kInt64Size) {
2156 : emit_rex_64(p1);
2157 : } else {
2158 : DCHECK(size == kInt32Size);
2159 : emit_optional_rex_32(p1);
2160 : }
2161 : }
2162 :
2163 : template<class P1, class P2>
2164 91885873 : void emit_rex(P1 p1, P2 p2, int size) {
2165 91885873 : if (size == kInt64Size) {
2166 : emit_rex_64(p1, p2);
2167 : } else {
2168 : DCHECK(size == kInt32Size);
2169 : emit_optional_rex_32(p1, p2);
2170 : }
2171 91885873 : }
2172 :
2173 : // Emit vex prefix
// First byte of the two-byte prefix form: the constant 0xc5.
2174 : void emit_vex2_byte0() { emit(0xc5); }
2175 : inline void emit_vex2_byte1(XMMRegister reg, XMMRegister v, VectorLength l,
2176 : SIMDPrefix pp);
// First byte of the three-byte prefix form: the constant 0xc4.
2177 : void emit_vex3_byte0() { emit(0xc4); }
2178 : inline void emit_vex3_byte1(XMMRegister reg, XMMRegister rm, LeadingOpcode m);
2179 : inline void emit_vex3_byte1(XMMRegister reg, const Operand& rm,
2180 : LeadingOpcode m);
2181 : inline void emit_vex3_byte2(VexW w, XMMRegister v, VectorLength l,
2182 : SIMDPrefix pp);
// Whole-prefix emitters, overloaded for XMM and general-purpose registers and
// for a register or memory rm operand. Declarations only.
// NOTE(review): presumably these pick between the two- and three-byte forms;
// confirm at the definitions.
2183 : inline void emit_vex_prefix(XMMRegister reg, XMMRegister v, XMMRegister rm,
2184 : VectorLength l, SIMDPrefix pp, LeadingOpcode m,
2185 : VexW w);
2186 : inline void emit_vex_prefix(Register reg, Register v, Register rm,
2187 : VectorLength l, SIMDPrefix pp, LeadingOpcode m,
2188 : VexW w);
2189 : inline void emit_vex_prefix(XMMRegister reg, XMMRegister v, const Operand& rm,
2190 : VectorLength l, SIMDPrefix pp, LeadingOpcode m,
2191 : VexW w);
2192 : inline void emit_vex_prefix(Register reg, Register v, const Operand& rm,
2193 : VectorLength l, SIMDPrefix pp, LeadingOpcode m,
2194 : VexW w);
2195 :
2196 : // Emit the ModR/M byte, and optionally the SIB byte and
2197 : // 1- or 4-byte offset for a memory operand. Also encodes
2198 : // the second operand of the operation, a register or operation
2199 : // subcode, into the reg field of the ModR/M byte.
// Implemented by passing reg.low_bits() to the (int, Operand) overload below.
2200 : void emit_operand(Register reg, const Operand& adr) {
2201 75273578 : emit_operand(reg.low_bits(), adr);
2202 : }
2203 :
2204 : // Emit the ModR/M byte, and optionally the SIB byte and
2205 : // 1- or 4-byte offset for a memory operand. Also used to encode
2206 : // a three-bit opcode extension into the ModR/M byte.
2207 : void emit_operand(int rm, const Operand& adr);
2208 :
2209 : // Emit a ModR/M byte with registers coded in the reg and rm_reg fields.
2210 : void emit_modrm(Register reg, Register rm_reg) {
2211 31491597 : emit(0xC0 | reg.low_bits() << 3 | rm_reg.low_bits());
2212 : }
2213 :
2214 : // Emit a ModR/M byte with an operation subcode in the reg field and
2215 : // a register in the rm_reg field.
2216 : void emit_modrm(int code, Register rm_reg) {
2217 : DCHECK(is_uint3(code));
2218 38984914 : emit(0xC0 | code << 3 | rm_reg.low_bits());
2219 : }
2220 :
2221 : // Emit the code-object-relative offset of the label's position
2222 : inline void emit_code_relative_offset(Label* label);
2223 :
2224 : // The first argument is the reg field, the second argument is the r/m field.
2225 : void emit_sse_operand(XMMRegister dst, XMMRegister src);
2226 : void emit_sse_operand(XMMRegister reg, const Operand& adr);
2227 : void emit_sse_operand(Register reg, const Operand& adr);
2228 : void emit_sse_operand(XMMRegister dst, Register src);
2229 : void emit_sse_operand(Register dst, XMMRegister src);
// NOTE(review): single-argument overload; which ModR/M field `dst` fills and
// what goes in the other one is not stated here - check the definition.
2230 : void emit_sse_operand(XMMRegister dst);
2231 :
2232 : // Emit machine code for one of the operations ADD, ADC, SUB, SBB,
2233 : // AND, OR, XOR, or CMP. The encodings of these operations are all
2234 : // similar, differing just in the opcode or in the reg field of the
2235 : // ModR/M byte.
// Opcodes passed by the inline emit_* helpers further down (reg <- r/m form /
// r/m <- reg form): add 0x03/0x1, or 0x0B/0x9, sbb 0x1b/-, and 0x23/0x21,
// sub 0x2B/0x29, xor 0x33/0x31, cmp 0x3B/0x39.
2236 : void arithmetic_op_8(byte opcode, Register reg, Register rm_reg);
2237 : void arithmetic_op_8(byte opcode, Register reg, const Operand& rm_reg);
2238 : void arithmetic_op_16(byte opcode, Register reg, Register rm_reg);
2239 : void arithmetic_op_16(byte opcode, Register reg, const Operand& rm_reg);
2240 : // Operate on operands/registers with pointer size, 32-bit or 64-bit size.
2241 : void arithmetic_op(byte opcode, Register reg, Register rm_reg, int size);
2242 : void arithmetic_op(byte opcode,
2243 : Register reg,
2244 : const Operand& rm_reg,
2245 : int size);
// Immediate forms. `subcode` selects the operation; the inline emit_* helpers
// further down pass 0x0 add, 0x1 or, 0x4 and, 0x5 sub, 0x6 xor, 0x7 cmp.
2246 : // Operate on a byte in memory or register.
2247 : void immediate_arithmetic_op_8(byte subcode,
2248 : Register dst,
2249 : Immediate src);
2250 : void immediate_arithmetic_op_8(byte subcode,
2251 : const Operand& dst,
2252 : Immediate src);
2253 : // Operate on a word in memory or register.
2254 : void immediate_arithmetic_op_16(byte subcode,
2255 : Register dst,
2256 : Immediate src);
2257 : void immediate_arithmetic_op_16(byte subcode,
2258 : const Operand& dst,
2259 : Immediate src);
2260 : // Operate on operands/registers with pointer size, 32-bit or 64-bit size.
2261 : void immediate_arithmetic_op(byte subcode,
2262 : Register dst,
2263 : Immediate src,
2264 : int size);
2265 : void immediate_arithmetic_op(byte subcode,
2266 : const Operand& dst,
2267 : Immediate src,
2268 : int size);
2269 :
2270 : // Emit machine code for a shift operation.
// `subcode` selects which shift/rotate is emitted and `size` the operand
// size. Declarations only.
2271 : void shift(Operand dst, Immediate shift_amount, int subcode, int size);
2272 : void shift(Register dst, Immediate shift_amount, int subcode, int size);
2273 : // Shift dst by cl % 64 bits.
2274 : void shift(Register dst, int subcode, int size);
2275 : void shift(Operand dst, int subcode, int size);
2276 :
// NOTE(review): undocumented here; presumably emits an x87 floating-point
// arithmetic instruction built from b1, b2 and stack index i - confirm.
2277 : void emit_farith(int b1, int b2, int i);
2278 :
2279 : // labels
2280 : // void print(Label* L);
2281 : void bind_to(Label* L, int pos);
2282 :
2283 : // record reloc info for current pc_
2284 : void RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data = 0);
2285 :
2286 : // Arithmetics
// emit_add overloads: thin wrappers that pick the encoding for each operand
// combination and pass `size` through.
// reg <- reg + reg/mem uses arithmetic_op() with opcode 0x03.
2287 : void emit_add(Register dst, Register src, int size) {
2288 1115121 : arithmetic_op(0x03, dst, src, size);
2289 : }
2290 :
// Immediate forms use immediate_arithmetic_op() with subcode 0x0.
2291 : void emit_add(Register dst, Immediate src, int size) {
2292 4187279 : immediate_arithmetic_op(0x0, dst, src, size);
2293 : }
2294 :
2295 : void emit_add(Register dst, const Operand& src, int size) {
2296 40463 : arithmetic_op(0x03, dst, src, size);
2297 : }
2298 :
// mem <- mem + reg uses opcode 0x1; note that the register is passed first
// and the memory destination second.
2299 : void emit_add(const Operand& dst, Register src, int size) {
2300 184425 : arithmetic_op(0x1, src, dst, size);
2301 : }
2302 :
2303 : void emit_add(const Operand& dst, Immediate src, int size) {
2304 2283927 : immediate_arithmetic_op(0x0, dst, src, size);
2305 : }
2306 :
// emit_and overloads: thin wrappers that pick the encoding for each operand
// combination and pass `size` through.
// Register destination uses arithmetic_op() with opcode 0x23.
2307 : void emit_and(Register dst, Register src, int size) {
2308 1719194 : arithmetic_op(0x23, dst, src, size);
2309 : }
2310 :
2311 : void emit_and(Register dst, const Operand& src, int size) {
2312 85791 : arithmetic_op(0x23, dst, src, size);
2313 : }
2314 :
// Memory destination uses opcode 0x21; note that the register is passed first
// and the memory destination second.
2315 : void emit_and(const Operand& dst, Register src, int size) {
2316 : arithmetic_op(0x21, src, dst, size);
2317 : }
2318 :
// Immediate forms use immediate_arithmetic_op() with subcode 0x4.
2319 : void emit_and(Register dst, Immediate src, int size) {
2320 2163407 : immediate_arithmetic_op(0x4, dst, src, size);
2321 : }
2322 :
2323 : void emit_and(const Operand& dst, Immediate src, int size) {
2324 0 : immediate_arithmetic_op(0x4, dst, src, size);
2325 : }
2326 :
// emit_cmp overloads: thin wrappers that pick the encoding for each operand
// combination and pass `size` through.
// Register first operand uses arithmetic_op() with opcode 0x3B.
2327 : void emit_cmp(Register dst, Register src, int size) {
2328 1763449 : arithmetic_op(0x3B, dst, src, size);
2329 : }
2330 :
2331 : void emit_cmp(Register dst, const Operand& src, int size) {
2332 3844828 : arithmetic_op(0x3B, dst, src, size);
2333 : }
2334 :
// Memory first operand uses opcode 0x39; note that the register is passed
// first and the memory operand second.
2335 : void emit_cmp(const Operand& dst, Register src, int size) {
2336 1563576 : arithmetic_op(0x39, src, dst, size);
2337 : }
2338 :
// Immediate forms use immediate_arithmetic_op() with subcode 0x7.
2339 : void emit_cmp(Register dst, Immediate src, int size) {
2340 2964543 : immediate_arithmetic_op(0x7, dst, src, size);
2341 : }
2342 :
2343 : void emit_cmp(const Operand& dst, Immediate src, int size) {
2344 264287 : immediate_arithmetic_op(0x7, dst, src, size);
2345 : }
2346 :
2347 : // Compare {al,ax,eax,rax} with dst. If equal, set ZF and write src into
2348 : // dst. Otherwise clear ZF and write dst into {al,ax,eax,rax}. This
2349 : // operation is only atomic if prefixed by the lock instruction.
2350 : void emit_cmpxchg(const Operand& dst, Register src, int size);
2351 :
2352 : void emit_dec(Register dst, int size);
2353 : void emit_dec(const Operand& dst, int size);
2354 :
2355 : // Divide rdx:rax by src. Quotient in rax, remainder in rdx when size is 64.
2356 : // Divide edx:eax by lower 32 bits of src. Quotient in eax, remainder in edx
2357 : // when size is 32.
2358 : void emit_idiv(Register src, int size);
2359 : void emit_div(Register src, int size);
2360 :
2361 : // Signed multiply instructions.
2362 : // rdx:rax = rax * src when size is 64 or edx:eax = eax * src when size is 32.
2363 : void emit_imul(Register src, int size);
2364 : void emit_imul(const Operand& src, int size);
2365 : void emit_imul(Register dst, Register src, int size);
2366 : void emit_imul(Register dst, const Operand& src, int size);
2367 : void emit_imul(Register dst, Register src, Immediate imm, int size);
2368 : void emit_imul(Register dst, const Operand& src, Immediate imm, int size);
2369 :
2370 : void emit_inc(Register dst, int size);
2371 : void emit_inc(const Operand& dst, int size);
2372 :
2373 : void emit_lea(Register dst, const Operand& src, int size);
2374 :
2375 : void emit_mov(Register dst, const Operand& src, int size);
2376 : void emit_mov(Register dst, Register src, int size);
2377 : void emit_mov(const Operand& dst, Register src, int size);
2378 : void emit_mov(Register dst, Immediate value, int size);
2379 : void emit_mov(const Operand& dst, Immediate value, int size);
2380 :
2381 : void emit_movzxb(Register dst, const Operand& src, int size);
2382 : void emit_movzxb(Register dst, Register src, int size);
2383 : void emit_movzxw(Register dst, const Operand& src, int size);
2384 : void emit_movzxw(Register dst, Register src, int size);
2385 :
2386 : void emit_neg(Register dst, int size);
2387 : void emit_neg(const Operand& dst, int size);
2388 :
2389 : void emit_not(Register dst, int size);
2390 : void emit_not(const Operand& dst, int size);
2391 :
// emit_or overloads: thin wrappers that pick the encoding for each operand
// combination and pass `size` through.
// Register destination uses arithmetic_op() with opcode 0x0B.
2392 : void emit_or(Register dst, Register src, int size) {
2393 484026 : arithmetic_op(0x0B, dst, src, size);
2394 : }
2395 :
2396 : void emit_or(Register dst, const Operand& src, int size) {
2397 7835 : arithmetic_op(0x0B, dst, src, size);
2398 : }
2399 :
// Memory destination uses opcode 0x9; note that the register is passed first
// and the memory destination second.
2400 : void emit_or(const Operand& dst, Register src, int size) {
2401 : arithmetic_op(0x9, src, dst, size);
2402 : }
2403 :
// Immediate forms use immediate_arithmetic_op() with subcode 0x1.
2404 : void emit_or(Register dst, Immediate src, int size) {
2405 10743 : immediate_arithmetic_op(0x1, dst, src, size);
2406 : }
2407 :
2408 : void emit_or(const Operand& dst, Immediate src, int size) {
2409 0 : immediate_arithmetic_op(0x1, dst, src, size);
2410 : }
2411 :
// Declaration only; takes just the operand size.
// NOTE(review): presumably emits REP MOVS with implicit registers - confirm.
2412 : void emit_repmovs(int size);
2413 :
// Register-register form only: forwards to arithmetic_op() with opcode 0x1b.
2414 : void emit_sbb(Register dst, Register src, int size) {
2415 3424 : arithmetic_op(0x1b, dst, src, size);
2416 : }
2417 :
// emit_sub overloads: thin wrappers that pick the encoding for each operand
// combination and pass `size` through.
// Register destination uses arithmetic_op() with opcode 0x2B.
2418 : void emit_sub(Register dst, Register src, int size) {
2419 381579 : arithmetic_op(0x2B, dst, src, size);
2420 : }
2421 :
// Immediate forms use immediate_arithmetic_op() with subcode 0x5.
2422 : void emit_sub(Register dst, Immediate src, int size) {
2423 3496250 : immediate_arithmetic_op(0x5, dst, src, size);
2424 : }
2425 :
2426 : void emit_sub(Register dst, const Operand& src, int size) {
2427 193358 : arithmetic_op(0x2B, dst, src, size);
2428 : }
2429 :
// Memory destination uses opcode 0x29; note that the register is passed first
// and the memory destination second.
2430 : void emit_sub(const Operand& dst, Register src, int size) {
2431 184425 : arithmetic_op(0x29, src, dst, size);
2432 : }
2433 :
2434 : void emit_sub(const Operand& dst, Immediate src, int size) {
2435 6432 : immediate_arithmetic_op(0x5, dst, src, size);
2436 : }
2437 :
2438 : void emit_test(Register dst, Register src, int size);
2439 : void emit_test(Register reg, Immediate mask, int size);
2440 : void emit_test(const Operand& op, Register reg, int size);
2441 : void emit_test(const Operand& op, Immediate mask, int size);
2442 : void emit_test(Register reg, const Operand& op, int size) {
2443 377 : return emit_test(op, reg, size);
2444 : }
2445 :
// Exchange helpers with a register or memory second operand. Declarations
// only; defined outside this part of the file.
2446 : void emit_xchg(Register dst, Register src, int size);
2447 : void emit_xchg(Register dst, const Operand& src, int size);
2448 :
2449 2797930 : void emit_xor(Register dst, Register src, int size) {
2450 2894440 : if (size == kInt64Size && dst.code() == src.code()) {
2451 : // 32 bit operations zero the top 32 bits of 64 bit registers. Therefore
2452 : // there is no need to make this a 64 bit operation.
2453 1412 : arithmetic_op(0x33, dst, src, kInt32Size);
2454 : } else {
2455 2796518 : arithmetic_op(0x33, dst, src, size);
2456 : }
2457 2797929 : }
2458 :
2459 : void emit_xor(Register dst, const Operand& src, int size) {
2460 1465 : arithmetic_op(0x33, dst, src, size);
2461 : }
2462 :
2463 : void emit_xor(Register dst, Immediate src, int size) {
2464 20328 : immediate_arithmetic_op(0x6, dst, src, size);
2465 : }
2466 :
2467 : void emit_xor(const Operand& dst, Immediate src, int size) {
2468 0 : immediate_arithmetic_op(0x6, dst, src, size);
2469 : }
2470 :
2471 : void emit_xor(const Operand& dst, Register src, int size) {
2472 20696 : arithmetic_op(0x31, src, dst, size);
2473 : }
2474 :
2475 : // Most BMI instructions are similar.
2476 : void bmi1q(byte op, Register reg, Register vreg, Register rm);
2477 : void bmi1q(byte op, Register reg, Register vreg, const Operand& rm);
2478 : void bmi1l(byte op, Register reg, Register vreg, Register rm);
2479 : void bmi1l(byte op, Register reg, Register vreg, const Operand& rm);
2480 : void bmi2q(SIMDPrefix pp, byte op, Register reg, Register vreg, Register rm);
2481 : void bmi2q(SIMDPrefix pp, byte op, Register reg, Register vreg,
2482 : const Operand& rm);
2483 : void bmi2l(SIMDPrefix pp, byte op, Register reg, Register vreg, Register rm);
2484 : void bmi2l(SIMDPrefix pp, byte op, Register reg, Register vreg,
2485 : const Operand& rm);
2486 :
2487 : friend class CodePatcher;
2488 : friend class EnsureSpace;
2489 : friend class RegExpMacroAssemblerX64;
2490 :
2491 : // code generation
2492 : RelocInfoWriter reloc_info_writer;
2493 :
2494 : // Internal reference positions, required for (potential) patching in
2495 : // GrowBuffer(); contains only those internal references whose labels
2496 : // are already bound.
2497 : std::deque<int> internal_reference_positions_;
2498 :
2499 : List< Handle<Code> > code_targets_;
2500 : };
2501 :
2502 :
2503 : // Helper class that ensures that there is enough space for generating
2504 : // instructions and relocation information. The constructor makes
2505 : // sure that there is enough space and (in debug mode) the destructor
2506 : // checks that we did not generate too much.
2507 : class EnsureSpace BASE_EMBEDDED {
2508 : public:
2509 : explicit EnsureSpace(Assembler* assembler) : assembler_(assembler) {
2510 367791688 : if (assembler_->buffer_overflow()) assembler_->GrowBuffer();
2511 : #ifdef DEBUG
2512 : space_before_ = assembler_->available_space();
2513 : #endif
2514 : }
2515 :
2516 : #ifdef DEBUG
2517 : ~EnsureSpace() {
2518 : int bytes_generated = space_before_ - assembler_->available_space();
2519 : DCHECK(bytes_generated < assembler_->kGap);
2520 : }
2521 : #endif
2522 :
2523 : private:
2524 : Assembler* assembler_;
2525 : #ifdef DEBUG
2526 : int space_before_;
2527 : #endif
2528 : };
2529 :
2530 : } // namespace internal
2531 : } // namespace v8
2532 :
2533 : #endif // V8_X64_ASSEMBLER_X64_H_
|