/src/keystone/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- X86AsmInstrumentation.cpp - Instrument X86 inline assembly C++ -*-===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | |
10 | | #include "X86AsmInstrumentation.h" |
11 | | #include "MCTargetDesc/X86BaseInfo.h" |
12 | | #include "X86Operand.h" |
13 | | #include "llvm/ADT/StringExtras.h" |
14 | | #include "llvm/ADT/Triple.h" |
15 | | #include "llvm/MC/MCAsmInfo.h" |
16 | | #include "llvm/MC/MCContext.h" |
17 | | #include "llvm/MC/MCInst.h" |
18 | | #include "llvm/MC/MCInstBuilder.h" |
19 | | #include "llvm/MC/MCInstrInfo.h" |
20 | | #include "llvm/MC/MCParser/MCParsedAsmOperand.h" |
21 | | #include "llvm/MC/MCParser/MCTargetAsmParser.h" |
22 | | #include "llvm/MC/MCStreamer.h" |
23 | | #include "llvm/MC/MCSubtargetInfo.h" |
24 | | #include "llvm/MC/MCTargetOptions.h" |
25 | | #include <algorithm> |
26 | | #include <cassert> |
27 | | #include <vector> |
28 | | |
29 | | //#include <iostream> |
30 | | |
31 | | // Following comment describes how assembly instrumentation works. |
32 | | // Currently we have only AddressSanitizer instrumentation, but we're |
33 | | // planning to implement MemorySanitizer for inline assembly too. If |
34 | | // you're not familiar with AddressSanitizer algorithm, please, read |
35 | | // https://code.google.com/p/address-sanitizer/wiki/AddressSanitizerAlgorithm. |
36 | | // |
37 | | // When inline assembly is parsed by an instance of X86AsmParser, all |
38 | | // instructions are emitted via EmitInstruction method. That's the |
39 | | // place where X86AsmInstrumentation analyzes an instruction and |
40 | | // decides, whether the instruction should be emitted as is or |
41 | | // instrumentation is required. The latter case happens when an |
42 | | // instruction reads from or writes to memory. Now instruction opcode |
43 | | // is explicitly checked, and if an instruction has a memory operand |
44 | | // (for instance, movq (%rsi, %rcx, 8), %rax) - it should be |
45 | | // instrumented. There also exist instructions that modify |
46 | | // memory but don't have explicit memory operands, for instance, |
47 | | // movs. |
48 | | // |
49 | | // Let's consider at first 8-byte memory accesses when an instruction |
50 | | // has an explicit memory operand. In this case we need two registers - |
51 | | // AddressReg to compute the address of the memory cells which are accessed |
52 | | // and ShadowReg to compute corresponding shadow address. So, we need |
53 | | // to spill both registers before instrumentation code and restore them |
54 | | // after instrumentation. Thus, in general, instrumentation code will |
55 | | // look like this: |
56 | | // PUSHF # Store flags, otherwise they will be overwritten |
57 | | // PUSH AddressReg # spill AddressReg |
58 | | // PUSH ShadowReg # spill ShadowReg |
59 | | // LEA MemOp, AddressReg # compute address of the memory operand |
60 | | // MOV AddressReg, ShadowReg |
61 | | // SHR ShadowReg, 3 |
62 | | // # ShadowOffset(AddressReg >> 3) contains address of a shadow |
63 | | // # corresponding to MemOp. |
64 | | // CMP ShadowOffset(ShadowReg), 0 # test shadow value |
65 | | // JZ .Done # when shadow equals to zero, everything is fine |
66 | | // MOV AddressReg, RDI |
67 | | // # Call __asan_report function with AddressReg as an argument |
68 | | // CALL __asan_report |
69 | | // .Done: |
70 | | // POP ShadowReg # Restore ShadowReg |
71 | | // POP AddressReg # Restore AddressReg |
72 | | // POPF # Restore flags |
73 | | // |
74 | | // Memory accesses with different size (1-, 2-, 4- and 16-byte) are |
75 | | // handled in a similar manner, but small memory accesses (less than 8 |
76 | | // byte) require an additional ScratchReg, which is used for shadow value. |
77 | | // |
78 | | // If, suppose, we're instrumenting an instruction like movs, only |
79 | | // contents of RDI, RDI + AccessSize * RCX, RSI, RSI + AccessSize * |
80 | | // RCX are checked. In this case there's no need to spill and restore |
81 | | // AddressReg, ShadowReg or flags four times, they're saved on stack |
82 | | // just once, before instrumentation of these four addresses, and restored |
83 | | // at the end of the instrumentation. |
84 | | // |
85 | | // There exist several things which complicate this simple algorithm. |
86 | | // * Instrumented memory operand can have RSP as a base or an index |
87 | | // register. So we need to add a constant offset before computation |
88 | | // of memory address, since flags, AddressReg, ShadowReg, etc. were |
89 | | // already stored on stack and RSP was modified. |
90 | | // * Debug info (usually, DWARF) should be adjusted, because sometimes |
91 | | // RSP is used as a frame register. So, we need to select some |
92 | | // register as a frame register and temporarily override current CFA |
93 | | // register. |
94 | | |
95 | | namespace llvm_ks { |
96 | | |
97 | | X86AsmInstrumentation::X86AsmInstrumentation(const MCSubtargetInfo *&STI) |
98 | 15.3k | : STI(STI), InitialFrameReg(0) {} |
99 | | |
100 | 15.3k | X86AsmInstrumentation::~X86AsmInstrumentation() {} |
101 | | |
102 | | void X86AsmInstrumentation::InstrumentAndEmitInstruction( |
103 | | MCInst &Inst, OperandVector &Operands, MCContext &Ctx, |
104 | 48.0k | const MCInstrInfo &MII, MCStreamer &Out, unsigned int &KsError) { |
105 | 48.0k | EmitInstruction(Out, Inst, KsError); |
106 | 48.0k | } |
107 | | |
108 | | void X86AsmInstrumentation::EmitInstruction(MCStreamer &Out, |
109 | | MCInst &Inst, |
110 | 48.0k | unsigned int &KsError) { |
111 | 48.0k | Out.EmitInstruction(Inst, *STI, KsError); |
112 | 48.0k | } |
113 | | |
114 | | unsigned X86AsmInstrumentation::GetFrameRegGeneric(const MCContext &Ctx, |
115 | 0 | MCStreamer &Out) { |
116 | 0 | if (!Out.getNumFrameInfos()) // No active dwarf frame |
117 | 0 | return X86::NoRegister; |
118 | 0 | const MCDwarfFrameInfo &Frame = Out.getDwarfFrameInfos().back(); |
119 | 0 | if (Frame.End) // Active dwarf frame is closed |
120 | 0 | return X86::NoRegister; |
121 | 0 | const MCRegisterInfo *MRI = Ctx.getRegisterInfo(); |
122 | 0 | if (!MRI) // No register info |
123 | 0 | return X86::NoRegister; |
124 | | |
125 | 0 | if (InitialFrameReg) { |
126 | | // FrameReg is set explicitly, we're instrumenting a MachineFunction. |
127 | 0 | return InitialFrameReg; |
128 | 0 | } |
129 | | |
130 | 0 | return MRI->getLLVMRegNum(Frame.CurrentCfaRegister, true /* IsEH */); |
131 | 0 | } |
132 | | |
133 | | X86AsmInstrumentation * |
134 | | CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions, |
135 | 15.3k | const MCContext &Ctx, const MCSubtargetInfo *&STI) { |
136 | 15.3k | return new X86AsmInstrumentation(STI); |
137 | 15.3k | } |
138 | | |
139 | | } // end llvm_ks namespace |