Coverage Report

Created: 2025-08-25 07:49

/src/keystone/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- X86AsmInstrumentation.cpp - Instrument X86 inline assembly C++ -*-===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
10
#include "X86AsmInstrumentation.h"
11
#include "MCTargetDesc/X86BaseInfo.h"
12
#include "X86Operand.h"
13
#include "llvm/ADT/StringExtras.h"
14
#include "llvm/ADT/Triple.h"
15
#include "llvm/MC/MCAsmInfo.h"
16
#include "llvm/MC/MCContext.h"
17
#include "llvm/MC/MCInst.h"
18
#include "llvm/MC/MCInstBuilder.h"
19
#include "llvm/MC/MCInstrInfo.h"
20
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
21
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
22
#include "llvm/MC/MCStreamer.h"
23
#include "llvm/MC/MCSubtargetInfo.h"
24
#include "llvm/MC/MCTargetOptions.h"
25
#include <algorithm>
26
#include <cassert>
27
#include <vector>
28
29
//#include <iostream>
30
31
// Following comment describes how assembly instrumentation works.
32
// Currently we have only AddressSanitizer instrumentation, but we're
33
// planning to implement MemorySanitizer for inline assembly too. If
34
// you're not familiar with AddressSanitizer algorithm, please, read
35
// https://github.com/google/sanitizers/wiki/AddressSanitizerAlgorithm.
36
//
37
// When inline assembly is parsed by an instance of X86AsmParser, all
38
// instructions are emitted via EmitInstruction method. That's the
39
// place where X86AsmInstrumentation analyzes an instruction and
40
// decides, whether the instruction should be emitted as is or
41
// instrumentation is required. The latter case happens when an
42
// instruction reads from or writes to memory. Now instruction opcode
43
// is explicitly checked, and if an instruction has a memory operand
44
// (for instance, movq (%rsi, %rcx, 8), %rax) - it should be
45
// instrumented.  There also exist instructions that modify
46
// memory but don't have explicit memory operands, for instance,
47
// movs.
48
//
49
// Let's consider at first 8-byte memory accesses when an instruction
50
// has an explicit memory operand. In this case we need two registers -
51
// AddressReg to compute the address of the memory cells which are accessed
52
// and ShadowReg to compute corresponding shadow address. So, we need
53
// to spill both registers before instrumentation code and restore them
54
// after instrumentation. Thus, in general, instrumentation code will
55
// look like this:
56
// PUSHF  # Store flags, otherwise they will be overwritten
57
// PUSH AddressReg  # spill AddressReg
58
// PUSH ShadowReg   # spill ShadowReg
59
// LEA MemOp, AddressReg  # compute address of the memory operand
60
// MOV AddressReg, ShadowReg
61
// SHR ShadowReg, 3
62
// # ShadowOffset(AddressReg >> 3) contains address of a shadow
63
// # corresponding to MemOp.
64
// CMP ShadowOffset(ShadowReg), 0  # test shadow value
65
// JZ .Done  # when shadow equals to zero, everything is fine
66
// MOV AddressReg, RDI
67
// # Call __asan_report function with AddressReg as an argument
68
// CALL __asan_report
69
// .Done:
70
// POP ShadowReg  # Restore ShadowReg
71
// POP AddressReg  # Restore AddressReg
72
// POPF  # Restore flags
73
//
74
// Memory accesses with different size (1-, 2-, 4- and 16-byte) are
75
// handled in a similar manner, but small memory accesses (less than 8
76
// byte) require an additional ScratchReg, which is used for shadow value.
77
//
78
// If, suppose, we're instrumenting an instruction like movs, only
79
// contents of RDI, RDI + AccessSize * RCX, RSI, RSI + AccessSize *
80
// RCX are checked.  In this case there's no need to spill and restore
81
// AddressReg, ShadowReg or flags four times; they're saved on the stack
82
// just once, before instrumentation of these four addresses, and restored
83
// at the end of the instrumentation.
84
//
85
// There exist several things which complicate this simple algorithm.
86
// * Instrumented memory operand can have RSP as a base or an index
87
//   register.  So we need to add a constant offset before computation
88
//   of memory address, since flags, AddressReg, ShadowReg, etc. were
89
//   already stored on stack and RSP was modified.
90
// * Debug info (usually, DWARF) should be adjusted, because sometimes
91
//   RSP is used as a frame register. So, we need to select some
92
//   register as a frame register and temporarily override current CFA
93
//   register.
94
95
namespace llvm_ks {
96
97
// Constructs the instrumentation object.
//
// STI is taken as a reference to a subtarget-info pointer and used to
// initialize the STI member (the member's own type is declared in the header,
// not shown here). InitialFrameReg starts at 0; GetFrameRegGeneric treats a
// zero value as "no explicit frame register set".
X86AsmInstrumentation::X86AsmInstrumentation(const MCSubtargetInfo *&STI)
98
15.3k
    : STI(STI), InitialFrameReg(0) {}
99
100
15.3k
// Destructor. The body is empty: nothing is released explicitly here.
X86AsmInstrumentation::~X86AsmInstrumentation() {}
101
102
// Emits Inst by forwarding it, unchanged, to EmitInstruction.
//
// Despite the name, this body performs no instrumentation: Operands, Ctx and
// MII are accepted but not used here. Out, Inst and KsError are passed
// straight through to EmitInstruction.
void X86AsmInstrumentation::InstrumentAndEmitInstruction(
103
    MCInst &Inst, OperandVector &Operands, MCContext &Ctx,
104
48.0k
    const MCInstrInfo &MII, MCStreamer &Out, unsigned int &KsError) {
105
48.0k
  EmitInstruction(Out, Inst, KsError);
106
48.0k
}
107
108
// Hands Inst to the streamer by calling Out.EmitInstruction with the
// subtarget info that STI points to, passing KsError along.
//
// STI is dereferenced without a null check, so it must point to a valid
// MCSubtargetInfo when this is called.
void X86AsmInstrumentation::EmitInstruction(MCStreamer &Out,
109
                                            MCInst &Inst,
110
48.0k
                                            unsigned int &KsError) {
111
48.0k
  Out.EmitInstruction(Inst, *STI, KsError);
112
48.0k
}
113
114
// Determines which register currently serves as the frame register.
//
// Returns X86::NoRegister when any of the following holds:
//   * Out reports no dwarf frame infos,
//   * the last frame info in Out has its End field set,
//   * Ctx has no register info.
// Otherwise returns InitialFrameReg if it is non-zero, or else the LLVM
// register number that the register info maps the last frame's
// CurrentCfaRegister to (looked up with IsEH = true).
unsigned X86AsmInstrumentation::GetFrameRegGeneric(const MCContext &Ctx,
115
0
                                                   MCStreamer &Out) {
116
0
  if (!Out.getNumFrameInfos()) // No active dwarf frame
117
0
    return X86::NoRegister;
118
0
  const MCDwarfFrameInfo &Frame = Out.getDwarfFrameInfos().back();
119
0
  if (Frame.End) // Active dwarf frame is closed
120
0
    return X86::NoRegister;
121
0
  const MCRegisterInfo *MRI = Ctx.getRegisterInfo();
122
0
  if (!MRI) // No register info
123
0
    return X86::NoRegister;
124
125
0
  if (InitialFrameReg) {
126
    // FrameReg is set explicitly, we're instrumenting a MachineFunction.
127
0
    return InitialFrameReg;
128
0
  }
129
130
0
  // No explicit frame register: translate the frame's current CFA register
  // number into an LLVM register number.
  return MRI->getLLVMRegNum(Frame.CurrentCfaRegister, true /* IsEH */);
131
0
}
132
133
// Factory: allocates an X86AsmInstrumentation with `new`, constructed from
// STI, and returns the raw pointer.
//
// MCOptions and Ctx are accepted but not used in this body. Nothing here
// frees the returned object.
// NOTE(review): presumably the caller takes ownership and must delete it;
// confirm at the call sites.
X86AsmInstrumentation *
134
CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions,
135
15.3k
                            const MCContext &Ctx, const MCSubtargetInfo *&STI) {
136
15.3k
  return new X86AsmInstrumentation(STI);
137
15.3k
}
138
139
} // end llvm_ks namespace