/src/bloaty/src/disassemble.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2016 Google Inc. All Rights Reserved. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | #include <string> |
16 | | |
17 | | #include "absl/strings/ascii.h" |
18 | | #include "absl/strings/escaping.h" |
19 | | #include "absl/strings/str_cat.h" |
20 | | #include "absl/strings/string_view.h" |
21 | | #include "absl/strings/substitute.h" |
22 | | #include "bloaty.h" |
23 | | #include "capstone/capstone.h" |
24 | | #include "re.h" |
25 | | #include "util.h" |
26 | | |
27 | | using absl::string_view; |
28 | | |
29 | | namespace bloaty { |
30 | | |
31 | | namespace { |
32 | | |
// Returns a copy of `input` extended on the right with space characters until
// it is at least `size` characters long. An input that is already `size`
// characters or longer is returned unchanged (it is never truncated).
static std::string RightPad(const std::string& input, size_t size) {
  std::string padded(input);
  if (padded.size() < size) {
    padded.append(size - padded.size(), ' ');
  }
  return padded;
}
40 | | |
41 | | } // anonymous namespace |
42 | | |
43 | 1.05M | void DisassembleFindReferences(const DisassemblyInfo& info, RangeSink* sink) { |
44 | 1.05M | if (info.arch != CS_ARCH_X86) { |
45 | | // x86 only for now. |
46 | 3.48k | return; |
47 | 3.48k | } |
48 | | |
49 | 1.04M | csh handle; |
50 | 1.04M | if (cs_open(info.arch, info.mode, &handle) != CS_ERR_OK || |
51 | 1.04M | cs_option(handle, CS_OPT_DETAIL, CS_OPT_ON) != CS_ERR_OK) { |
52 | 0 | THROW("Couldn't initialize Capstone"); |
53 | 0 | } |
54 | | |
55 | 1.04M | if (info.text.size() == 0) { |
56 | 0 | cs_close(&handle); |
57 | 0 | THROW("Tried to disassemble empty function."); |
58 | 0 | } |
59 | | |
60 | 1.04M | cs_insn *in = cs_malloc(handle); |
61 | 1.04M | uint64_t address = info.start_address; |
62 | 1.04M | const uint8_t* ptr = reinterpret_cast<const uint8_t*>(info.text.data()); |
63 | 1.04M | size_t size = info.text.size(); |
64 | | |
65 | 171M | while (size > 0) { |
66 | 170M | if (!cs_disasm_iter(handle, &ptr, &size, &address, in)) { |
67 | | // Some symbols that end up in the .text section aren't really functions |
68 | | // but data. Not sure why this happens. |
69 | 770k | if (verbose_level > 1) { |
70 | 0 | printf("Error disassembling function at address: %" PRIx64 "\n", |
71 | 0 | address); |
72 | 0 | } |
73 | 770k | goto cleanup; |
74 | 770k | } |
75 | | |
76 | 170M | size_t count = in->detail->x86.op_count; |
77 | 459M | for (size_t i = 0; i < count; i++) { |
78 | 289M | cs_x86_op* op = &in->detail->x86.operands[i]; |
79 | 289M | if (op->type == X86_OP_MEM && op->mem.base == X86_REG_RIP && |
80 | 289M | op->mem.segment == X86_REG_INVALID && |
81 | 289M | op->mem.index == X86_REG_INVALID) { |
82 | 6.78M | uint64_t to_address = in->address + in->size + op->mem.disp; |
83 | 6.78M | if (to_address) { |
84 | 6.78M | sink->AddVMRangeForVMAddr("x86_disassemble", in->address, to_address, |
85 | 6.78M | RangeSink::kUnknownSize); |
86 | 6.78M | } |
87 | 6.78M | } |
88 | 289M | } |
89 | 170M | } |
90 | | |
91 | 1.04M | cleanup: |
92 | 1.04M | cs_free(in, 1); |
93 | 1.04M | cs_close(&handle); |
94 | 1.04M | } |
95 | | |
96 | 0 | bool TryGetJumpTarget(cs_arch arch, cs_insn *in, uint64_t* target) { |
97 | 0 | switch (arch) { |
98 | 0 | case CS_ARCH_X86: |
99 | 0 | switch (in->id) { |
100 | 0 | case X86_INS_JAE: |
101 | 0 | case X86_INS_JA: |
102 | 0 | case X86_INS_JBE: |
103 | 0 | case X86_INS_JB: |
104 | 0 | case X86_INS_JCXZ: |
105 | 0 | case X86_INS_JECXZ: |
106 | 0 | case X86_INS_JE: |
107 | 0 | case X86_INS_JGE: |
108 | 0 | case X86_INS_JG: |
109 | 0 | case X86_INS_JLE: |
110 | 0 | case X86_INS_JL: |
111 | 0 | case X86_INS_JMP: |
112 | 0 | case X86_INS_JNE: |
113 | 0 | case X86_INS_JNO: |
114 | 0 | case X86_INS_JNP: |
115 | 0 | case X86_INS_JNS: |
116 | 0 | case X86_INS_JO: |
117 | 0 | case X86_INS_JP: |
118 | 0 | case X86_INS_JS: |
119 | 0 | case X86_INS_CALL: { |
120 | 0 | auto op0 = in->detail->x86.operands[0]; |
121 | 0 | if (op0.type == X86_OP_IMM) { |
122 | 0 | *target = op0.imm; |
123 | 0 | return true; |
124 | 0 | } |
125 | 0 | return false; |
126 | 0 | } |
127 | 0 | default: |
128 | 0 | return false; |
129 | 0 | } |
130 | 0 | default: |
131 | 0 | return false; |
132 | 0 | } |
133 | 0 | } |
134 | | |
135 | 0 | std::string DisassembleFunction(const DisassemblyInfo& info) { |
136 | 0 | std::string ret; |
137 | |
|
138 | 0 | csh handle; |
139 | 0 | if (cs_open(info.arch, info.mode, &handle) != CS_ERR_OK || |
140 | 0 | cs_option(handle, CS_OPT_DETAIL, CS_OPT_ON) != CS_ERR_OK) { |
141 | 0 | THROW("Couldn't initialize Capstone"); |
142 | 0 | } |
143 | | |
144 | 0 | if (info.text.size() == 0) { |
145 | 0 | THROW("Tried to disassemble empty function."); |
146 | 0 | } |
147 | | |
148 | 0 | cs_insn *insn; |
149 | 0 | size_t count = |
150 | 0 | cs_disasm(handle, reinterpret_cast<const uint8_t *>(info.text.data()), |
151 | 0 | info.text.size(), info.start_address, 0, &insn); |
152 | |
|
153 | 0 | if (count == 0) { |
154 | 0 | THROW("Error disassembling function."); |
155 | 0 | } |
156 | | |
157 | 0 | std::map<uint64_t, int> local_labels; |
158 | |
|
159 | 0 | for (size_t i = 0; i < count; i++) { |
160 | 0 | cs_insn *in = insn + i; |
161 | 0 | uint64_t target; |
162 | 0 | if (TryGetJumpTarget(info.arch, in, &target) && |
163 | 0 | target >= info.start_address && |
164 | 0 | target < info.start_address + info.text.size()) { |
165 | 0 | local_labels[target] = 0; // Fill in real value later. |
166 | 0 | } |
167 | 0 | } |
168 | |
|
169 | 0 | int label = 0; |
170 | 0 | for (auto& pair : local_labels) { |
171 | 0 | pair.second = label++; |
172 | 0 | } |
173 | |
|
174 | 0 | for (size_t i = 0; i < count; i++) { |
175 | 0 | cs_insn *in = insn + i; |
176 | 0 | std::string bytes = absl::BytesToHexString( |
177 | 0 | string_view(reinterpret_cast<const char*>(in->bytes), in->size)); |
178 | 0 | string_view mnemonic(in->mnemonic); |
179 | 0 | std::string op_str(in->op_str); |
180 | 0 | std::string match; |
181 | 0 | std::string label; |
182 | |
|
183 | 0 | if (info.arch == CS_ARCH_X86) { |
184 | 0 | if (in->id == X86_INS_LEA) { |
185 | 0 | ReImpl::GlobalReplace(&op_str, "\\w?word ptr ", ""); |
186 | 0 | } else if (in->id == X86_INS_NOP) { |
187 | 0 | op_str.clear(); |
188 | 0 | } else { |
189 | | // qword ptr => QWORD |
190 | 0 | while (ReImpl::PartialMatch(op_str, "(\\w?word) ptr", &match)) { |
191 | 0 | std::string upper_match = match; |
192 | 0 | absl::AsciiStrToUpper(&upper_match); |
193 | 0 | ReImpl::Replace(&op_str, match + " ptr", upper_match); |
194 | 0 | } |
195 | 0 | } |
196 | 0 | } |
197 | |
|
198 | 0 | ReImpl::GlobalReplace(&op_str, " ", ""); |
199 | |
|
200 | 0 | auto iter = local_labels.find(in->address); |
201 | 0 | if (iter != local_labels.end()) { |
202 | 0 | label = std::to_string(iter->second) + ":"; |
203 | 0 | } |
204 | |
|
205 | 0 | uint64_t target; |
206 | 0 | if (TryGetJumpTarget(info.arch, in, &target)) { |
207 | 0 | auto iter = local_labels.find(target); |
208 | 0 | std::string label; |
209 | 0 | if (iter != local_labels.end()) { |
210 | 0 | if (target > in->address) { |
211 | 0 | op_str = ">" + std::to_string(iter->second); |
212 | 0 | } else { |
213 | 0 | op_str = "<" + std::to_string(iter->second); |
214 | 0 | } |
215 | 0 | } else if (info.symbol_map.vm_map.TryGetLabel(target, &label)) { |
216 | 0 | op_str = label; |
217 | 0 | } |
218 | 0 | } |
219 | |
|
220 | 0 | absl::StrAppend(&ret, " ", RightPad(label, 4), |
221 | 0 | RightPad(std::string(mnemonic), 8), " ", op_str, "\n"); |
222 | 0 | } |
223 | |
|
224 | 0 | cs_close(&handle); |
225 | 0 | return ret; |
226 | 0 | } |
227 | | |
228 | | } // namespace bloaty |