/src/bloaty/src/webassembly.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2018 Google Inc. All Rights Reserved. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | #include "bloaty.h" |
16 | | #include "util.h" |
17 | | |
18 | | #include "absl/strings/substitute.h" |
19 | | |
20 | | using absl::string_view; |
21 | | |
22 | | namespace bloaty { |
23 | | namespace wasm { |
24 | | |
25 | 1.05M | uint64_t ReadLEB128Internal(bool is_signed, size_t size, string_view* data) { |
26 | 1.05M | uint64_t ret = 0; |
27 | 1.05M | int shift = 0; |
28 | 1.05M | int maxshift = 70; |
29 | 1.05M | const char* ptr = data->data(); |
30 | 1.05M | const char* limit = ptr + data->size(); |
31 | | |
32 | 1.13M | while (ptr < limit && shift < maxshift) { |
33 | 1.12M | char byte = *(ptr++); |
34 | 1.12M | ret |= static_cast<uint64_t>(byte & 0x7f) << shift; |
35 | 1.12M | shift += 7; |
36 | 1.12M | if ((byte & 0x80) == 0) { |
37 | 1.05M | data->remove_prefix(ptr - data->data()); |
38 | 1.05M | if (is_signed && shift < size && (byte & 0x40)) { |
39 | 0 | ret |= -(1ULL << shift); |
40 | 0 | } |
41 | 1.05M | return ret; |
42 | 1.05M | } |
43 | 1.12M | } |
44 | | |
45 | 1.05M | THROW("corrupt wasm data, unterminated LEB128"); |
46 | 1.05M | } |
47 | | |
48 | 1.72k | bool ReadVarUInt1(string_view* data) { |
49 | 1.72k | return static_cast<bool>(ReadLEB128Internal(false, 1, data)); |
50 | 1.72k | } |
51 | | |
52 | 452k | uint8_t ReadVarUInt7(string_view* data) { |
53 | 452k | return static_cast<char>(ReadLEB128Internal(false, 7, data)); |
54 | 452k | } |
55 | | |
56 | 604k | uint32_t ReadVarUInt32(string_view* data) { |
57 | 604k | return static_cast<uint32_t>(ReadLEB128Internal(false, 32, data)); |
58 | 604k | } |
59 | | |
60 | 1.33k | int8_t ReadVarint7(string_view* data) { |
61 | 1.33k | return static_cast<int8_t>(ReadLEB128Internal(true, 7, data)); |
62 | 1.33k | } |
63 | | |
64 | 1.04M | string_view ReadPiece(size_t bytes, string_view* data) { |
65 | 1.04M | if(data->size() < bytes) { |
66 | 1.52k | THROW("premature EOF reading variable-length DWARF data"); |
67 | 1.52k | } |
68 | 1.04M | string_view ret = data->substr(0, bytes); |
69 | 1.04M | data->remove_prefix(bytes); |
70 | 1.04M | return ret; |
71 | 1.04M | } |
72 | | |
73 | 605k | bool ReadMagic(string_view* data) { |
74 | 605k | const uint32_t wasm_magic = 0x6d736100; |
75 | 605k | auto magic = ReadFixed<uint32_t>(data); |
76 | | |
77 | 605k | if (magic != wasm_magic) { |
78 | 507k | return false; |
79 | 507k | } |
80 | | |
81 | | // TODO(haberman): do we need to fail if this is >1? |
82 | 97.8k | auto version = ReadFixed<uint32_t>(data); |
83 | 97.8k | (void)version; |
84 | | |
85 | 97.8k | return true; |
86 | 605k | } |
87 | | |
88 | | class Section { |
89 | | public: |
90 | | uint32_t id; |
91 | | std::string name; |
92 | | string_view data; |
93 | | string_view contents; |
94 | | |
95 | 452k | static Section Read(string_view* data_param) { |
96 | 452k | Section ret; |
97 | 452k | string_view data = *data_param; |
98 | 452k | string_view section_data = data; |
99 | | |
100 | 452k | ret.id = ReadVarUInt7(&data); |
101 | 452k | uint32_t size = ReadVarUInt32(&data); |
102 | 452k | ret.contents = ReadPiece(size, &data); |
103 | 452k | size_t header_size = ret.contents.data() - section_data.data(); |
104 | 452k | ret.data = ReadPiece(size + header_size, §ion_data); |
105 | | |
106 | 452k | if (ret.id == 0) { |
107 | 131k | uint32_t name_len = ReadVarUInt32(&ret.contents); |
108 | 131k | ret.name = std::string(ReadPiece(name_len, &ret.contents)); |
109 | 320k | } else if (ret.id <= 13) { |
110 | 318k | ret.name = names[ret.id]; |
111 | 318k | } else { |
112 | 2.06k | THROWF("Unknown section id: $0", ret.id); |
113 | 2.06k | } |
114 | | |
115 | 450k | *data_param = data; |
116 | 450k | return ret; |
117 | 452k | } |
118 | | |
119 | | enum Name { |
120 | | kType = 1, |
121 | | kImport = 2, |
122 | | kFunction = 3, |
123 | | kTable = 4, |
124 | | kMemory = 5, |
125 | | kGlobal = 6, |
126 | | kExport = 7, |
127 | | kStart = 8, |
128 | | kElement = 9, |
129 | | kCode = 10, |
130 | | kData = 11, |
131 | | kDataCount = 12, |
132 | | kEvent = 13, |
133 | | }; |
134 | | |
135 | | static const char* names[]; |
136 | | }; |
137 | | |
138 | | const char* Section::names[] = { |
139 | | "<none>", // 0 |
140 | | "Type", // 1 |
141 | | "Import", // 2 |
142 | | "Function", // 3 |
143 | | "Table", // 4 |
144 | | "Memory", // 5 |
145 | | "Global", // 6 |
146 | | "Export", // 7 |
147 | | "Start", // 8 |
148 | | "Element", // 9 |
149 | | "Code", // 10 |
150 | | "Data", // 11 |
151 | | "DataCount", // 12 |
152 | | "Event", // 13 |
153 | | }; |
154 | | |
// Scope for the import-kind codes that GetNumFunctionImports() switches on.
struct ExternalKind {
  enum Kind {
    kFunction = 0,  // Function import.
    kTable    = 1,  // Table import.
    kMemory   = 2,  // Memory import.
    kGlobal   = 3,  // Global import.
  };
};
163 | | |
164 | | template <class Func> |
165 | 56.3k | void ForEachSection(string_view file, Func&& section_func) { |
166 | 56.3k | string_view data = file; |
167 | 56.3k | ReadMagic(&data); |
168 | | |
169 | 508k | while (!data.empty()) { |
170 | 452k | Section section = Section::Read(&data); |
171 | 452k | section_func(section); |
172 | 452k | } |
173 | 56.3k | } webassembly.cc:void bloaty::wasm::ForEachSection<bloaty::wasm::ParseSections(bloaty::RangeSink*)::$_0>(std::__1::basic_string_view<char, std::__1::char_traits<char> >, bloaty::wasm::ParseSections(bloaty::RangeSink*)::$_0&&) Line | Count | Source | 165 | 26.5k | void ForEachSection(string_view file, Func&& section_func) { | 166 | 26.5k | string_view data = file; | 167 | 26.5k | ReadMagic(&data); | 168 | | | 169 | 261k | while (!data.empty()) { | 170 | 234k | Section section = Section::Read(&data); | 171 | 234k | section_func(section); | 172 | 234k | } | 173 | 26.5k | } |
webassembly.cc:void bloaty::wasm::ForEachSection<bloaty::wasm::ParseSymbols(bloaty::RangeSink*)::$_1>(std::__1::basic_string_view<char, std::__1::char_traits<char> >, bloaty::wasm::ParseSymbols(bloaty::RangeSink*)::$_1&&) Line | Count | Source | 165 | 2.90k | void ForEachSection(string_view file, Func&& section_func) { | 166 | 2.90k | string_view data = file; | 167 | 2.90k | ReadMagic(&data); | 168 | | | 169 | 24.7k | while (!data.empty()) { | 170 | 21.8k | Section section = Section::Read(&data); | 171 | 21.8k | section_func(section); | 172 | 21.8k | } | 173 | 2.90k | } |
webassembly.cc:void bloaty::wasm::ForEachSection<bloaty::wasm::ParseSymbols(bloaty::RangeSink*)::$_2>(std::__1::basic_string_view<char, std::__1::char_traits<char> >, bloaty::wasm::ParseSymbols(bloaty::RangeSink*)::$_2&&) Line | Count | Source | 165 | 2.90k | void ForEachSection(string_view file, Func&& section_func) { | 166 | 2.90k | string_view data = file; | 167 | 2.90k | ReadMagic(&data); | 168 | | | 169 | 17.6k | while (!data.empty()) { | 170 | 14.7k | Section section = Section::Read(&data); | 171 | 14.7k | section_func(section); | 172 | 14.7k | } | 173 | 2.90k | } |
webassembly.cc:void bloaty::wasm::ForEachSection<bloaty::wasm::AddWebAssemblyFallback(bloaty::RangeSink*)::$_3>(std::__1::basic_string_view<char, std::__1::char_traits<char> >, bloaty::wasm::AddWebAssemblyFallback(bloaty::RangeSink*)::$_3&&) Line | Count | Source | 165 | 23.9k | void ForEachSection(string_view file, Func&& section_func) { | 166 | 23.9k | string_view data = file; | 167 | 23.9k | ReadMagic(&data); | 168 | | | 169 | 204k | while (!data.empty()) { | 170 | 180k | Section section = Section::Read(&data); | 171 | 180k | section_func(section); | 172 | 180k | } | 173 | 23.9k | } |
|
174 | | |
175 | 26.5k | void ParseSections(RangeSink* sink) { |
176 | 231k | ForEachSection(sink->input_file().data(), [sink](const Section& section) { |
177 | 231k | sink->AddFileRange("wasm_sections", section.name, section.data); |
178 | 231k | }); |
179 | 26.5k | } |
180 | | |
181 | | typedef std::unordered_map<int, std::string> IndexedNames; |
182 | | |
183 | | void ReadNames(const Section& section, IndexedNames* func_names, |
184 | 0 | IndexedNames* dataseg_names, RangeSink* sink) { |
185 | 0 | enum class NameType { |
186 | 0 | kModule = 0, |
187 | 0 | kFunction = 1, |
188 | 0 | kLocal = 2, |
189 | 0 | kLabel = 3, |
190 | 0 | kType = 4, |
191 | 0 | kTable = 5, |
192 | 0 | kMemory = 6, |
193 | 0 | kGlobal = 7, |
194 | 0 | kElemSegment = 8, |
195 | 0 | kDataSegment = 9 |
196 | 0 | }; |
197 | |
|
198 | 0 | string_view data = section.contents; |
199 | |
|
200 | 0 | while (!data.empty()) { |
201 | 0 | NameType type = static_cast<NameType>(ReadVarUInt7(&data)); |
202 | 0 | uint32_t size = ReadVarUInt32(&data); |
203 | 0 | string_view section = ReadPiece(size, &data); |
204 | |
|
205 | 0 | if (type == NameType::kFunction || type == NameType::kDataSegment) { |
206 | 0 | uint32_t count = ReadVarUInt32(§ion); |
207 | 0 | for (uint32_t i = 0; i < count; i++) { |
208 | 0 | string_view entry = section; |
209 | 0 | uint32_t index = ReadVarUInt32(§ion); |
210 | 0 | uint32_t name_len = ReadVarUInt32(§ion); |
211 | 0 | string_view name = ReadPiece(name_len, §ion); |
212 | 0 | entry = StrictSubstr(entry, 0, name.data() - entry.data() + name.size()); |
213 | 0 | sink->AddFileRange("wasm_funcname", name, entry); |
214 | 0 | IndexedNames *names = (type == NameType::kFunction ? func_names : dataseg_names); |
215 | 0 | (*names)[index] = std::string(name); |
216 | 0 | } |
217 | 0 | } |
218 | 0 | } |
219 | 0 | } |
220 | | |
221 | 313 | int ReadValueType(string_view* data) { |
222 | 313 | return ReadVarint7(data); |
223 | 313 | } |
224 | | |
225 | 1.01k | int ReadElemType(string_view* data) { |
226 | 1.01k | return ReadVarint7(data); |
227 | 1.01k | } |
228 | | |
229 | 1.44k | void ReadResizableLimits(string_view* data) { |
230 | 1.44k | auto flags = ReadVarUInt1(data); |
231 | 1.44k | ReadVarUInt32(data); |
232 | 1.44k | if (flags) { |
233 | 877 | ReadVarUInt32(data); |
234 | 877 | } |
235 | 1.44k | } |
236 | | |
237 | 313 | void ReadGlobalType(string_view* data) { |
238 | 313 | ReadValueType(data); |
239 | 313 | ReadVarUInt1(data); |
240 | 313 | } |
241 | | |
242 | 1.01k | void ReadTableType(string_view* data) { |
243 | 1.01k | ReadElemType(data); |
244 | 1.01k | ReadResizableLimits(data); |
245 | 1.01k | } |
246 | | |
247 | 479 | void ReadMemoryType(string_view* data) { |
248 | 479 | ReadResizableLimits(data); |
249 | 479 | } |
250 | | |
251 | 2.27k | uint32_t GetNumFunctionImports(const Section& section) { |
252 | 2.27k | assert(section.id == Section::kImport); |
253 | 0 | string_view data = section.contents; |
254 | | |
255 | 2.27k | uint32_t count = ReadVarUInt32(&data); |
256 | 2.27k | uint32_t func_count = 0; |
257 | | |
258 | 5.54k | for (uint32_t i = 0; i < count; i++) { |
259 | 4.50k | uint32_t module_len = ReadVarUInt32(&data); |
260 | 4.50k | ReadPiece(module_len, &data); |
261 | 4.50k | uint32_t field_len = ReadVarUInt32(&data); |
262 | 4.50k | ReadPiece(field_len, &data); |
263 | 4.50k | auto kind = ReadFixed<uint8_t>(&data); |
264 | | |
265 | 4.50k | switch (kind) { |
266 | 1.80k | case ExternalKind::kFunction: |
267 | 1.80k | func_count++; |
268 | 1.80k | ReadVarUInt32(&data); |
269 | 1.80k | break; |
270 | 1.01k | case ExternalKind::kTable: |
271 | 1.01k | ReadTableType(&data); |
272 | 1.01k | break; |
273 | 479 | case ExternalKind::kMemory: |
274 | 479 | ReadMemoryType(&data); |
275 | 479 | break; |
276 | 313 | case ExternalKind::kGlobal: |
277 | 313 | ReadGlobalType(&data); |
278 | 313 | break; |
279 | 149 | default: |
280 | 149 | THROWF("Unrecognized import kind: $0", kind); |
281 | 4.50k | } |
282 | 4.50k | } |
283 | | |
284 | 1.03k | return func_count; |
285 | 2.27k | } |
286 | | |
287 | | void ReadCodeSection(const Section& section, const IndexedNames& names, |
288 | 884 | uint32_t num_imports, RangeSink* sink) { |
289 | 884 | string_view data = section.contents; |
290 | | |
291 | 884 | uint32_t count = ReadVarUInt32(&data); |
292 | | |
293 | 3.24k | for (uint32_t i = 0; i < count; i++) { |
294 | 2.36k | string_view func = data; |
295 | 2.36k | uint32_t size = ReadVarUInt32(&data); |
296 | 2.36k | uint32_t total_size = size + (data.data() - func.data()); |
297 | | |
298 | 2.36k | func = StrictSubstr(func, 0, total_size); |
299 | 2.36k | data = StrictSubstr(data, size); |
300 | | |
301 | 2.36k | auto iter = names.find(num_imports + i); |
302 | | |
303 | 2.36k | if (iter == names.end()) { |
304 | 2.06k | std::string name = "func[" + std::to_string(i) + "]"; |
305 | 2.06k | sink->AddFileRange("wasm_function", name, func); |
306 | 2.06k | } else { |
307 | 298 | sink->AddFileRange("wasm_function", ItaniumDemangle(iter->second, sink->data_source()), func); |
308 | 298 | } |
309 | 2.36k | } |
310 | 884 | } |
311 | | |
312 | | void ReadDataSection(const Section& section, const IndexedNames& names, |
313 | 1.35k | RangeSink* sink) { |
314 | 1.35k | string_view data = section.contents; |
315 | 1.35k | uint32_t count = ReadVarUInt32(&data); |
316 | | |
317 | 4.76k | for (uint32_t i = 0; i < count; i++) { |
318 | 3.63k | string_view segment = data; |
319 | 3.63k | uint8_t mode = ReadFixed<uint8_t>(&data); |
320 | 3.63k | if (mode > 1) THROW("multi-memory extension isn't supported"); |
321 | 3.41k | if (mode == 0) { // Active segment |
322 | | // We will need to read the init expr. |
323 | | // For the extended const proposal, read instructions until end is reached |
324 | | // Otherwise, just read a constexpr inst (t.const or global.get) |
325 | | // For now, we just need to support passive segments. |
326 | 2.51k | continue; |
327 | 2.51k | } |
328 | | // else, a passive segment |
329 | | |
330 | 898 | uint32_t segment_size = ReadVarUInt32(&data); |
331 | 898 | uint32_t total_size = segment_size + (data.data() - segment.data()); |
332 | | |
333 | 898 | segment = StrictSubstr(segment, 0, total_size); |
334 | 898 | data = StrictSubstr(data, segment_size); |
335 | | |
336 | 898 | auto iter = names.find(i); |
337 | 898 | if (iter == names.end()) { |
338 | 722 | std::string name = "data[" + std::to_string(i) + "]"; |
339 | 722 | sink->AddFileRange("wasm_data", name, segment); |
340 | 722 | } else { |
341 | 176 | sink->AddFileRange("wasm_data", iter->second, segment); |
342 | 176 | } |
343 | 898 | } |
344 | 1.35k | } |
345 | | |
346 | | |
347 | 2.90k | void ParseSymbols(RangeSink* sink) { |
348 | | // First pass: read the custom naming section to get function names. |
349 | 2.90k | std::unordered_map<int, std::string> func_names; |
350 | 2.90k | std::unordered_map<int, std::string> dataseg_names; |
351 | 2.90k | uint32_t num_imports = 0; |
352 | | |
353 | 2.90k | ForEachSection(sink->input_file().data(), |
354 | 21.8k | [&func_names, &dataseg_names, sink](const Section& section) { |
355 | 21.8k | if (section.name == "name") { |
356 | 0 | ReadNames(section, &func_names, &dataseg_names, sink); |
357 | 0 | } |
358 | 21.8k | }); |
359 | | |
360 | | // Second pass: read the function/code sections. |
361 | 2.90k | ForEachSection(sink->input_file().data(), |
362 | 14.7k | [&func_names, &dataseg_names, &num_imports, sink](const Section& section) { |
363 | 14.7k | if (section.id == Section::kImport) { |
364 | 2.27k | num_imports = GetNumFunctionImports(section); |
365 | 12.4k | } else if (section.id == Section::kCode) { |
366 | 884 | ReadCodeSection(section, func_names, num_imports, sink); |
367 | 11.5k | } else if (section.id == Section::kData) { |
368 | 1.35k | ReadDataSection(section, dataseg_names, sink); |
369 | 1.35k | } |
370 | 14.7k | }); |
371 | 2.90k | } |
372 | | |
373 | 23.9k | void AddWebAssemblyFallback(RangeSink* sink) { |
374 | 180k | ForEachSection(sink->input_file().data(), [sink](const Section& section) { |
375 | 180k | std::string name2 = |
376 | 180k | std::string("[section ") + std::string(section.name) + std::string("]"); |
377 | 180k | sink->AddFileRange("wasm_overhead", name2, section.data); |
378 | 180k | }); |
379 | 23.9k | sink->AddFileRange("wasm_overhead", "[WASM Header]", |
380 | 23.9k | StrictSubstr(sink->input_file().data(), 0, 8)); |
381 | 23.9k | } |
382 | | |
383 | | class WebAssemblyObjectFile : public ObjectFile { |
384 | | public: |
385 | | WebAssemblyObjectFile(std::unique_ptr<InputFile> file_data) |
386 | 41.4k | : ObjectFile(std::move(file_data)) {} |
387 | | |
388 | 41.4k | std::string GetBuildId() const override { |
389 | | // TODO(haberman): does WebAssembly support this? |
390 | 41.4k | return std::string(); |
391 | 41.4k | } |
392 | | |
393 | 20.7k | void ProcessFile(const std::vector<RangeSink*>& sinks) const override { |
394 | 38.1k | for (auto sink : sinks) { |
395 | 38.1k | switch (sink->data_source()) { |
396 | 23.6k | case DataSource::kSegments: |
397 | 26.5k | case DataSource::kSections: |
398 | 26.5k | ParseSections(sink); |
399 | 26.5k | break; |
400 | 0 | case DataSource::kSymbols: |
401 | 0 | case DataSource::kRawSymbols: |
402 | 2.90k | case DataSource::kShortSymbols: |
403 | 2.90k | case DataSource::kFullSymbols: |
404 | 2.90k | ParseSymbols(sink); |
405 | 2.90k | break; |
406 | 2.90k | case DataSource::kArchiveMembers: |
407 | 5.81k | case DataSource::kCompileUnits: |
408 | 8.71k | case DataSource::kInlines: |
409 | 8.71k | default: |
410 | 8.71k | THROW("WebAssembly doesn't support this data source"); |
411 | 38.1k | } |
412 | 23.9k | AddWebAssemblyFallback(sink); |
413 | 23.9k | } |
414 | 20.7k | } |
415 | | |
416 | | bool GetDisassemblyInfo(absl::string_view /*symbol*/, |
417 | | DataSource /*symbol_source*/, |
418 | 0 | DisassemblyInfo* /*info*/) const override { |
419 | 0 | WARN("WebAssembly files do not support disassembly yet"); |
420 | 0 | return false; |
421 | 0 | } |
422 | | }; |
423 | | |
424 | | } // namespace wasm |
425 | | |
426 | | std::unique_ptr<ObjectFile> TryOpenWebAssemblyFile( |
427 | 549k | std::unique_ptr<InputFile>& file) { |
428 | 549k | string_view data = file->data(); |
429 | 549k | if (wasm::ReadMagic(&data)) { |
430 | 41.4k | return std::unique_ptr<ObjectFile>( |
431 | 41.4k | new wasm::WebAssemblyObjectFile(std::move(file))); |
432 | 41.4k | } |
433 | | |
434 | 507k | return nullptr; |
435 | 549k | } |
436 | | |
437 | | } // namespace bloaty |