/src/hermes/include/hermes/BCGen/HBC/DebugInfo.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) Meta Platforms, Inc. and affiliates. |
3 | | * |
4 | | * This source code is licensed under the MIT license found in the |
5 | | * LICENSE file in the root directory of this source tree. |
6 | | */ |
7 | | |
8 | | #ifndef HERMES_BCGEN_HBC_DEBUGINFO_H |
9 | | #define HERMES_BCGEN_HBC_DEBUGINFO_H |
10 | | |
11 | | #include "hermes/BCGen/HBC/BytecodeFileFormat.h" |
12 | | #include "hermes/BCGen/HBC/ConsecutiveStringStorage.h" |
13 | | #include "hermes/BCGen/HBC/StreamVector.h" |
14 | | #include "hermes/BCGen/HBC/UniquingFilenameTable.h" |
15 | | #include "hermes/Public/DebuggerTypes.h" |
16 | | #include "hermes/Support/LEB128.h" |
17 | | #include "hermes/Support/OptValue.h" |
18 | | #include "hermes/Support/StringTable.h" |
19 | | #include "hermes/Support/UTF8.h" |
20 | | #include "llvh/ADT/DenseMap.h" |
21 | | #include "llvh/ADT/StringRef.h" |
22 | | #include "llvh/Support/Format.h" |
23 | | |
24 | | #include <string> |
25 | | #include <vector> |
26 | | |
27 | | namespace llvh { |
28 | | class raw_ostream; |
29 | | } // namespace llvh |
30 | | |
31 | | namespace hermes { |
32 | | class SourceMapGenerator; |
33 | | namespace hbc { |
34 | | |
35 | | /// The file name, line and column associated with a bytecode address. |
36 | | struct DebugSourceLocation { |
37 | | // The bytecode offset of this debug info. |
38 | | uint32_t address{0}; |
39 | | // The filename index in the filename table. |
40 | | uint32_t filenameId{0}; |
41 | | // The sourceMappingUrl index in the string table. |
42 | | // Use kInvalidBreakpoint for an invalid URL. |
43 | | uint32_t sourceMappingUrlId{facebook::hermes::debugger::kInvalidBreakpoint}; |
44 | | // The line count, 1 based. |
45 | | uint32_t line{0}; |
46 | | // The column count, 1 based. |
47 | | uint32_t column{0}; |
48 | | // The statement at this location. 1 based, per function. |
49 | | // Initialized to 0, to show that no statements have been generated yet. |
50 | | // Thus, we can see which instructions aren't part of any user-written code. |
51 | | uint32_t statement{0}; |
52 | | |
53 | 229k | DebugSourceLocation() {} |
54 | | |
55 | | DebugSourceLocation( |
56 | | uint32_t address, |
57 | | uint32_t filenameId, |
58 | | uint32_t line, |
59 | | uint32_t column, |
60 | | uint32_t statement) |
61 | | : address(address), |
62 | | filenameId(filenameId), |
63 | | line(line), |
64 | | column(column), |
65 | 0 | statement(statement) {} |
66 | | |
67 | 0 | bool operator==(const DebugSourceLocation &rhs) const { |
68 | 0 | return address == rhs.address && filenameId == rhs.filenameId && |
69 | 0 | line == rhs.line && column == rhs.column && statement == rhs.statement; |
70 | 0 | } |
71 | | |
72 | 0 | bool operator!=(const DebugSourceLocation &rhs) const { |
73 | 0 | return !(*this == rhs); |
74 | 0 | } |
75 | | }; |
76 | | |
77 | | /// The string representing a textual name for a call instruction's callee |
78 | | /// argument. |
79 | | struct DebugTextifiedCallee { |
80 | | // The bytecode offset of this debug info. |
81 | | uint32_t address{0}; |
82 | | // A textual name for the function being called. Must be a valid UTF8 string. |
83 | | Identifier textifiedCallee; |
84 | | }; |
85 | | |
/// Bundles the per-section offsets into the debugging data.
struct DebugOffsets {
  /// Sentinel meaning "there is no offset".
  static constexpr uint32_t NO_OFFSET = UINT32_MAX;

  /// Offset of the source locations (DebugSourceLocation) inside the
  /// debugging data.
  uint32_t sourceLocations{NO_OFFSET};

  /// Offset inside the lexical data section of the debugging data.
  uint32_t lexicalData{NO_OFFSET};

  /// Offset inside the textified callee data section of the debugging data.
  uint32_t textifiedCallees{NO_OFFSET};

  /// Leaves every offset at NO_OFFSET.
  DebugOffsets() = default;

  /// Stores \p src, \p lex and \p tCallee as the source location, lexical
  /// data and textified callee offsets respectively.
  DebugOffsets(uint32_t src, uint32_t lex, uint32_t tCallee)
      : sourceLocations{src}, lexicalData{lex}, textifiedCallees{tCallee} {}
};
106 | | |
/// Outcome of looking up the bytecode offset that a line/column falls on.
struct DebugSearchResult {
  /// The result function in the bytecode stream.
  uint32_t functionIndex = 0;

  /// Offset of the result instruction in the bytecode, measured from the
  /// start of the function that contains it.
  uint32_t bytecodeOffset = 0;

  /// The line that the search actually found.
  uint32_t line = 0;

  /// The column that the search actually found.
  uint32_t column = 0;

  /// Builds a result in which every field is zero.
  DebugSearchResult() = default;

  /// Builds a result from explicit values.
  DebugSearchResult(
      uint32_t functionIndex,
      uint32_t bytecodeOffset,
      uint32_t line,
      uint32_t column)
      : functionIndex{functionIndex},
        bytecodeOffset{bytecodeOffset},
        line{line},
        column{column} {}
};
134 | | |
135 | | /// A data structure for storing debug info. |
136 | | class DebugInfo { |
137 | | public: |
138 | | using DebugFileRegionList = llvh::SmallVector<DebugFileRegion, 1>; |
139 | | |
140 | | private: |
141 | | /// Filename table for mapping to offsets and lengths in filenameStorage_. |
142 | | std::vector<StringTableEntry> filenameTable_{}; |
143 | | |
144 | | /// String storage for filenames. |
145 | | std::vector<unsigned char> filenameStorage_{}; |
146 | | |
147 | | DebugFileRegionList files_{}; |
148 | | uint32_t lexicalDataOffset_ = 0; |
149 | | uint32_t textifiedCalleeOffset_ = 0; |
150 | | uint32_t stringTableOffset_ = 0; |
151 | | StreamVector<uint8_t> data_{}; |
152 | | |
153 | | /// Get source filename as string id. |
154 | | OptValue<uint32_t> getFilenameForAddress(uint32_t debugOffset) const; |
155 | | |
156 | | /// Decodes a string at offset \p offset in \p data, updating offset in-place. |
157 | | /// \return the decoded string. |
158 | | llvh::StringRef decodeString( |
159 | | uint32_t *inoutOffset, |
160 | | llvh::ArrayRef<uint8_t> data) const; |
161 | | |
162 | | public: |
163 | 56 | explicit DebugInfo() = default; |
164 | | /*implicit*/ DebugInfo(DebugInfo &&that) = default; |
165 | | |
166 | | explicit DebugInfo( |
167 | | ConsecutiveStringStorage &&filenameStrings, |
168 | | DebugFileRegionList &&files, |
169 | | uint32_t lexicalDataOffset, |
170 | | uint32_t textifiedCalleeOffset, |
171 | | uint32_t stringTableOffset, |
172 | | StreamVector<uint8_t> &&data) |
173 | | : filenameTable_(filenameStrings.acquireStringTable()), |
174 | | filenameStorage_(filenameStrings.acquireStringStorage()), |
175 | | files_(std::move(files)), |
176 | | lexicalDataOffset_(lexicalDataOffset), |
177 | | textifiedCalleeOffset_(textifiedCalleeOffset), |
178 | | stringTableOffset_(stringTableOffset), |
179 | 56 | data_(std::move(data)) {} |
180 | | |
181 | | explicit DebugInfo( |
182 | | std::vector<StringTableEntry> &&filenameStrings, |
183 | | std::vector<unsigned char> &&filenameStorage, |
184 | | DebugFileRegionList &&files, |
185 | | uint32_t lexicalDataOffset, |
186 | | uint32_t textifiedCalleeOffset, |
187 | | uint32_t stringTableOffset, |
188 | | StreamVector<uint8_t> &&data) |
189 | | : filenameTable_(std::move(filenameStrings)), |
190 | | filenameStorage_(std::move(filenameStorage)), |
191 | | files_(std::move(files)), |
192 | | lexicalDataOffset_(lexicalDataOffset), |
193 | | textifiedCalleeOffset_(textifiedCalleeOffset), |
194 | | stringTableOffset_(stringTableOffset), |
195 | 0 | data_(std::move(data)) {} |
196 | | |
197 | 56 | DebugInfo &operator=(DebugInfo &&that) = default; |
198 | | |
199 | 0 | const DebugFileRegionList &viewFiles() const { |
200 | 0 | return files_; |
201 | 0 | } |
202 | 0 | const StreamVector<uint8_t> &viewData() const { |
203 | 0 | return data_; |
204 | 0 | } |
205 | 0 | llvh::ArrayRef<StringTableEntry> getFilenameTable() const { |
206 | 0 | return filenameTable_; |
207 | 0 | } |
208 | 0 | llvh::ArrayRef<unsigned char> getFilenameStorage() const { |
209 | 0 | return filenameStorage_; |
210 | 0 | } |
211 | | |
212 | | /// Retrieve the filename for a given \p id in the filename table. |
213 | 24 | std::string getFilenameByID(uint32_t id) const { |
214 | 24 | assert(id < filenameTable_.size() && "Filename ID out of bounds"); |
215 | 0 | std::string utf8Storage; |
216 | 24 | return getStringFromEntry(filenameTable_[id], filenameStorage_, utf8Storage) |
217 | 24 | .str(); |
218 | 24 | } |
219 | | |
220 | 0 | uint32_t lexicalDataOffset() const { |
221 | 0 | return lexicalDataOffset_; |
222 | 0 | } |
223 | | |
224 | 0 | uint32_t textifiedCalleeOffset() const { |
225 | 0 | return textifiedCalleeOffset_; |
226 | 0 | } |
227 | | |
228 | 0 | uint32_t stringTableOffset() const { |
229 | 0 | return stringTableOffset_; |
230 | 0 | } |
231 | | |
232 | | /// Get the location of \p offsetInFunction, given the function's debug |
233 | | /// offset. |
234 | | OptValue<DebugSourceLocation> getLocationForAddress( |
235 | | uint32_t debugOffset, |
236 | | uint32_t offsetInFunction) const; |
237 | | |
238 | | /// \return the name of the textified callee for the function called in the |
239 | | /// given \p offsetInFunction. Encoding is UTF8. |
240 | | OptValue<llvh::StringRef> getTextifiedCalleeUTF8( |
241 | | uint32_t debugOffset, |
242 | | uint32_t offsetInFunction) const; |
243 | | |
244 | | /// Given a \p targetLine and optional \p targetColumn, |
245 | | /// find a bytecode address at which that location is listed in debug info. |
246 | | /// If \p targetColumn is None, then it tries to match at the first location |
247 | | /// in \p line, else it tries to match at column \p targetColumn. |
248 | | OptValue<DebugSearchResult> getAddressForLocation( |
249 | | uint32_t filenameId, |
250 | | uint32_t targetLine, |
251 | | OptValue<uint32_t> targetColumn) const; |
252 | | |
253 | | /// Read variable names at \p offset into the lexical data section |
254 | | /// of the debug info. \return the list of variable names. |
255 | | llvh::SmallVector<llvh::StringRef, 4> getVariableNames(uint32_t offset) const; |
256 | | |
257 | | /// Reads out the parent function ID of the function whose lexical debug data |
258 | | /// starts at \p offset. \return the ID of the parent function, or None if |
259 | | /// none. |
260 | | OptValue<uint32_t> getParentFunctionId(uint32_t offset) const; |
261 | | |
262 | | /// \return the size in bytes of the serialized string table. |
263 | 4 | uint32_t getStringTableSizeBytes() const { |
264 | 4 | return stringTableOffset_ - textifiedCalleeOffset_; |
265 | 4 | } |
266 | | |
267 | | private: |
268 | | /// Accessors for portions of data_, which looks like this: |
269 | | /// [sourceLocations][lexicalData][textifiedCallee][stringTable] |
270 | | /// | | ^ stringTableOffset_ |
271 | | /// | ^ textifiedCalleeOffset_ |
272 | | /// ^ lexicalDataOffset_ |
273 | | |
274 | | /// \return the slice of data_ reflecting the source locations. |
275 | 0 | llvh::ArrayRef<uint8_t> sourceLocationsData() const { |
276 | 0 | return data_.getData().slice(0, lexicalDataOffset_); |
277 | 0 | } |
278 | | |
279 | | /// \return the slice of data_ reflecting the lexical data. |
280 | 0 | llvh::ArrayRef<uint8_t> lexicalData() const { |
281 | 0 | return data_.getData().slice( |
282 | 0 | lexicalDataOffset_, textifiedCalleeOffset_ - lexicalDataOffset_); |
283 | 0 | } |
284 | | |
285 | | /// \return the slice of data_ reflecting the textified callee table. |
286 | 4 | llvh::ArrayRef<uint8_t> textifiedCalleeData() const { |
287 | 4 | return data_.getData().slice( |
288 | 4 | textifiedCalleeOffset_, getStringTableSizeBytes()); |
289 | 4 | } |
290 | | |
291 | | /// \return the slice of data_ reflecting the string table data. |
292 | 8 | llvh::ArrayRef<uint8_t> stringTableData() const { |
293 | 8 | return data_.getData().slice(stringTableOffset_); |
294 | 8 | } |
295 | | |
296 | | void disassembleFilenames(llvh::raw_ostream &OS) const; |
297 | | void disassembleFilesAndOffsets(llvh::raw_ostream &OS) const; |
298 | | void disassembleLexicalData(llvh::raw_ostream &OS) const; |
299 | | void disassembleTextifiedCallee(llvh::raw_ostream &OS) const; |
300 | | void disassembleStringTable(llvh::raw_ostream &OS) const; |
301 | | |
302 | | public: |
303 | 0 | void disassemble(llvh::raw_ostream &OS) const { |
304 | 0 | disassembleFilenames(OS); |
305 | 0 | disassembleFilesAndOffsets(OS); |
306 | 0 | disassembleLexicalData(OS); |
307 | 0 | disassembleTextifiedCallee(OS); |
308 | 0 | disassembleStringTable(OS); |
309 | 0 | } |
310 | | |
311 | | #ifndef HERMESVM_LEAN |
312 | | /// Populate the given source map \p sourceMap with debug information. |
313 | | /// Each opcode with line and column information is mapped to its absolute |
314 | | /// offset in the bytecode file. To determine these absolute offsets, the |
315 | | /// functionOffsets parameter maps functions (indexed by their id) to their |
316 | | /// start position in the bytecode file. |
317 | | void populateSourceMap( |
318 | | SourceMapGenerator *sourceMap, |
319 | | std::vector<uint32_t> &&functionOffsets, |
320 | | uint32_t segmentID) const; |
321 | | #endif |
322 | | }; |
323 | | |
324 | | class DebugInfoGenerator { |
325 | | private: |
326 | | /// A special offset for representing the most common entry in its table. |
327 | | /// |
328 | | /// For Debug Lexical Table, it represents the most common lexical info |
329 | | /// (vars count: 0, lexical parent: none). When compiled without -g, |
330 | | /// this common value applies to all functions without local variables. |
331 | | /// This optimization reduces hbc bundle size; When compiled with -g, the |
332 | | /// lexical parent is none for the global function, but not any other |
333 | | /// functions. As a result, this optimization does not provide value. |
334 | | /// |
335 | | /// For textified callee table, it represents an empty table. |
336 | | static constexpr uint32_t kMostCommonEntryOffset = 0; |
337 | | |
338 | | bool validData{true}; |
339 | | |
340 | | /// Serialized source location data. |
341 | | std::vector<uint8_t> sourcesData_{}; |
342 | | |
343 | | /// String storage for filenames. |
344 | | /// ConsecutiveStringStorage is not copy-constructible or copy-assignable. |
345 | | ConsecutiveStringStorage filenameStrings_; |
346 | | |
347 | | /// List of files mapping file ID to source location offsets. |
348 | | DebugInfo::DebugFileRegionList files_{}; |
349 | | |
350 | | /// Serialized lexical data, which contains information about the variables |
351 | | /// associated with each code block. |
352 | | std::vector<uint8_t> lexicalData_; |
353 | | |
354 | | /// Serialized textified callee table. |
355 | | std::vector<uint8_t> textifiedCallees_; |
356 | | |
357 | | /// The debug info string table. All string entries in the debug info records |
358 | | /// point to an entry in this table. Strings are encoded as size-prefixed, |
359 | | /// UTF8-encoded payloads. |
360 | | std::vector<uint8_t> stringTable_; |
361 | | |
362 | | /// An index for strings in stringTable_. |
363 | | llvh::DenseMap<UniqueString *, uint32_t> stringTableIndex_; |
364 | | |
365 | 3.39M | int32_t delta(uint32_t to, uint32_t from) { |
366 | 3.39M | int64_t diff = (int64_t)to - from; |
367 | | // It's unlikely that lines or columns will ever jump from 0 to 3 billion, |
368 | | // but if it ever happens we can extend to 64bit types. |
369 | 3.39M | assert( |
370 | 3.39M | diff <= INT32_MAX && diff >= INT32_MIN && |
371 | 3.39M | "uint32_t delta too large when encoding debug info"); |
372 | 0 | return (int32_t)diff; |
373 | 3.39M | } |
374 | | |
375 | | /// Appends \p str to stringTable_ if not already present, then |
376 | | /// appends \p str's offset in stringTable_ to the given \p data. |
377 | | void appendString(std::vector<uint8_t> &data, Identifier str); |
378 | | |
379 | | /// No copy constructor or copy assignment operator. |
380 | | /// Note that filenameStrings_ is of type ConsecutiveStringStorage, which |
381 | | /// is not copy-constructible or copy-assignable. |
382 | | DebugInfoGenerator(const DebugInfoGenerator &) = delete; |
383 | | DebugInfoGenerator &operator=(const DebugInfoGenerator &) = delete; |
384 | | |
385 | | public: |
386 | | explicit DebugInfoGenerator(UniquingFilenameTable &&filenameTable); |
387 | | |
388 | | DebugInfoGenerator(DebugInfoGenerator &&) = default; |
389 | | |
390 | | uint32_t appendSourceLocations( |
391 | | const DebugSourceLocation &start, |
392 | | uint32_t functionIndex, |
393 | | llvh::ArrayRef<DebugSourceLocation> offsets); |
394 | | |
395 | | /// Append lexical data including parent function \p parentFunctionIndex and |
396 | | /// list of variable names \p namesUTF8 to the debug data. Each string in \p |
397 | | /// namesUTF8 must be a valid UTF8 string. \return the offset in the lexical |
398 | | /// section of the debug data. |
399 | | uint32_t appendLexicalData( |
400 | | OptValue<uint32_t> parentFunctionIndex, |
401 | | llvh::ArrayRef<Identifier> namesUTF8); |
402 | | |
403 | | /// Append the textified callee data to the debug data. \return the offset in |
404 | | /// the textified callee table of the debug data. |
405 | | uint32_t appendTextifiedCalleeData( |
406 | | llvh::ArrayRef<DebugTextifiedCallee> textifiedCallees); |
407 | | |
408 | | // Destructively move memory to a DebugInfo. |
409 | | DebugInfo serializeWithMove(); |
410 | | }; |
411 | | |
412 | | } // namespace hbc |
413 | | } // namespace hermes |
414 | | #endif // HERMES_BCGEN_HBC_DEBUGINFO_H |