Coverage Report

Created: 2023-02-22 06:51

/src/hermes/include/hermes/BCGen/HBC/DebugInfo.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) Meta Platforms, Inc. and affiliates.
3
 *
4
 * This source code is licensed under the MIT license found in the
5
 * LICENSE file in the root directory of this source tree.
6
 */
7
8
#ifndef HERMES_BCGEN_HBC_DEBUGINFO_H
9
#define HERMES_BCGEN_HBC_DEBUGINFO_H
10
11
#include "hermes/BCGen/HBC/BytecodeFileFormat.h"
12
#include "hermes/BCGen/HBC/ConsecutiveStringStorage.h"
13
#include "hermes/BCGen/HBC/StreamVector.h"
14
#include "hermes/BCGen/HBC/UniquingFilenameTable.h"
15
#include "hermes/Public/DebuggerTypes.h"
16
#include "hermes/Support/LEB128.h"
17
#include "hermes/Support/OptValue.h"
18
#include "hermes/Support/StringTable.h"
19
#include "hermes/Support/UTF8.h"
20
#include "llvh/ADT/DenseMap.h"
21
#include "llvh/ADT/StringRef.h"
22
#include "llvh/Support/Format.h"
23
24
#include <string>
25
#include <vector>
26
27
// Forward declaration only: this header refers to llvh::raw_ostream purely by
// reference, so the full stream header is not pulled in here.
namespace llvh {
class raw_ostream;
} // namespace llvh
30
31
namespace hermes {
32
class SourceMapGenerator;
33
namespace hbc {
34
35
/// The file name, line and column associated with a bytecode address.
36
struct DebugSourceLocation {
37
  // The bytecode offset of this debug info.
38
  uint32_t address{0};
39
  // The filename index in the filename table.
40
  uint32_t filenameId{0};
41
  // The sourceMappingUrl index in the string table.
42
  // Use kInvalidBreakpoint for an invalid URL.
43
  uint32_t sourceMappingUrlId{facebook::hermes::debugger::kInvalidBreakpoint};
44
  // The line count, 1 based.
45
  uint32_t line{0};
46
  // The column count, 1 based.
47
  uint32_t column{0};
48
  // The statement at this location. 1 based, per function.
49
  // Initialized to 0, to show that no statements have been generated yet.
50
  // Thus, we can see which instructions aren't part of any user-written code.
51
  uint32_t statement{0};
52
53
229k
  DebugSourceLocation() {}
54
55
  DebugSourceLocation(
56
      uint32_t address,
57
      uint32_t filenameId,
58
      uint32_t line,
59
      uint32_t column,
60
      uint32_t statement)
61
      : address(address),
62
        filenameId(filenameId),
63
        line(line),
64
        column(column),
65
0
        statement(statement) {}
66
67
0
  bool operator==(const DebugSourceLocation &rhs) const {
68
0
    return address == rhs.address && filenameId == rhs.filenameId &&
69
0
        line == rhs.line && column == rhs.column && statement == rhs.statement;
70
0
  }
71
72
0
  bool operator!=(const DebugSourceLocation &rhs) const {
73
0
    return !(*this == rhs);
74
0
  }
75
};
76
77
/// The string representing a textual name for a call instruction's callee
78
/// argument.
79
struct DebugTextifiedCallee {
80
  // The bytecode offset of this debug info.
81
  uint32_t address{0};
82
  // A textual name for the function being called. Must be a valid UTF8 string.
83
  Identifier textifiedCallee;
84
};
85
86
/// Bundle of the per-section offsets into the debugging data.
/// Every offset starts out as NO_OFFSET until one is supplied.
struct DebugOffsets {
  /// Sentinel value indicating no offset.
  static constexpr uint32_t NO_OFFSET = UINT32_MAX;

  /// Offset into the source locations (DebugSourceLocation) section of the
  /// debugging data.
  uint32_t sourceLocations{NO_OFFSET};

  /// Offset into the lexical data section of the debugging data.
  uint32_t lexicalData{NO_OFFSET};

  /// Offset into the textified callee data section of the debugging data.
  uint32_t textifiedCallees{NO_OFFSET};

  /// Leaves all three offsets at NO_OFFSET.
  DebugOffsets() = default;

  /// Sets the source-location, lexical-data and textified-callee offsets to
  /// \p src, \p lex and \p tCallee respectively.
  DebugOffsets(uint32_t src, uint32_t lex, uint32_t tCallee)
      : sourceLocations(src), lexicalData(lex), textifiedCallees(tCallee) {}
};
106
107
/// Outcome of looking up the bytecode position that corresponds to a
/// line/column.
struct DebugSearchResult {
  // Offset of the result function in the bytecode stream.
  uint32_t functionIndex{0};

  // Offset of the result instruction in the bytecode, measured from the start
  // of the function that contains it.
  uint32_t bytecodeOffset{0};

  /// The actual line that the search found.
  uint32_t line{0};

  /// The actual column that the search found.
  uint32_t column{0};

  /// Creates a result with every field set to 0.
  DebugSearchResult() {}

  /// Creates a result from explicit values; each parameter initializes the
  /// member of the same name.
  DebugSearchResult(
      uint32_t functionIndex,
      uint32_t bytecodeOffset,
      uint32_t line,
      uint32_t column)
      : functionIndex(functionIndex),
        bytecodeOffset(bytecodeOffset),
        line(line),
        column(column) {}
};
134
135
/// A data structure for storing debug info.
136
class DebugInfo {
137
 public:
138
  using DebugFileRegionList = llvh::SmallVector<DebugFileRegion, 1>;
139
140
 private:
141
  /// Filename table for mapping to offsets and lengths in filenameStorage_.
142
  std::vector<StringTableEntry> filenameTable_{};
143
144
  /// String storage for filenames.
145
  std::vector<unsigned char> filenameStorage_{};
146
147
  DebugFileRegionList files_{};
148
  uint32_t lexicalDataOffset_ = 0;
149
  uint32_t textifiedCalleeOffset_ = 0;
150
  uint32_t stringTableOffset_ = 0;
151
  StreamVector<uint8_t> data_{};
152
153
  /// Get source filename as string id.
154
  OptValue<uint32_t> getFilenameForAddress(uint32_t debugOffset) const;
155
156
  /// Decodes a string at offset \p offset in \p data, updating offset in-place.
157
  /// \return the decoded string.
158
  llvh::StringRef decodeString(
159
      uint32_t *inoutOffset,
160
      llvh::ArrayRef<uint8_t> data) const;
161
162
 public:
163
56
  explicit DebugInfo() = default;
164
  /*implicit*/ DebugInfo(DebugInfo &&that) = default;
165
166
  explicit DebugInfo(
167
      ConsecutiveStringStorage &&filenameStrings,
168
      DebugFileRegionList &&files,
169
      uint32_t lexicalDataOffset,
170
      uint32_t textifiedCalleeOffset,
171
      uint32_t stringTableOffset,
172
      StreamVector<uint8_t> &&data)
173
      : filenameTable_(filenameStrings.acquireStringTable()),
174
        filenameStorage_(filenameStrings.acquireStringStorage()),
175
        files_(std::move(files)),
176
        lexicalDataOffset_(lexicalDataOffset),
177
        textifiedCalleeOffset_(textifiedCalleeOffset),
178
        stringTableOffset_(stringTableOffset),
179
56
        data_(std::move(data)) {}
180
181
  explicit DebugInfo(
182
      std::vector<StringTableEntry> &&filenameStrings,
183
      std::vector<unsigned char> &&filenameStorage,
184
      DebugFileRegionList &&files,
185
      uint32_t lexicalDataOffset,
186
      uint32_t textifiedCalleeOffset,
187
      uint32_t stringTableOffset,
188
      StreamVector<uint8_t> &&data)
189
      : filenameTable_(std::move(filenameStrings)),
190
        filenameStorage_(std::move(filenameStorage)),
191
        files_(std::move(files)),
192
        lexicalDataOffset_(lexicalDataOffset),
193
        textifiedCalleeOffset_(textifiedCalleeOffset),
194
        stringTableOffset_(stringTableOffset),
195
0
        data_(std::move(data)) {}
196
197
56
  DebugInfo &operator=(DebugInfo &&that) = default;
198
199
0
  const DebugFileRegionList &viewFiles() const {
200
0
    return files_;
201
0
  }
202
0
  const StreamVector<uint8_t> &viewData() const {
203
0
    return data_;
204
0
  }
205
0
  llvh::ArrayRef<StringTableEntry> getFilenameTable() const {
206
0
    return filenameTable_;
207
0
  }
208
0
  llvh::ArrayRef<unsigned char> getFilenameStorage() const {
209
0
    return filenameStorage_;
210
0
  }
211
212
  /// Retrieve the filename for a given \p id in the filename table.
213
24
  std::string getFilenameByID(uint32_t id) const {
214
24
    assert(id < filenameTable_.size() && "Filename ID out of bounds");
215
0
    std::string utf8Storage;
216
24
    return getStringFromEntry(filenameTable_[id], filenameStorage_, utf8Storage)
217
24
        .str();
218
24
  }
219
220
0
  uint32_t lexicalDataOffset() const {
221
0
    return lexicalDataOffset_;
222
0
  }
223
224
0
  uint32_t textifiedCalleeOffset() const {
225
0
    return textifiedCalleeOffset_;
226
0
  }
227
228
0
  uint32_t stringTableOffset() const {
229
0
    return stringTableOffset_;
230
0
  }
231
232
  /// Get the location of \p offsetInFunction, given the function's debug
233
  /// offset.
234
  OptValue<DebugSourceLocation> getLocationForAddress(
235
      uint32_t debugOffset,
236
      uint32_t offsetInFunction) const;
237
238
  /// \return the name of the textified callee for the function called in the
239
  /// given \p offsetInFunction. Encoding is UTF8.
240
  OptValue<llvh::StringRef> getTextifiedCalleeUTF8(
241
      uint32_t debugOffset,
242
      uint32_t offsetInFunction) const;
243
244
  /// Given a \p targetLine and optional \p targetColumn,
245
  /// find a bytecode address at which that location is listed in debug info.
246
  /// If \p targetColumn is None, then it tries to match at the first location
247
  /// in \p line, else it tries to match at column \p targetColumn.
248
  OptValue<DebugSearchResult> getAddressForLocation(
249
      uint32_t filenameId,
250
      uint32_t targetLine,
251
      OptValue<uint32_t> targetColumn) const;
252
253
  /// Read variable names at \p offset into the lexical data section
254
  /// of the debug info. \return the list of variable names.
255
  llvh::SmallVector<llvh::StringRef, 4> getVariableNames(uint32_t offset) const;
256
257
  /// Reads out the parent function ID of the function whose lexical debug data
258
  /// starts at \p offset. \return the ID of the parent function, or None if
259
  /// none.
260
  OptValue<uint32_t> getParentFunctionId(uint32_t offset) const;
261
262
  /// \return the size in bytes of the serialized string table.
263
4
  uint32_t getStringTableSizeBytes() const {
264
4
    return stringTableOffset_ - textifiedCalleeOffset_;
265
4
  }
266
267
 private:
268
  /// Accessors for portions of data_, which looks like this:
269
  /// [sourceLocations][lexicalData][textifiedCallee][stringTable]
270
  ///                  |            |                ^ stringTableOffset_
271
  ///                  |            ^ textifiedCalleeOffset_
272
  ///                  ^ lexicalDataOffset_
273
274
  /// \return the slice of data_ reflecting the source locations.
275
0
  llvh::ArrayRef<uint8_t> sourceLocationsData() const {
276
0
    return data_.getData().slice(0, lexicalDataOffset_);
277
0
  }
278
279
  /// \return the slice of data_ reflecting the lexical data.
280
0
  llvh::ArrayRef<uint8_t> lexicalData() const {
281
0
    return data_.getData().slice(
282
0
        lexicalDataOffset_, textifiedCalleeOffset_ - lexicalDataOffset_);
283
0
  }
284
285
  /// \return the slice of data_ reflecting the textified callee table.
286
4
  llvh::ArrayRef<uint8_t> textifiedCalleeData() const {
287
4
    return data_.getData().slice(
288
4
        textifiedCalleeOffset_, getStringTableSizeBytes());
289
4
  }
290
291
  /// \return the slice of data_ reflecting the string table data.
292
8
  llvh::ArrayRef<uint8_t> stringTableData() const {
293
8
    return data_.getData().slice(stringTableOffset_);
294
8
  }
295
296
  void disassembleFilenames(llvh::raw_ostream &OS) const;
297
  void disassembleFilesAndOffsets(llvh::raw_ostream &OS) const;
298
  void disassembleLexicalData(llvh::raw_ostream &OS) const;
299
  void disassembleTextifiedCallee(llvh::raw_ostream &OS) const;
300
  void disassembleStringTable(llvh::raw_ostream &OS) const;
301
302
 public:
303
0
  void disassemble(llvh::raw_ostream &OS) const {
304
0
    disassembleFilenames(OS);
305
0
    disassembleFilesAndOffsets(OS);
306
0
    disassembleLexicalData(OS);
307
0
    disassembleTextifiedCallee(OS);
308
0
    disassembleStringTable(OS);
309
0
  }
310
311
#ifndef HERMESVM_LEAN
312
  /// Populate the given source map \p sourceMap with debug information.
313
  /// Each opcode with line and column information is mapped to its absolute
314
  /// offset in the bytecode file. To determine these absolute offsets, the
315
  /// functionOffsets parameter maps functions (indexed by their id) to their
316
  /// start position in the bytecode file.
317
  void populateSourceMap(
318
      SourceMapGenerator *sourceMap,
319
      std::vector<uint32_t> &&functionOffsets,
320
      uint32_t segmentID) const;
321
#endif
322
};
323
324
class DebugInfoGenerator {
325
 private:
326
  /// A special offset for representing the most common entry in its table.
327
  ///
328
  /// For Debug Lexical Table, it represents the most common lexical info
329
  /// (vars count: 0, lexical parent: none). When compiled without -g,
330
  /// this common value applies to all functions without local variables.
331
  /// This optimization reduces hbc bundle size; When compiled with -g, the
332
  /// lexical parent is none for the global function, but not any other
333
  /// functions. As a result, this optimization does not provide value.
334
  ///
335
  /// For textified callee table, it represents an empty table.
336
  static constexpr uint32_t kMostCommonEntryOffset = 0;
337
338
  bool validData{true};
339
340
  /// Serialized source location data.
341
  std::vector<uint8_t> sourcesData_{};
342
343
  /// String storage for filenames.
344
  /// ConsecutiveStringStorage is not copy-constructible or copy-assignable.
345
  ConsecutiveStringStorage filenameStrings_;
346
347
  /// List of files mapping file ID to source location offsets.
348
  DebugInfo::DebugFileRegionList files_{};
349
350
  /// Serialized lexical data, which contains information about the variables
351
  /// associated with each code block.
352
  std::vector<uint8_t> lexicalData_;
353
354
  /// Serialized textified callee table.
355
  std::vector<uint8_t> textifiedCallees_;
356
357
  /// The debug info string table. All string entries in the debug info records
358
  /// point to an entry in this table. Strings are encoded as size-prefixed,
359
  /// UTF8-encoded payloads.
360
  std::vector<uint8_t> stringTable_;
361
362
  /// An index for strings in stringTable_.
363
  llvh::DenseMap<UniqueString *, uint32_t> stringTableIndex_;
364
365
3.39M
  int32_t delta(uint32_t to, uint32_t from) {
366
3.39M
    int64_t diff = (int64_t)to - from;
367
    // It's unlikely that lines or columns will ever jump from 0 to 3 billion,
368
    // but if it ever happens we can extend to 64bit types.
369
3.39M
    assert(
370
3.39M
        diff <= INT32_MAX && diff >= INT32_MIN &&
371
3.39M
        "uint32_t delta too large when encoding debug info");
372
0
    return (int32_t)diff;
373
3.39M
  }
374
375
  /// Appends \p str to stringTable_ if not already present, then
376
  /// appends \p str's offset in stringTable_ to the given \p data.
377
  void appendString(std::vector<uint8_t> &data, Identifier str);
378
379
  /// No copy constructor or copy assignment operator.
380
  /// Note that filenameStrings_ is of type ConsecutiveStringStorage, which
381
  /// is not copy-constructible or copy-assignable.
382
  DebugInfoGenerator(const DebugInfoGenerator &) = delete;
383
  DebugInfoGenerator &operator=(const DebugInfoGenerator &) = delete;
384
385
 public:
386
  explicit DebugInfoGenerator(UniquingFilenameTable &&filenameTable);
387
388
  DebugInfoGenerator(DebugInfoGenerator &&) = default;
389
390
  uint32_t appendSourceLocations(
391
      const DebugSourceLocation &start,
392
      uint32_t functionIndex,
393
      llvh::ArrayRef<DebugSourceLocation> offsets);
394
395
  /// Append lexical data including parent function \p parentFunctionIndex and
396
  /// list of variable names \p namesUTF8 to the debug data. Each string in \p
397
  /// namesUTF8 must be a valid UTF8 string. \return the offset in the lexical
398
  /// section of the debug data.
399
  uint32_t appendLexicalData(
400
      OptValue<uint32_t> parentFunctionIndex,
401
      llvh::ArrayRef<Identifier> namesUTF8);
402
403
  /// Append the textified callee data to the debug data. \return the offset in
404
  /// the textified callee table of the debug data.
405
  uint32_t appendTextifiedCalleeData(
406
      llvh::ArrayRef<DebugTextifiedCallee> textifiedCallees);
407
408
  // Destructively move memory to a DebugInfo.
409
  DebugInfo serializeWithMove();
410
};
411
412
} // namespace hbc
413
} // namespace hermes
414
#endif // HERMES_BCGEN_HBC_DEBUGINFO_H