Coverage Report

Created: 2024-01-17 10:31

/src/llvm-project/llvm/lib/Object/IRSymtab.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- IRSymtab.cpp - implementation of IR symbol tables ------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#include "llvm/Object/IRSymtab.h"
10
#include "llvm/ADT/ArrayRef.h"
11
#include "llvm/ADT/DenseMap.h"
12
#include "llvm/ADT/SmallPtrSet.h"
13
#include "llvm/ADT/SmallString.h"
14
#include "llvm/ADT/SmallVector.h"
15
#include "llvm/ADT/StringRef.h"
16
#include "llvm/Bitcode/BitcodeReader.h"
17
#include "llvm/Config/llvm-config.h"
18
#include "llvm/IR/Comdat.h"
19
#include "llvm/IR/DataLayout.h"
20
#include "llvm/IR/GlobalAlias.h"
21
#include "llvm/IR/GlobalObject.h"
22
#include "llvm/IR/Mangler.h"
23
#include "llvm/IR/Metadata.h"
24
#include "llvm/IR/Module.h"
25
#include "llvm/MC/StringTableBuilder.h"
26
#include "llvm/Object/ModuleSymbolTable.h"
27
#include "llvm/Object/SymbolicFile.h"
28
#include "llvm/Support/Allocator.h"
29
#include "llvm/Support/Casting.h"
30
#include "llvm/Support/CommandLine.h"
31
#include "llvm/Support/Error.h"
32
#include "llvm/Support/StringSaver.h"
33
#include "llvm/Support/VCSRevision.h"
34
#include "llvm/Support/raw_ostream.h"
35
#include "llvm/TargetParser/Triple.h"
36
#include <cassert>
37
#include <string>
38
#include <utility>
39
#include <vector>
40
41
using namespace llvm;
42
using namespace irsymtab;
43
44
/// Hidden command-line switch consulted by readBitcode(). When it is set,
/// readBitcode() skips the checks (missing or too-small symbol table, version
/// mismatch, producer mismatch) that would otherwise make it rebuild the symbol
/// table; the module-count check in readBitcode() is not affected by it.
static cl::opt<bool> DisableBitcodeVersionUpgrade(
    "disable-bitcode-version-upgrade", cl::Hidden,
    cl::desc("Disable automatic bitcode upgrade for version mismatch"));
47
48
/// Names of symbols that addSymbol() always marks as used: every name that
/// RuntimeLibcalls.def passes to HANDLE_LIBCALL, followed by a short list of
/// global variables.
static const char *PreservedSymbols[] = {
#define HANDLE_LIBCALL(code, name) name,
#include "llvm/IR/RuntimeLibcalls.def"
#undef HANDLE_LIBCALL
    // These are global variables rather than libcalls, so they are listed here
    // instead of in RuntimeLibcalls.def.
    // TODO: Are there other variables like these?
    "__ssp_canary_word",
    "__stack_chk_guard",
};
58
59
namespace {
60
61
70
const char *getExpectedProducerName() {
62
70
  static char DefaultName[] = LLVM_VERSION_STRING
63
70
#ifdef LLVM_REVISION
64
70
      " " LLVM_REVISION
65
70
#endif
66
70
      ;
67
  // Allows for testing of the irsymtab writer and upgrade mechanism. This
68
  // environment variable should not be set by users.
69
70
  if (char *OverrideName = getenv("LLVM_OVERRIDE_PRODUCER"))
70
0
    return OverrideName;
71
70
  return DefaultName;
72
70
}
73
74
const char *kExpectedProducerName = getExpectedProducerName();
75
76
/// Stores the temporary state that is required to build an IR symbol table.
77
struct Builder {
78
  SmallVector<char, 0> &Symtab;
79
  StringTableBuilder &StrtabBuilder;
80
  StringSaver Saver;
81
82
  // This ctor initializes a StringSaver using the passed in BumpPtrAllocator.
83
  // The StringTableBuilder does not create a copy of any strings added to it,
84
  // so this provides somewhere to store any strings that we create.
85
  Builder(SmallVector<char, 0> &Symtab, StringTableBuilder &StrtabBuilder,
86
          BumpPtrAllocator &Alloc)
87
0
      : Symtab(Symtab), StrtabBuilder(StrtabBuilder), Saver(Alloc) {}
88
89
  DenseMap<const Comdat *, int> ComdatMap;
90
  Mangler Mang;
91
  Triple TT;
92
93
  std::vector<storage::Comdat> Comdats;
94
  std::vector<storage::Module> Mods;
95
  std::vector<storage::Symbol> Syms;
96
  std::vector<storage::Uncommon> Uncommons;
97
98
  std::string COFFLinkerOpts;
99
  raw_string_ostream COFFLinkerOptsOS{COFFLinkerOpts};
100
101
  std::vector<storage::Str> DependentLibraries;
102
103
0
  void setStr(storage::Str &S, StringRef Value) {
104
0
    S.Offset = StrtabBuilder.add(Value);
105
0
    S.Size = Value.size();
106
0
  }
107
108
  template <typename T>
109
0
  void writeRange(storage::Range<T> &R, const std::vector<T> &Objs) {
110
0
    R.Offset = Symtab.size();
111
0
    R.Size = Objs.size();
112
0
    Symtab.insert(Symtab.end(), reinterpret_cast<const char *>(Objs.data()),
113
0
                  reinterpret_cast<const char *>(Objs.data() + Objs.size()));
114
0
  }
Unexecuted instantiation: IRSymtab.cpp:void (anonymous namespace)::Builder::writeRange<llvm::irsymtab::storage::Module>(llvm::irsymtab::storage::Range<llvm::irsymtab::storage::Module>&, std::__1::vector<llvm::irsymtab::storage::Module, std::__1::allocator<llvm::irsymtab::storage::Module> > const&)
Unexecuted instantiation: IRSymtab.cpp:void (anonymous namespace)::Builder::writeRange<llvm::irsymtab::storage::Comdat>(llvm::irsymtab::storage::Range<llvm::irsymtab::storage::Comdat>&, std::__1::vector<llvm::irsymtab::storage::Comdat, std::__1::allocator<llvm::irsymtab::storage::Comdat> > const&)
Unexecuted instantiation: IRSymtab.cpp:void (anonymous namespace)::Builder::writeRange<llvm::irsymtab::storage::Symbol>(llvm::irsymtab::storage::Range<llvm::irsymtab::storage::Symbol>&, std::__1::vector<llvm::irsymtab::storage::Symbol, std::__1::allocator<llvm::irsymtab::storage::Symbol> > const&)
Unexecuted instantiation: IRSymtab.cpp:void (anonymous namespace)::Builder::writeRange<llvm::irsymtab::storage::Uncommon>(llvm::irsymtab::storage::Range<llvm::irsymtab::storage::Uncommon>&, std::__1::vector<llvm::irsymtab::storage::Uncommon, std::__1::allocator<llvm::irsymtab::storage::Uncommon> > const&)
Unexecuted instantiation: IRSymtab.cpp:void (anonymous namespace)::Builder::writeRange<llvm::irsymtab::storage::Str>(llvm::irsymtab::storage::Range<llvm::irsymtab::storage::Str>&, std::__1::vector<llvm::irsymtab::storage::Str, std::__1::allocator<llvm::irsymtab::storage::Str> > const&)
115
116
  Expected<int> getComdatIndex(const Comdat *C, const Module *M);
117
118
  Error addModule(Module *M);
119
  Error addSymbol(const ModuleSymbolTable &Msymtab,
120
                  const SmallPtrSet<GlobalValue *, 4> &Used,
121
                  ModuleSymbolTable::Symbol Sym);
122
123
  Error build(ArrayRef<Module *> Mods);
124
};
125
126
0
/// Adds one module's contribution to the table: a storage::Module record, the
/// COFF linker options or ELF dependent libraries found in its named metadata,
/// and one entry per symbol in the module's symbol table.
///
/// Returns an error if the module has no datalayout string, if its metadata
/// cannot be materialized, or if adding one of its symbols fails.
Error Builder::addModule(Module *M) {
  if (M->getDataLayoutStr().empty())
    return make_error<StringError>("input module has no datalayout",
                                   inconvertibleErrorCode());

  // Symbols in the llvm.used list will get the FB_Used bit and will not be
  // internalized. The same is done for llvm.compiler.used:
  //
  // The IR symbol table tracks module-level asm symbol references but not
  // inline asm. A symbol only referenced by inline asm is not in the IR symbol
  // table, so we may not know that the definition (in another translation
  // unit) is referenced. That definition may have __attribute__((used)) (which
  // lowers to llvm.compiler.used on ELF targets) to tell the compiler that it
  // may be used by inline asm. The usage is perfectly fine, so
  // llvm.compiler.used is treated conservatively like llvm.used to work around
  // our own limitation.
  SmallVector<GlobalValue *, 4> UsedV;
  for (bool CompilerUsed : {false, true})
    collectUsedGlobalVariables(*M, UsedV, CompilerUsed);
  SmallPtrSet<GlobalValue *, 4> Used(UsedV.begin(), UsedV.end());

  ModuleSymbolTable Msymtab;
  Msymtab.addModule(M);

  // The module's symbols occupy the next contiguous slice of Syms.
  const size_t FirstSymbol = Syms.size();
  storage::Module Mod;
  Mod.Begin = FirstSymbol;
  Mod.End = FirstSymbol + Msymtab.symbols().size();
  Mod.UncBegin = Uncommons.size();
  Mods.push_back(Mod);

  if (TT.isOSBinFormatCOFF()) {
    if (Error E = M->materializeMetadata())
      return E;
    NamedMDNode *LinkerOptions = M->getNamedMetadata("llvm.linker.options");
    if (LinkerOptions) {
      for (MDNode *OptionList : LinkerOptions->operands())
        for (const MDOperand &Option : cast<MDNode>(OptionList)->operands())
          COFFLinkerOptsOS << " " << cast<MDString>(Option)->getString();
    }
  }

  if (TT.isOSBinFormatELF()) {
    if (Error E = M->materializeMetadata())
      return E;
    NamedMDNode *DepLibs = M->getNamedMetadata("llvm.dependent-libraries");
    if (DepLibs) {
      for (MDNode *Entry : DepLibs->operands()) {
        // Only the first operand of each entry is recorded.
        const StringRef Library =
            cast<MDString>(cast<MDNode>(Entry)->getOperand(0))->getString();
        storage::Str Specifier;
        setStr(Specifier, Library);
        DependentLibraries.push_back(Specifier);
      }
    }
  }

  for (ModuleSymbolTable::Symbol Msym : Msymtab.symbols()) {
    if (Error Err = addSymbol(Msymtab, Used, Msym))
      return Err;
  }

  return Error::success();
}
187
188
0
/// Returns the index of \p C in the comdat table, creating the entry the first
/// time \p C is seen.
///
/// On COFF the entry is named after the mangled name of the global in \p M
/// whose name equals the comdat's name (the "leader"); it is an error if no
/// such global exists, and -1 is returned (and remembered) when that global
/// has local linkage. On other formats the comdat's own name is used.
Expected<int> Builder::getComdatIndex(const Comdat *C, const Module *M) {
  auto Inserted = ComdatMap.insert(std::make_pair(C, Comdats.size()));
  auto Slot = Inserted.first;
  // Already known: the stored index is the answer.
  if (!Inserted.second)
    return Slot->second;

  std::string Name;
  if (!TT.isOSBinFormatCOFF()) {
    Name = std::string(C->getName());
  } else {
    const GlobalValue *Leader = M->getNamedValue(C->getName());
    if (!Leader)
      return make_error<StringError>("Could not find leader",
                                     inconvertibleErrorCode());
    // Internal leaders do not affect symbol resolution, therefore they do not
    // appear in the symbol table.
    if (Leader->hasLocalLinkage()) {
      Slot->second = -1;
      return -1;
    }
    llvm::raw_string_ostream OS(Name);
    Mang.getNameWithPrefix(OS, Leader, false);
  }

  storage::Comdat Entry;
  setStr(Entry.Name, Saver.save(Name));
  Entry.SelectionKind = C->getSelectionKind();
  Comdats.push_back(Entry);

  return Slot->second;
}
217
218
0
/// Builds a lookup set containing every entry of the PreservedSymbols array.
static DenseSet<StringRef> buildPreservedSymbolsSet() {
  DenseSet<StringRef> Names;
  for (const char *Name : PreservedSymbols)
    Names.insert(StringRef(Name));
  return Names;
}
222
223
/// Appends the symbol table entry for \p Msym to Syms, together with a
/// storage::Uncommon record when the symbol needs one (common size and
/// alignment, COFF weak-external fallback name, or section name).
///
/// \p Used is the set of globals that must get the FB_used bit. Returns an
/// error for a common symbol that is not a variable, for an alias whose
/// underlying object cannot be determined, for an invalid COFF weak external,
/// or when the comdat lookup fails.
Error Builder::addSymbol(const ModuleSymbolTable &Msymtab,
                         const SmallPtrSet<GlobalValue *, 4> &Used,
                         ModuleSymbolTable::Symbol Msym) {
  Syms.emplace_back();
  storage::Symbol &Sym = Syms.back();
  Sym = {};

  auto SetFlag = [&Sym](unsigned Bit) { Sym.Flags |= 1 << Bit; };

  // The Uncommon record is created lazily, the first time a field of it is
  // needed; later requests return the same record.
  storage::Uncommon *Unc = nullptr;
  auto GetUncommon = [&]() -> storage::Uncommon & {
    if (!Unc) {
      SetFlag(storage::Symbol::FB_has_uncommon);
      Uncommons.emplace_back();
      Unc = &Uncommons.back();
      *Unc = {};
      setStr(Unc->COFFWeakExternFallbackName, "");
      setStr(Unc->SectionName, "");
    }
    return *Unc;
  };

  SmallString<64> PrintedName;
  {
    raw_svector_ostream OS(PrintedName);
    Msymtab.printSymbolName(OS, Msym);
  }
  setStr(Sym.Name, Saver.save(PrintedName.str()));

  // Translate object-file symbol flags into the corresponding symtab bits.
  const auto Flags = Msymtab.getSymbolFlags(Msym);
  static const struct {
    unsigned ObjectFlag;
    unsigned SymtabBit;
  } FlagMap[] = {
      {object::BasicSymbolRef::SF_Undefined, storage::Symbol::FB_undefined},
      {object::BasicSymbolRef::SF_Weak, storage::Symbol::FB_weak},
      {object::BasicSymbolRef::SF_Common, storage::Symbol::FB_common},
      {object::BasicSymbolRef::SF_Indirect, storage::Symbol::FB_indirect},
      {object::BasicSymbolRef::SF_Global, storage::Symbol::FB_global},
      {object::BasicSymbolRef::SF_FormatSpecific,
       storage::Symbol::FB_format_specific},
      {object::BasicSymbolRef::SF_Executable, storage::Symbol::FB_executable},
  };
  for (const auto &Entry : FlagMap)
    if (Flags & Entry.ObjectFlag)
      SetFlag(Entry.SymtabBit);

  Sym.ComdatIndex = -1;
  auto *GV = dyn_cast_if_present<GlobalValue *>(Msym);
  if (!GV) {
    // Undefined module asm symbols act as GC roots and are implicitly used.
    if (Flags & object::BasicSymbolRef::SF_Undefined)
      SetFlag(storage::Symbol::FB_used);
    setStr(Sym.IRName, "");
    return Error::success();
  }

  setStr(Sym.IRName, GV->getName());

  static const DenseSet<StringRef> PreservedSymbolsSet =
      buildPreservedSymbolsSet();
  const bool IsPreservedSymbol = PreservedSymbolsSet.contains(GV->getName());

  if (Used.count(GV) || IsPreservedSymbol)
    SetFlag(storage::Symbol::FB_used);
  if (GV->isThreadLocal())
    SetFlag(storage::Symbol::FB_tls);
  if (GV->hasGlobalUnnamedAddr())
    SetFlag(storage::Symbol::FB_unnamed_addr);
  if (GV->canBeOmittedFromSymbolTable())
    SetFlag(storage::Symbol::FB_may_omit);
  Sym.Flags |= unsigned(GV->getVisibility()) << storage::Symbol::FB_visibility;

  if (Flags & object::BasicSymbolRef::SF_Common) {
    auto *GVar = dyn_cast<GlobalVariable>(GV);
    if (!GVar)
      return make_error<StringError>("Only variables can have common linkage!",
                                     inconvertibleErrorCode());
    storage::Uncommon &U = GetUncommon();
    U.CommonSize =
        GV->getParent()->getDataLayout().getTypeAllocSize(GV->getValueType());
    const auto Alignment = GVar->getAlign();
    U.CommonAlign = Alignment ? Alignment->value() : 0;
  }

  // Find the object that decides the comdat and section: the aliasee object,
  // or for an ifunc without one, its resolver function.
  const GlobalObject *GO = GV->getAliaseeObject();
  if (!GO) {
    if (auto *IFunc = dyn_cast<GlobalIFunc>(GV))
      GO = IFunc->getResolverFunction();
    if (!GO)
      return make_error<StringError>("Unable to determine comdat of alias!",
                                     inconvertibleErrorCode());
  }
  if (const Comdat *C = GO->getComdat()) {
    Expected<int> IndexOrErr = getComdatIndex(C, GV->getParent());
    if (!IndexOrErr)
      return IndexOrErr.takeError();
    Sym.ComdatIndex = *IndexOrErr;
  }

  if (TT.isOSBinFormatCOFF()) {
    emitLinkerFlagsForGlobalCOFF(COFFLinkerOptsOS, GV, TT, Mang);

    const bool IsWeakIndirect =
        (Flags & object::BasicSymbolRef::SF_Weak) &&
        (Flags & object::BasicSymbolRef::SF_Indirect);
    if (IsWeakIndirect) {
      auto *Fallback = dyn_cast<GlobalValue>(
          cast<GlobalAlias>(GV)->getAliasee()->stripPointerCasts());
      if (!Fallback)
        return make_error<StringError>("Invalid weak external",
                                       inconvertibleErrorCode());
      std::string FallbackName;
      raw_string_ostream OS(FallbackName);
      Msymtab.printSymbolName(OS, Fallback);
      OS.flush();
      storage::Uncommon &U = GetUncommon();
      setStr(U.COFFWeakExternFallbackName, Saver.save(FallbackName));
    }
  }

  const StringRef Section = GO->getSection();
  if (!Section.empty())
    setStr(GetUncommon().SectionName, Saver.save(Section));

  return Error::success();
}
340
341
0
/// Builds the complete symbol table for \p IRMods into Symtab: fills in the
/// header, adds every module, then serializes the accumulated records after
/// the header.
///
/// \p IRMods must not be empty; the target triple and source file name are
/// taken from its first module. Returns the first error reported by
/// addModule().
Error Builder::build(ArrayRef<Module *> IRMods) {
  assert(!IRMods.empty());
  const Module &First = *IRMods[0];

  storage::Header Hdr;
  Hdr.Version = storage::Header::kCurrentVersion;
  setStr(Hdr.Producer, kExpectedProducerName);
  setStr(Hdr.TargetTriple, First.getTargetTriple());
  setStr(Hdr.SourceFileName, First.getSourceFileName());
  TT = Triple(First.getTargetTriple());

  for (Module *M : IRMods) {
    if (Error Err = addModule(M))
      return Err;
  }

  COFFLinkerOptsOS.flush();
  setStr(Hdr.COFFLinkerOpts, Saver.save(COFFLinkerOpts));

  // The header's range fields are only known once the ranges have been
  // written, so reserve room for the header first and copy it in at the end.
  Symtab.resize(sizeof(storage::Header));
  writeRange(Hdr.Modules, Mods);
  writeRange(Hdr.Comdats, Comdats);
  writeRange(Hdr.Symbols, Syms);
  writeRange(Hdr.Uncommons, Uncommons);
  writeRange(Hdr.DependentLibraries, DependentLibraries);
  *reinterpret_cast<storage::Header *>(Symtab.data()) = Hdr;
  return Error::success();
}
369
370
} // end anonymous namespace
371
372
/// Builds an IR symbol table for \p Mods into \p Symtab, adding the strings it
/// references to \p StrtabBuilder. \p Alloc backs the strings created during
/// the build. All the work is done by a temporary Builder.
Error irsymtab::build(ArrayRef<Module *> Mods, SmallVector<char, 0> &Symtab,
                      StringTableBuilder &StrtabBuilder,
                      BumpPtrAllocator &Alloc) {
  Builder TableBuilder(Symtab, StrtabBuilder, Alloc);
  return TableBuilder.build(Mods);
}
377
378
// Upgrade a vector of bitcode modules created by an old version of LLVM by
379
// creating an irsymtab for them in the current format.
380
0
static Expected<FileContents> upgrade(ArrayRef<BitcodeModule> BMs) {
381
0
  FileContents FC;
382
383
0
  LLVMContext Ctx;
384
0
  std::vector<Module *> Mods;
385
0
  std::vector<std::unique_ptr<Module>> OwnedMods;
386
0
  for (auto BM : BMs) {
387
0
    Expected<std::unique_ptr<Module>> MOrErr =
388
0
        BM.getLazyModule(Ctx, /*ShouldLazyLoadMetadata*/ true,
389
0
                         /*IsImporting*/ false);
390
0
    if (!MOrErr)
391
0
      return MOrErr.takeError();
392
393
0
    Mods.push_back(MOrErr->get());
394
0
    OwnedMods.push_back(std::move(*MOrErr));
395
0
  }
396
397
0
  StringTableBuilder StrtabBuilder(StringTableBuilder::RAW);
398
0
  BumpPtrAllocator Alloc;
399
0
  if (Error E = build(Mods, FC.Symtab, StrtabBuilder, Alloc))
400
0
    return std::move(E);
401
402
0
  StrtabBuilder.finalizeInOrder();
403
0
  FC.Strtab.resize(StrtabBuilder.getSize());
404
0
  StrtabBuilder.write((uint8_t *)FC.Strtab.data());
405
406
0
  FC.TheReader = {{FC.Symtab.data(), FC.Symtab.size()},
407
0
                  {FC.Strtab.data(), FC.Strtab.size()}};
408
0
  return std::move(FC);
409
0
}
410
411
0
/// Produces a FileContents whose reader can be used for the bitcode file
/// described by \p BFC.
///
/// The symbol table embedded in the file is used as is when it looks current.
/// Otherwise a new one is built from the modules via upgrade(). That happens
/// when (unless -disable-bitcode-version-upgrade is given) the string table is
/// empty, the symbol table is smaller than a header, the header's version or
/// producer differs from what this build writes, or the producer string does
/// not lie inside the string table; and, regardless of that flag, when the
/// number of modules in the table differs from the number in the file.
///
/// Returns an error if \p BFC contains no modules, or whatever error upgrade()
/// reports.
Expected<FileContents> irsymtab::readBitcode(const BitcodeFileContents &BFC) {
  if (BFC.Mods.empty())
    return make_error<StringError>("Bitcode file does not contain any modules",
                                   inconvertibleErrorCode());

  if (!DisableBitcodeVersionUpgrade) {
    if (BFC.StrtabForSymtab.empty() ||
        BFC.Symtab.size() < sizeof(storage::Header))
      return upgrade(BFC.Mods);

    // We cannot use the regular reader to read the version and producer,
    // because it will expect the header to be in the current format. The only
    // thing we can rely on is that the version and producer will be present as
    // the first struct elements.
    auto *Hdr = reinterpret_cast<const storage::Header *>(BFC.Symtab.data());
    const unsigned Version = Hdr->Version;

    // The producer string is located through an offset/size pair read from the
    // file. Check that it lies inside the string table before looking at it,
    // so that a malformed table is rebuilt instead of being read out of
    // bounds. The sum is formed in 64 bits so that it cannot wrap around.
    // Only the producer lookup is guarded here; the rest of the table is not
    // validated by this function.
    const uint64_t ProducerEnd = static_cast<uint64_t>(Hdr->Producer.Offset) +
                                 static_cast<uint64_t>(Hdr->Producer.Size);
    const bool ProducerInBounds = ProducerEnd <= BFC.StrtabForSymtab.size();

    if (Version != storage::Header::kCurrentVersion || !ProducerInBounds ||
        Hdr->Producer.get(BFC.StrtabForSymtab) != kExpectedProducerName)
      return upgrade(BFC.Mods);
  }

  FileContents FC;
  FC.TheReader = {{BFC.Symtab.data(), BFC.Symtab.size()},
                  {BFC.StrtabForSymtab.data(), BFC.StrtabForSymtab.size()}};

  // Finally, make sure that the number of modules in the symbol table matches
  // the number of modules in the bitcode file. If they differ, it may mean that
  // the bitcode file was created by binary concatenation, so we need to create
  // a new symbol table from scratch.
  if (FC.TheReader.getNumModules() != BFC.Mods.size())
    return upgrade(BFC.Mods);

  return std::move(FC);
}