xref: /freebsd/contrib/llvm-project/llvm/lib/Object/IRSymtab.cpp (revision d97d838569232dfad536593ef9ee6bcc366a03f3)
1  //===- IRSymtab.cpp - implementation of IR symbol tables ------------------===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  
9  #include "llvm/Object/IRSymtab.h"
10  #include "llvm/ADT/ArrayRef.h"
11  #include "llvm/ADT/DenseMap.h"
12  #include "llvm/ADT/SmallPtrSet.h"
13  #include "llvm/ADT/SmallString.h"
14  #include "llvm/ADT/SmallVector.h"
15  #include "llvm/ADT/StringRef.h"
16  #include "llvm/ADT/Triple.h"
17  #include "llvm/Config/llvm-config.h"
18  #include "llvm/IR/Comdat.h"
19  #include "llvm/IR/DataLayout.h"
20  #include "llvm/IR/GlobalAlias.h"
21  #include "llvm/IR/GlobalObject.h"
22  #include "llvm/IR/Mangler.h"
23  #include "llvm/IR/Metadata.h"
24  #include "llvm/IR/Module.h"
25  #include "llvm/Bitcode/BitcodeReader.h"
26  #include "llvm/MC/StringTableBuilder.h"
27  #include "llvm/Object/IRObjectFile.h"
28  #include "llvm/Object/ModuleSymbolTable.h"
29  #include "llvm/Object/SymbolicFile.h"
30  #include "llvm/Support/Allocator.h"
31  #include "llvm/Support/Casting.h"
32  #include "llvm/Support/Error.h"
33  #include "llvm/Support/StringSaver.h"
34  #include "llvm/Support/VCSRevision.h"
35  #include "llvm/Support/raw_ostream.h"
36  #include <cassert>
37  #include <string>
38  #include <utility>
39  #include <vector>
40  
41  using namespace llvm;
42  using namespace irsymtab;
43  
44  static const char *LibcallRoutineNames[] = {
45  #define HANDLE_LIBCALL(code, name) name,
46  #include "llvm/IR/RuntimeLibcalls.def"
47  #undef HANDLE_LIBCALL
48  };
49  
50  namespace {
51  
52  const char *getExpectedProducerName() {
53    static char DefaultName[] = LLVM_VERSION_STRING
54  #ifdef LLVM_REVISION
55        " " LLVM_REVISION
56  #endif
57        ;
58    // Allows for testing of the irsymtab writer and upgrade mechanism. This
59    // environment variable should not be set by users.
60    if (char *OverrideName = getenv("LLVM_OVERRIDE_PRODUCER"))
61      return OverrideName;
62    return DefaultName;
63  }
64  
65  const char *kExpectedProducerName = getExpectedProducerName();
66  
67  /// Stores the temporary state that is required to build an IR symbol table.
68  struct Builder {
69    SmallVector<char, 0> &Symtab;
70    StringTableBuilder &StrtabBuilder;
71    StringSaver Saver;
72  
73    // This ctor initializes a StringSaver using the passed in BumpPtrAllocator.
74    // The StringTableBuilder does not create a copy of any strings added to it,
75    // so this provides somewhere to store any strings that we create.
76    Builder(SmallVector<char, 0> &Symtab, StringTableBuilder &StrtabBuilder,
77            BumpPtrAllocator &Alloc)
78        : Symtab(Symtab), StrtabBuilder(StrtabBuilder), Saver(Alloc) {}
79  
80    DenseMap<const Comdat *, int> ComdatMap;
81    Mangler Mang;
82    Triple TT;
83  
84    std::vector<storage::Comdat> Comdats;
85    std::vector<storage::Module> Mods;
86    std::vector<storage::Symbol> Syms;
87    std::vector<storage::Uncommon> Uncommons;
88  
89    std::string COFFLinkerOpts;
90    raw_string_ostream COFFLinkerOptsOS{COFFLinkerOpts};
91  
92    std::vector<storage::Str> DependentLibraries;
93  
94    void setStr(storage::Str &S, StringRef Value) {
95      S.Offset = StrtabBuilder.add(Value);
96      S.Size = Value.size();
97    }
98  
99    template <typename T>
100    void writeRange(storage::Range<T> &R, const std::vector<T> &Objs) {
101      R.Offset = Symtab.size();
102      R.Size = Objs.size();
103      Symtab.insert(Symtab.end(), reinterpret_cast<const char *>(Objs.data()),
104                    reinterpret_cast<const char *>(Objs.data() + Objs.size()));
105    }
106  
107    Expected<int> getComdatIndex(const Comdat *C, const Module *M);
108  
109    Error addModule(Module *M);
110    Error addSymbol(const ModuleSymbolTable &Msymtab,
111                    const SmallPtrSet<GlobalValue *, 8> &Used,
112                    ModuleSymbolTable::Symbol Sym);
113  
114    Error build(ArrayRef<Module *> Mods);
115  };
116  
117  Error Builder::addModule(Module *M) {
118    if (M->getDataLayoutStr().empty())
119      return make_error<StringError>("input module has no datalayout",
120                                     inconvertibleErrorCode());
121  
122    SmallPtrSet<GlobalValue *, 8> Used;
123    collectUsedGlobalVariables(*M, Used, /*CompilerUsed*/ false);
124  
125    ModuleSymbolTable Msymtab;
126    Msymtab.addModule(M);
127  
128    storage::Module Mod;
129    Mod.Begin = Syms.size();
130    Mod.End = Syms.size() + Msymtab.symbols().size();
131    Mod.UncBegin = Uncommons.size();
132    Mods.push_back(Mod);
133  
134    if (TT.isOSBinFormatCOFF()) {
135      if (auto E = M->materializeMetadata())
136        return E;
137      if (NamedMDNode *LinkerOptions =
138              M->getNamedMetadata("llvm.linker.options")) {
139        for (MDNode *MDOptions : LinkerOptions->operands())
140          for (const MDOperand &MDOption : cast<MDNode>(MDOptions)->operands())
141            COFFLinkerOptsOS << " " << cast<MDString>(MDOption)->getString();
142      }
143    }
144  
145    if (TT.isOSBinFormatELF()) {
146      if (auto E = M->materializeMetadata())
147        return E;
148      if (NamedMDNode *N = M->getNamedMetadata("llvm.dependent-libraries")) {
149        for (MDNode *MDOptions : N->operands()) {
150          const auto OperandStr =
151              cast<MDString>(cast<MDNode>(MDOptions)->getOperand(0))->getString();
152          storage::Str Specifier;
153          setStr(Specifier, OperandStr);
154          DependentLibraries.emplace_back(Specifier);
155        }
156      }
157    }
158  
159    for (ModuleSymbolTable::Symbol Msym : Msymtab.symbols())
160      if (Error Err = addSymbol(Msymtab, Used, Msym))
161        return Err;
162  
163    return Error::success();
164  }
165  
166  Expected<int> Builder::getComdatIndex(const Comdat *C, const Module *M) {
167    auto P = ComdatMap.insert(std::make_pair(C, Comdats.size()));
168    if (P.second) {
169      std::string Name;
170      if (TT.isOSBinFormatCOFF()) {
171        const GlobalValue *GV = M->getNamedValue(C->getName());
172        if (!GV)
173          return make_error<StringError>("Could not find leader",
174                                         inconvertibleErrorCode());
175        // Internal leaders do not affect symbol resolution, therefore they do not
176        // appear in the symbol table.
177        if (GV->hasLocalLinkage()) {
178          P.first->second = -1;
179          return -1;
180        }
181        llvm::raw_string_ostream OS(Name);
182        Mang.getNameWithPrefix(OS, GV, false);
183      } else {
184        Name = C->getName();
185      }
186  
187      storage::Comdat Comdat;
188      setStr(Comdat.Name, Saver.save(Name));
189      Comdats.push_back(Comdat);
190    }
191  
192    return P.first->second;
193  }
194  
195  Error Builder::addSymbol(const ModuleSymbolTable &Msymtab,
196                           const SmallPtrSet<GlobalValue *, 8> &Used,
197                           ModuleSymbolTable::Symbol Msym) {
198    Syms.emplace_back();
199    storage::Symbol &Sym = Syms.back();
200    Sym = {};
201  
202    storage::Uncommon *Unc = nullptr;
203    auto Uncommon = [&]() -> storage::Uncommon & {
204      if (Unc)
205        return *Unc;
206      Sym.Flags |= 1 << storage::Symbol::FB_has_uncommon;
207      Uncommons.emplace_back();
208      Unc = &Uncommons.back();
209      *Unc = {};
210      setStr(Unc->COFFWeakExternFallbackName, "");
211      setStr(Unc->SectionName, "");
212      return *Unc;
213    };
214  
215    SmallString<64> Name;
216    {
217      raw_svector_ostream OS(Name);
218      Msymtab.printSymbolName(OS, Msym);
219    }
220    setStr(Sym.Name, Saver.save(StringRef(Name)));
221  
222    auto Flags = Msymtab.getSymbolFlags(Msym);
223    if (Flags & object::BasicSymbolRef::SF_Undefined)
224      Sym.Flags |= 1 << storage::Symbol::FB_undefined;
225    if (Flags & object::BasicSymbolRef::SF_Weak)
226      Sym.Flags |= 1 << storage::Symbol::FB_weak;
227    if (Flags & object::BasicSymbolRef::SF_Common)
228      Sym.Flags |= 1 << storage::Symbol::FB_common;
229    if (Flags & object::BasicSymbolRef::SF_Indirect)
230      Sym.Flags |= 1 << storage::Symbol::FB_indirect;
231    if (Flags & object::BasicSymbolRef::SF_Global)
232      Sym.Flags |= 1 << storage::Symbol::FB_global;
233    if (Flags & object::BasicSymbolRef::SF_FormatSpecific)
234      Sym.Flags |= 1 << storage::Symbol::FB_format_specific;
235    if (Flags & object::BasicSymbolRef::SF_Executable)
236      Sym.Flags |= 1 << storage::Symbol::FB_executable;
237  
238    Sym.ComdatIndex = -1;
239    auto *GV = Msym.dyn_cast<GlobalValue *>();
240    if (!GV) {
241      // Undefined module asm symbols act as GC roots and are implicitly used.
242      if (Flags & object::BasicSymbolRef::SF_Undefined)
243        Sym.Flags |= 1 << storage::Symbol::FB_used;
244      setStr(Sym.IRName, "");
245      return Error::success();
246    }
247  
248    setStr(Sym.IRName, GV->getName());
249  
250    bool IsBuiltinFunc = false;
251  
252    for (const char *LibcallName : LibcallRoutineNames)
253      if (GV->getName() == LibcallName)
254        IsBuiltinFunc = true;
255  
256    if (Used.count(GV) || IsBuiltinFunc)
257      Sym.Flags |= 1 << storage::Symbol::FB_used;
258    if (GV->isThreadLocal())
259      Sym.Flags |= 1 << storage::Symbol::FB_tls;
260    if (GV->hasGlobalUnnamedAddr())
261      Sym.Flags |= 1 << storage::Symbol::FB_unnamed_addr;
262    if (GV->canBeOmittedFromSymbolTable())
263      Sym.Flags |= 1 << storage::Symbol::FB_may_omit;
264    Sym.Flags |= unsigned(GV->getVisibility()) << storage::Symbol::FB_visibility;
265  
266    if (Flags & object::BasicSymbolRef::SF_Common) {
267      Uncommon().CommonSize = GV->getParent()->getDataLayout().getTypeAllocSize(
268          GV->getType()->getElementType());
269      Uncommon().CommonAlign = GV->getAlignment();
270    }
271  
272    const GlobalObject *Base = GV->getBaseObject();
273    if (!Base)
274      return make_error<StringError>("Unable to determine comdat of alias!",
275                                     inconvertibleErrorCode());
276    if (const Comdat *C = Base->getComdat()) {
277      Expected<int> ComdatIndexOrErr = getComdatIndex(C, GV->getParent());
278      if (!ComdatIndexOrErr)
279        return ComdatIndexOrErr.takeError();
280      Sym.ComdatIndex = *ComdatIndexOrErr;
281    }
282  
283    if (TT.isOSBinFormatCOFF()) {
284      emitLinkerFlagsForGlobalCOFF(COFFLinkerOptsOS, GV, TT, Mang);
285  
286      if ((Flags & object::BasicSymbolRef::SF_Weak) &&
287          (Flags & object::BasicSymbolRef::SF_Indirect)) {
288        auto *Fallback = dyn_cast<GlobalValue>(
289            cast<GlobalAlias>(GV)->getAliasee()->stripPointerCasts());
290        if (!Fallback)
291          return make_error<StringError>("Invalid weak external",
292                                         inconvertibleErrorCode());
293        std::string FallbackName;
294        raw_string_ostream OS(FallbackName);
295        Msymtab.printSymbolName(OS, Fallback);
296        OS.flush();
297        setStr(Uncommon().COFFWeakExternFallbackName, Saver.save(FallbackName));
298      }
299    }
300  
301    if (!Base->getSection().empty())
302      setStr(Uncommon().SectionName, Saver.save(Base->getSection()));
303  
304    return Error::success();
305  }
306  
307  Error Builder::build(ArrayRef<Module *> IRMods) {
308    storage::Header Hdr;
309  
310    assert(!IRMods.empty());
311    Hdr.Version = storage::Header::kCurrentVersion;
312    setStr(Hdr.Producer, kExpectedProducerName);
313    setStr(Hdr.TargetTriple, IRMods[0]->getTargetTriple());
314    setStr(Hdr.SourceFileName, IRMods[0]->getSourceFileName());
315    TT = Triple(IRMods[0]->getTargetTriple());
316  
317    for (auto *M : IRMods)
318      if (Error Err = addModule(M))
319        return Err;
320  
321    COFFLinkerOptsOS.flush();
322    setStr(Hdr.COFFLinkerOpts, Saver.save(COFFLinkerOpts));
323  
324    // We are about to fill in the header's range fields, so reserve space for it
325    // and copy it in afterwards.
326    Symtab.resize(sizeof(storage::Header));
327    writeRange(Hdr.Modules, Mods);
328    writeRange(Hdr.Comdats, Comdats);
329    writeRange(Hdr.Symbols, Syms);
330    writeRange(Hdr.Uncommons, Uncommons);
331    writeRange(Hdr.DependentLibraries, DependentLibraries);
332    *reinterpret_cast<storage::Header *>(Symtab.data()) = Hdr;
333    return Error::success();
334  }
335  
336  } // end anonymous namespace
337  
338  Error irsymtab::build(ArrayRef<Module *> Mods, SmallVector<char, 0> &Symtab,
339                        StringTableBuilder &StrtabBuilder,
340                        BumpPtrAllocator &Alloc) {
341    return Builder(Symtab, StrtabBuilder, Alloc).build(Mods);
342  }
343  
344  // Upgrade a vector of bitcode modules created by an old version of LLVM by
345  // creating an irsymtab for them in the current format.
346  static Expected<FileContents> upgrade(ArrayRef<BitcodeModule> BMs) {
347    FileContents FC;
348  
349    LLVMContext Ctx;
350    std::vector<Module *> Mods;
351    std::vector<std::unique_ptr<Module>> OwnedMods;
352    for (auto BM : BMs) {
353      Expected<std::unique_ptr<Module>> MOrErr =
354          BM.getLazyModule(Ctx, /*ShouldLazyLoadMetadata*/ true,
355                           /*IsImporting*/ false);
356      if (!MOrErr)
357        return MOrErr.takeError();
358  
359      Mods.push_back(MOrErr->get());
360      OwnedMods.push_back(std::move(*MOrErr));
361    }
362  
363    StringTableBuilder StrtabBuilder(StringTableBuilder::RAW);
364    BumpPtrAllocator Alloc;
365    if (Error E = build(Mods, FC.Symtab, StrtabBuilder, Alloc))
366      return std::move(E);
367  
368    StrtabBuilder.finalizeInOrder();
369    FC.Strtab.resize(StrtabBuilder.getSize());
370    StrtabBuilder.write((uint8_t *)FC.Strtab.data());
371  
372    FC.TheReader = {{FC.Symtab.data(), FC.Symtab.size()},
373                    {FC.Strtab.data(), FC.Strtab.size()}};
374    return std::move(FC);
375  }
376  
377  Expected<FileContents> irsymtab::readBitcode(const BitcodeFileContents &BFC) {
378    if (BFC.Mods.empty())
379      return make_error<StringError>("Bitcode file does not contain any modules",
380                                     inconvertibleErrorCode());
381  
382    if (BFC.StrtabForSymtab.empty() ||
383        BFC.Symtab.size() < sizeof(storage::Header))
384      return upgrade(BFC.Mods);
385  
386    // We cannot use the regular reader to read the version and producer, because
387    // it will expect the header to be in the current format. The only thing we
388    // can rely on is that the version and producer will be present as the first
389    // struct elements.
390    auto *Hdr = reinterpret_cast<const storage::Header *>(BFC.Symtab.data());
391    unsigned Version = Hdr->Version;
392    StringRef Producer = Hdr->Producer.get(BFC.StrtabForSymtab);
393    if (Version != storage::Header::kCurrentVersion ||
394        Producer != kExpectedProducerName)
395      return upgrade(BFC.Mods);
396  
397    FileContents FC;
398    FC.TheReader = {{BFC.Symtab.data(), BFC.Symtab.size()},
399                    {BFC.StrtabForSymtab.data(), BFC.StrtabForSymtab.size()}};
400  
401    // Finally, make sure that the number of modules in the symbol table matches
402    // the number of modules in the bitcode file. If they differ, it may mean that
403    // the bitcode file was created by binary concatenation, so we need to create
404    // a new symbol table from scratch.
405    if (FC.TheReader.getNumModules() != BFC.Mods.size())
406      return upgrade(std::move(BFC.Mods));
407  
408    return std::move(FC);
409  }
410