xref: /freebsd/contrib/llvm-project/llvm/include/llvm/Object/IRSymtab.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- IRSymtab.h - data definitions for IR symbol tables -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains data definitions and a reader and builder for a symbol
10 // table for LLVM IR. Its purpose is to allow linkers and other consumers of
11 // bitcode files to efficiently read the symbol table for symbol resolution
12 // purposes without needing to construct a module in memory.
13 //
14 // As with most object files the symbol table has two parts: the symbol table
15 // itself and a string table which is referenced by the symbol table.
16 //
17 // A symbol table corresponds to a single bitcode file, which may consist of
18 // multiple modules, so symbol tables may likewise contain symbols for multiple
19 // modules.
20 //
21 //===----------------------------------------------------------------------===//
22 
23 #ifndef LLVM_OBJECT_IRSYMTAB_H
24 #define LLVM_OBJECT_IRSYMTAB_H
25 
26 #include "llvm/ADT/ArrayRef.h"
27 #include "llvm/ADT/StringRef.h"
28 #include "llvm/ADT/iterator_range.h"
29 #include "llvm/IR/Comdat.h"
30 #include "llvm/IR/GlobalValue.h"
31 #include "llvm/Object/SymbolicFile.h"
32 #include "llvm/Support/Allocator.h"
33 #include "llvm/Support/Compiler.h"
34 #include "llvm/Support/Endian.h"
35 #include "llvm/Support/Error.h"
36 #include <cassert>
37 #include <cstdint>
38 #include <vector>
39 
40 namespace llvm {
41 
42 struct BitcodeFileContents;
43 class StringTableBuilder;
44 
45 namespace irsymtab {
46 
47 namespace storage {
48 
49 // The data structures in this namespace define the low-level serialization
50 // format. Clients that just want to read a symbol table should use the
51 // irsymtab::Reader class.
52 
// The integer type from which the storage structs below are composed, either
// directly or through Str and Range.
// NOTE(review): presumably an unsigned 32-bit little-endian value, going by
// the name ulittle32_t; its definition is not visible here - confirm in
// llvm/Support/Endian.h.
53 using Word = support::ulittle32_t;
54 
55 /// A reference to a string in the string table.
56 struct Str {
57   Word Offset, Size;
58 
getStr59   StringRef get(StringRef Strtab) const {
60     return {Strtab.data() + Offset, Size};
61   }
62 };
63 
64 /// A reference to a range of objects in the symbol table.
65 template <typename T> struct Range {
66   Word Offset, Size;
67 
getRange68   ArrayRef<T> get(StringRef Symtab) const {
69     return {reinterpret_cast<const T *>(Symtab.data() + Offset), Size};
70   }
71 };
72 
73 /// Describes the range of a particular module's symbols within the symbol
74 /// table.
75 struct Module {
76   Word Begin, End;
77 
78   /// The index of the first Uncommon for this Module.
79   Word UncBegin;
80 };
81 
82 /// This is equivalent to an IR comdat.
83 struct Comdat {
84   Str Name;
85 
86   // llvm::Comdat::SelectionKind
87   Word SelectionKind;
88 };
89 
90 /// Contains the information needed by linkers for symbol resolution, as well as
91 /// by the LTO implementation itself.
92 struct Symbol {
93   /// The mangled symbol name.
94   Str Name;
95 
96   /// The unmangled symbol name, or the empty string if this is not an IR
97   /// symbol.
98   Str IRName;
99 
100   /// The index into Header::Comdats, or -1 if not a comdat member.
101   Word ComdatIndex;
102 
103   Word Flags;
104   enum FlagBits {
105     FB_visibility, // 2 bits
106     FB_has_uncommon = FB_visibility + 2,
107     FB_undefined,
108     FB_weak,
109     FB_common,
110     FB_indirect,
111     FB_used,
112     FB_tls,
113     FB_may_omit,
114     FB_global,
115     FB_format_specific,
116     FB_unnamed_addr,
117     FB_executable,
118   };
119 };
120 
121 /// This data structure contains rarely used symbol fields and is optionally
122 /// referenced by a Symbol.
123 struct Uncommon {
124   Word CommonSize, CommonAlign;
125 
126   /// COFF-specific: the name of the symbol that a weak external resolves to
127   /// if not defined.
128   Str COFFWeakExternFallbackName;
129 
130   /// Specified section name, if any.
131   Str SectionName;
132 };
133 
134 
135 struct Header {
136   /// Version number of the symtab format. This number should be incremented
137   /// when the format changes, but it does not need to be incremented if a
138   /// change to LLVM would cause it to create a different symbol table.
139   Word Version;
140   enum { kCurrentVersion = 3 };
141 
142   /// The producer's version string (LLVM_VERSION_STRING " " LLVM_REVISION).
143   /// Consumers should rebuild the symbol table from IR if the producer's
144   /// version does not match the consumer's version due to potential differences
145   /// in symbol table format, symbol enumeration order and so on.
146   Str Producer;
147 
148   Range<Module> Modules;
149   Range<Comdat> Comdats;
150   Range<Symbol> Symbols;
151   Range<Uncommon> Uncommons;
152 
153   Str TargetTriple, SourceFileName;
154 
155   /// COFF-specific: linker directives.
156   Str COFFLinkerOpts;
157 
158   /// Dependent Library Specifiers
159   Range<Str> DependentLibraries;
160 };
161 
162 } // end namespace storage
163 
164 /// Fills in Symtab and StrtabBuilder with a valid symbol and string table for
165 /// Mods.
///
/// NOTE(review): the roles below are read off this signature only; the
/// definition is not visible in this header - confirm against it.
///  - Mods: the modules whose symbols are to be described.
///  - Symtab: receives the serialized symbol table.
///  - StrtabBuilder: receives the strings that the symbol table refers to.
///  - Alloc: purpose not evident from the declaration.
/// The result is an Error, so callers are expected to check it.
166 LLVM_ABI Error build(ArrayRef<Module *> Mods, SmallVector<char, 0> &Symtab,
167                      StringTableBuilder &StrtabBuilder,
168                      BumpPtrAllocator &Alloc);
169 
170 /// This represents a symbol that has been read from a storage::Symbol and
171 /// possibly a storage::Uncommon.
172 struct Symbol {
173   // Copied from storage::Symbol.
174   mutable StringRef Name;
175   StringRef IRName;
176   int ComdatIndex;
177   uint32_t Flags;
178 
179   // Copied from storage::Uncommon.
180   uint32_t CommonSize, CommonAlign;
181   StringRef COFFWeakExternFallbackName;
182   StringRef SectionName;
183 
184   /// Returns the mangled symbol name.
getNameSymbol185   StringRef getName() const { return Name; }
186 
187   /// Returns the unmangled symbol name, or the empty string if this is not an
188   /// IR symbol.
getIRNameSymbol189   StringRef getIRName() const { return IRName; }
190 
191   /// Returns the index into the comdat table (see Reader::getComdatTable()), or
192   /// -1 if not a comdat member.
getComdatIndexSymbol193   int getComdatIndex() const { return ComdatIndex; }
194 
195   using S = storage::Symbol;
196 
getVisibilitySymbol197   GlobalValue::VisibilityTypes getVisibility() const {
198     return GlobalValue::VisibilityTypes((Flags >> S::FB_visibility) & 3);
199   }
200 
isUndefinedSymbol201   bool isUndefined() const { return (Flags >> S::FB_undefined) & 1; }
isWeakSymbol202   bool isWeak() const { return (Flags >> S::FB_weak) & 1; }
isCommonSymbol203   bool isCommon() const { return (Flags >> S::FB_common) & 1; }
isIndirectSymbol204   bool isIndirect() const { return (Flags >> S::FB_indirect) & 1; }
isUsedSymbol205   bool isUsed() const { return (Flags >> S::FB_used) & 1; }
isTLSSymbol206   bool isTLS() const { return (Flags >> S::FB_tls) & 1; }
207 
canBeOmittedFromSymbolTableSymbol208   bool canBeOmittedFromSymbolTable() const {
209     return (Flags >> S::FB_may_omit) & 1;
210   }
211 
isGlobalSymbol212   bool isGlobal() const { return (Flags >> S::FB_global) & 1; }
isFormatSpecificSymbol213   bool isFormatSpecific() const { return (Flags >> S::FB_format_specific) & 1; }
isUnnamedAddrSymbol214   bool isUnnamedAddr() const { return (Flags >> S::FB_unnamed_addr) & 1; }
isExecutableSymbol215   bool isExecutable() const { return (Flags >> S::FB_executable) & 1; }
216 
getCommonSizeSymbol217   uint64_t getCommonSize() const {
218     assert(isCommon());
219     return CommonSize;
220   }
221 
getCommonAlignmentSymbol222   uint32_t getCommonAlignment() const {
223     assert(isCommon());
224     return CommonAlign;
225   }
226 
227   /// COFF-specific: for weak externals, returns the name of the symbol that is
228   /// used as a fallback if the weak external remains undefined.
getCOFFWeakExternalFallbackSymbol229   StringRef getCOFFWeakExternalFallback() const {
230     assert(isWeak() && isIndirect());
231     return COFFWeakExternFallbackName;
232   }
233 
getSectionNameSymbol234   StringRef getSectionName() const { return SectionName; }
235 };
236 
237 /// This class can be used to read a Symtab and Strtab produced by
238 /// irsymtab::build.
239 class Reader {
240   StringRef Symtab, Strtab;
241 
242   ArrayRef<storage::Module> Modules;
243   ArrayRef<storage::Comdat> Comdats;
244   ArrayRef<storage::Symbol> Symbols;
245   ArrayRef<storage::Uncommon> Uncommons;
246   ArrayRef<storage::Str> DependentLibraries;
247 
str(storage::Str S)248   StringRef str(storage::Str S) const { return S.get(Strtab); }
249 
range(storage::Range<T> R)250   template <typename T> ArrayRef<T> range(storage::Range<T> R) const {
251     return R.get(Symtab);
252   }
253 
header()254   const storage::Header &header() const {
255     return *reinterpret_cast<const storage::Header *>(Symtab.data());
256   }
257 
258 public:
259   class SymbolRef;
260 
261   Reader() = default;
Reader(StringRef Symtab,StringRef Strtab)262   Reader(StringRef Symtab, StringRef Strtab) : Symtab(Symtab), Strtab(Strtab) {
263     Modules = range(header().Modules);
264     Comdats = range(header().Comdats);
265     Symbols = range(header().Symbols);
266     Uncommons = range(header().Uncommons);
267     DependentLibraries = range(header().DependentLibraries);
268   }
269 
270   using symbol_range = iterator_range<object::content_iterator<SymbolRef>>;
271 
272   /// Returns the symbol table for the entire bitcode file.
273   /// The symbols enumerated by this method are ephemeral, but they can be
274   /// copied into an irsymtab::Symbol object.
275   symbol_range symbols() const;
276 
getNumModules()277   size_t getNumModules() const { return Modules.size(); }
278 
279   /// Returns a slice of the symbol table for the I'th module in the file.
280   /// The symbols enumerated by this method are ephemeral, but they can be
281   /// copied into an irsymtab::Symbol object.
282   symbol_range module_symbols(unsigned I) const;
283 
getTargetTriple()284   StringRef getTargetTriple() const { return str(header().TargetTriple); }
285 
286   /// Returns the source file path specified at compile time.
getSourceFileName()287   StringRef getSourceFileName() const { return str(header().SourceFileName); }
288 
289   /// Returns a table with all the comdats used by this file.
290   std::vector<std::pair<StringRef, llvm::Comdat::SelectionKind>>
getComdatTable()291   getComdatTable() const {
292     std::vector<std::pair<StringRef, llvm::Comdat::SelectionKind>> ComdatTable;
293     ComdatTable.reserve(Comdats.size());
294     for (auto C : Comdats)
295       ComdatTable.push_back({str(C.Name), llvm::Comdat::SelectionKind(
296                                               uint32_t(C.SelectionKind))});
297     return ComdatTable;
298   }
299 
300   /// COFF-specific: returns linker options specified in the input file.
getCOFFLinkerOpts()301   StringRef getCOFFLinkerOpts() const { return str(header().COFFLinkerOpts); }
302 
303   /// Returns dependent library specifiers
getDependentLibraries()304   std::vector<StringRef> getDependentLibraries() const {
305     std::vector<StringRef> Specifiers;
306     Specifiers.reserve(DependentLibraries.size());
307     for (auto S : DependentLibraries) {
308       Specifiers.push_back(str(S));
309     }
310     return Specifiers;
311   }
312 };
313 
314 /// Ephemeral symbols produced by Reader::symbols() and
315 /// Reader::module_symbols().
316 class Reader::SymbolRef : public Symbol {
317   const storage::Symbol *SymI, *SymE;
318   const storage::Uncommon *UncI;
319   const Reader *R;
320 
read()321   void read() {
322     if (SymI == SymE)
323       return;
324 
325     Name = R->str(SymI->Name);
326     IRName = R->str(SymI->IRName);
327     ComdatIndex = SymI->ComdatIndex;
328     Flags = SymI->Flags;
329 
330     if (Flags & (1 << storage::Symbol::FB_has_uncommon)) {
331       CommonSize = UncI->CommonSize;
332       CommonAlign = UncI->CommonAlign;
333       COFFWeakExternFallbackName = R->str(UncI->COFFWeakExternFallbackName);
334       SectionName = R->str(UncI->SectionName);
335     } else
336       // Reset this field so it can be queried unconditionally for all symbols.
337       SectionName = "";
338   }
339 
340 public:
SymbolRef(const storage::Symbol * SymI,const storage::Symbol * SymE,const storage::Uncommon * UncI,const Reader * R)341   SymbolRef(const storage::Symbol *SymI, const storage::Symbol *SymE,
342             const storage::Uncommon *UncI, const Reader *R)
343       : SymI(SymI), SymE(SymE), UncI(UncI), R(R) {
344     read();
345   }
346 
moveNext()347   void moveNext() {
348     ++SymI;
349     if (Flags & (1 << storage::Symbol::FB_has_uncommon))
350       ++UncI;
351     read();
352   }
353 
354   bool operator==(const SymbolRef &Other) const { return SymI == Other.SymI; }
355 };
356 
symbols()357 inline Reader::symbol_range Reader::symbols() const {
358   return {SymbolRef(Symbols.begin(), Symbols.end(), Uncommons.begin(), this),
359           SymbolRef(Symbols.end(), Symbols.end(), nullptr, this)};
360 }
361 
module_symbols(unsigned I)362 inline Reader::symbol_range Reader::module_symbols(unsigned I) const {
363   const storage::Module &M = Modules[I];
364   const storage::Symbol *MBegin = Symbols.begin() + M.Begin,
365                         *MEnd = Symbols.begin() + M.End;
366   return {SymbolRef(MBegin, MEnd, Uncommons.begin() + M.UncBegin, this),
367           SymbolRef(MEnd, MEnd, nullptr, this)};
368 }
369 
370 /// The contents of the irsymtab in a bitcode file. Any underlying data for the
371 /// irsymtab are owned by Symtab and Strtab.
372 struct FileContents {
373   SmallVector<char, 0> Symtab, Strtab;
374   Reader TheReader;
375 };
376 
377 /// Reads the contents of a bitcode file, creating its irsymtab if necessary.
///
/// The result is an Expected, so it holds either the FileContents or an error
/// that the caller has to handle.
/// NOTE(review): the condition under which the irsymtab gets created rather
/// than reused is decided in the definition, which is not visible in this
/// header - confirm there.
378 LLVM_ABI Expected<FileContents> readBitcode(const BitcodeFileContents &BFC);
379 
380 } // end namespace irsymtab
381 } // end namespace llvm
382 
383 #endif // LLVM_OBJECT_IRSYMTAB_H
384