xref: /freebsd/contrib/llvm-project/lld/ELF/Symbols.h (revision 04eeddc0aa8e0a417a16eaf9d7d095207f4a8623)
10b57cec5SDimitry Andric //===- Symbols.h ------------------------------------------------*- C++ -*-===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file defines various types of Symbols.
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #ifndef LLD_ELF_SYMBOLS_H
140b57cec5SDimitry Andric #define LLD_ELF_SYMBOLS_H
150b57cec5SDimitry Andric 
160b57cec5SDimitry Andric #include "InputFiles.h"
170b57cec5SDimitry Andric #include "InputSection.h"
180b57cec5SDimitry Andric #include "lld/Common/LLVM.h"
190eae32dcSDimitry Andric #include "lld/Common/Memory.h"
200b57cec5SDimitry Andric #include "lld/Common/Strings.h"
215ffd83dbSDimitry Andric #include "llvm/ADT/DenseMap.h"
220b57cec5SDimitry Andric #include "llvm/Object/Archive.h"
230b57cec5SDimitry Andric #include "llvm/Object/ELF.h"
24349cc55cSDimitry Andric #include <tuple>
250b57cec5SDimitry Andric 
260b57cec5SDimitry Andric namespace lld {
275ffd83dbSDimitry Andric // Returns a string representation for a symbol for diagnostics.
2885868e8aSDimitry Andric std::string toString(const elf::Symbol &);
2985868e8aSDimitry Andric 
3085868e8aSDimitry Andric // There are two different ways to convert an Archive::Symbol to a string:
3185868e8aSDimitry Andric // One for Microsoft name mangling and one for Itanium name mangling.
3285868e8aSDimitry Andric // Call the functions toCOFFString and toELFString, not just toString.
3385868e8aSDimitry Andric std::string toELFString(const llvm::object::Archive::Symbol &);
3485868e8aSDimitry Andric 
350b57cec5SDimitry Andric namespace elf {
// Forward declarations of the symbol hierarchy and of the file type that
// symbols point back to.
class CommonSymbol;
class Defined;
class InputFile;
class LazyArchive;
class LazyObject;
class SharedSymbol;
class Symbol;
class Undefined;
440b57cec5SDimitry Andric 
// Some index properties of a symbol are stored separately in this auxiliary
// struct to decrease sizeof(SymbolUnion) in the majority of cases.
//
// Every index defaults to uint32_t(-1) (the initializer -1 wraps around),
// which the Symbol accessors treat as "no entry allocated".
struct SymbolAux {
  uint32_t gotIdx = -1;
  uint32_t pltIdx = -1;
  uint32_t tlsDescIdx = -1;
  uint32_t tlsGdIdx = -1;
};
530b57cec5SDimitry Andric 
54*04eeddc0SDimitry Andric extern SmallVector<SymbolAux, 0> symAux;
55*04eeddc0SDimitry Andric 
560b57cec5SDimitry Andric // The base class for real symbol classes.
570b57cec5SDimitry Andric class Symbol {
580b57cec5SDimitry Andric public:
590b57cec5SDimitry Andric   enum Kind {
600b57cec5SDimitry Andric     PlaceholderKind,
610b57cec5SDimitry Andric     DefinedKind,
620b57cec5SDimitry Andric     CommonKind,
630b57cec5SDimitry Andric     SharedKind,
640b57cec5SDimitry Andric     UndefinedKind,
650b57cec5SDimitry Andric     LazyArchiveKind,
660b57cec5SDimitry Andric     LazyObjectKind,
670b57cec5SDimitry Andric   };
680b57cec5SDimitry Andric 
690b57cec5SDimitry Andric   Kind kind() const { return static_cast<Kind>(symbolKind); }
700b57cec5SDimitry Andric 
710b57cec5SDimitry Andric   // The file from which this symbol was created.
720b57cec5SDimitry Andric   InputFile *file;
730b57cec5SDimitry Andric 
740b57cec5SDimitry Andric protected:
750b57cec5SDimitry Andric   const char *nameData;
76*04eeddc0SDimitry Andric   // 32-bit size saves space.
77*04eeddc0SDimitry Andric   uint32_t nameSize;
780b57cec5SDimitry Andric 
790b57cec5SDimitry Andric public:
80*04eeddc0SDimitry Andric   // A symAux index used to access GOT/PLT entry indexes. This is allocated in
81*04eeddc0SDimitry Andric   // postScanRelocations().
82*04eeddc0SDimitry Andric   uint32_t auxIdx = -1;
830b57cec5SDimitry Andric   uint32_t dynsymIndex = 0;
840b57cec5SDimitry Andric 
850b57cec5SDimitry Andric   // This field is a index to the symbol's version definition.
860eae32dcSDimitry Andric   uint16_t verdefIndex = -1;
870b57cec5SDimitry Andric 
880b57cec5SDimitry Andric   // Version definition index.
890b57cec5SDimitry Andric   uint16_t versionId;
900b57cec5SDimitry Andric 
910b57cec5SDimitry Andric   // Symbol binding. This is not overwritten by replace() to track
920b57cec5SDimitry Andric   // changes during resolution. In particular:
930b57cec5SDimitry Andric   //  - An undefined weak is still weak when it resolves to a shared library.
944824e7fdSDimitry Andric   //  - An undefined weak will not extract archive members, but we have to
950b57cec5SDimitry Andric   //    remember it is weak.
960b57cec5SDimitry Andric   uint8_t binding;
970b57cec5SDimitry Andric 
980b57cec5SDimitry Andric   // The following fields have the same meaning as the ELF symbol attributes.
990b57cec5SDimitry Andric   uint8_t type;    // symbol type
1000b57cec5SDimitry Andric   uint8_t stOther; // st_other field value
1010b57cec5SDimitry Andric 
1020b57cec5SDimitry Andric   uint8_t symbolKind;
1030b57cec5SDimitry Andric 
1040b57cec5SDimitry Andric   // Symbol visibility. This is the computed minimum visibility of all
1050b57cec5SDimitry Andric   // observed non-DSO symbols.
106480093f4SDimitry Andric   uint8_t visibility : 2;
1070b57cec5SDimitry Andric 
1080b57cec5SDimitry Andric   // True if the symbol was used for linking and thus need to be added to the
1090b57cec5SDimitry Andric   // output file's symbol table. This is true for all symbols except for
1100b57cec5SDimitry Andric   // unreferenced DSO symbols, lazy (archive) symbols, and bitcode symbols that
1110b57cec5SDimitry Andric   // are unreferenced except by other bitcode objects.
112480093f4SDimitry Andric   uint8_t isUsedInRegularObj : 1;
1130b57cec5SDimitry Andric 
11485868e8aSDimitry Andric   // Used by a Defined symbol with protected or default visibility, to record
11585868e8aSDimitry Andric   // whether it is required to be exported into .dynsym. This is set when any of
11685868e8aSDimitry Andric   // the following conditions hold:
11785868e8aSDimitry Andric   //
11885868e8aSDimitry Andric   // - If there is an interposable symbol from a DSO.
11985868e8aSDimitry Andric   // - If -shared or --export-dynamic is specified, any symbol in an object
12085868e8aSDimitry Andric   //   file/bitcode sets this property, unless suppressed by LTO
12185868e8aSDimitry Andric   //   canBeOmittedFromSymbolTable().
122480093f4SDimitry Andric   uint8_t exportDynamic : 1;
12385868e8aSDimitry Andric 
12485868e8aSDimitry Andric   // True if the symbol is in the --dynamic-list file. A Defined symbol with
12585868e8aSDimitry Andric   // protected or default visibility with this property is required to be
12685868e8aSDimitry Andric   // exported into .dynsym.
127480093f4SDimitry Andric   uint8_t inDynamicList : 1;
1280b57cec5SDimitry Andric 
1290b57cec5SDimitry Andric   // False if LTO shouldn't inline whatever this symbol points to. If a symbol
1300b57cec5SDimitry Andric   // is overwritten after LTO, LTO shouldn't inline the symbol because it
1310b57cec5SDimitry Andric   // doesn't know the final contents of the symbol.
132480093f4SDimitry Andric   uint8_t canInline : 1;
13385868e8aSDimitry Andric 
134e8d8bef9SDimitry Andric   // Used to track if there has been at least one undefined reference to the
135e8d8bef9SDimitry Andric   // symbol. For Undefined and SharedSymbol, the binding may change to STB_WEAK
136e8d8bef9SDimitry Andric   // if the first undefined reference from a non-shared object is weak.
137e8d8bef9SDimitry Andric   //
138e8d8bef9SDimitry Andric   // This is also used to retain __wrap_foo when foo is referenced.
139480093f4SDimitry Andric   uint8_t referenced : 1;
1400b57cec5SDimitry Andric 
1410b57cec5SDimitry Andric   // True if this symbol is specified by --trace-symbol option.
142480093f4SDimitry Andric   uint8_t traced : 1;
1430b57cec5SDimitry Andric 
144*04eeddc0SDimitry Andric   // True if the name contains '@'.
145*04eeddc0SDimitry Andric   uint8_t hasVersionSuffix : 1;
146*04eeddc0SDimitry Andric 
14785868e8aSDimitry Andric   inline void replace(const Symbol &newSym);
1480b57cec5SDimitry Andric 
1490b57cec5SDimitry Andric   bool includeInDynsym() const;
1500b57cec5SDimitry Andric   uint8_t computeBinding() const;
1510b57cec5SDimitry Andric   bool isWeak() const { return binding == llvm::ELF::STB_WEAK; }
1520b57cec5SDimitry Andric 
1530b57cec5SDimitry Andric   bool isUndefined() const { return symbolKind == UndefinedKind; }
1540b57cec5SDimitry Andric   bool isCommon() const { return symbolKind == CommonKind; }
1550b57cec5SDimitry Andric   bool isDefined() const { return symbolKind == DefinedKind; }
1560b57cec5SDimitry Andric   bool isShared() const { return symbolKind == SharedKind; }
1570b57cec5SDimitry Andric   bool isPlaceholder() const { return symbolKind == PlaceholderKind; }
1580b57cec5SDimitry Andric 
1590b57cec5SDimitry Andric   bool isLocal() const { return binding == llvm::ELF::STB_LOCAL; }
1600b57cec5SDimitry Andric 
1610b57cec5SDimitry Andric   bool isLazy() const {
1620b57cec5SDimitry Andric     return symbolKind == LazyArchiveKind || symbolKind == LazyObjectKind;
1630b57cec5SDimitry Andric   }
1640b57cec5SDimitry Andric 
1650b57cec5SDimitry Andric   // True if this is an undefined weak symbol. This only works once
1660b57cec5SDimitry Andric   // all input files have been added.
167349cc55cSDimitry Andric   bool isUndefWeak() const { return isWeak() && isUndefined(); }
1680b57cec5SDimitry Andric 
169*04eeddc0SDimitry Andric   StringRef getName() const { return {nameData, nameSize}; }
1700b57cec5SDimitry Andric 
1710b57cec5SDimitry Andric   void setName(StringRef s) {
1720b57cec5SDimitry Andric     nameData = s.data();
1730b57cec5SDimitry Andric     nameSize = s.size();
1740b57cec5SDimitry Andric   }
1750b57cec5SDimitry Andric 
1760b57cec5SDimitry Andric   void parseSymbolVersion();
1770b57cec5SDimitry Andric 
178e8d8bef9SDimitry Andric   // Get the NUL-terminated version suffix ("", "@...", or "@@...").
179e8d8bef9SDimitry Andric   //
180e8d8bef9SDimitry Andric   // For @@, the name has been truncated by insert(). For @, the name has been
181e8d8bef9SDimitry Andric   // truncated by Symbol::parseSymbolVersion().
182*04eeddc0SDimitry Andric   const char *getVersionSuffix() const { return nameData + nameSize; }
183*04eeddc0SDimitry Andric 
184*04eeddc0SDimitry Andric   uint32_t getGotIdx() const {
185*04eeddc0SDimitry Andric     return auxIdx == uint32_t(-1) ? uint32_t(-1) : symAux[auxIdx].gotIdx;
186*04eeddc0SDimitry Andric   }
187*04eeddc0SDimitry Andric   uint32_t getPltIdx() const {
188*04eeddc0SDimitry Andric     return auxIdx == uint32_t(-1) ? uint32_t(-1) : symAux[auxIdx].pltIdx;
189*04eeddc0SDimitry Andric   }
190*04eeddc0SDimitry Andric   uint32_t getTlsDescIdx() const {
191*04eeddc0SDimitry Andric     return auxIdx == uint32_t(-1) ? uint32_t(-1) : symAux[auxIdx].tlsDescIdx;
192*04eeddc0SDimitry Andric   }
193*04eeddc0SDimitry Andric   uint32_t getTlsGdIdx() const {
194*04eeddc0SDimitry Andric     return auxIdx == uint32_t(-1) ? uint32_t(-1) : symAux[auxIdx].tlsGdIdx;
195e8d8bef9SDimitry Andric   }
196e8d8bef9SDimitry Andric 
197*04eeddc0SDimitry Andric   bool isInGot() const { return getGotIdx() != uint32_t(-1); }
198*04eeddc0SDimitry Andric   bool isInPlt() const { return getPltIdx() != uint32_t(-1); }
1990b57cec5SDimitry Andric 
2000b57cec5SDimitry Andric   uint64_t getVA(int64_t addend = 0) const;
2010b57cec5SDimitry Andric 
2020b57cec5SDimitry Andric   uint64_t getGotOffset() const;
2030b57cec5SDimitry Andric   uint64_t getGotVA() const;
2040b57cec5SDimitry Andric   uint64_t getGotPltOffset() const;
2050b57cec5SDimitry Andric   uint64_t getGotPltVA() const;
2060b57cec5SDimitry Andric   uint64_t getPltVA() const;
2070b57cec5SDimitry Andric   uint64_t getSize() const;
2080b57cec5SDimitry Andric   OutputSection *getOutputSection() const;
2090b57cec5SDimitry Andric 
2100b57cec5SDimitry Andric   // The following two functions are used for symbol resolution.
2110b57cec5SDimitry Andric   //
2120b57cec5SDimitry Andric   // You are expected to call mergeProperties for all symbols in input
2130b57cec5SDimitry Andric   // files so that attributes that are attached to names rather than
2140b57cec5SDimitry Andric   // indivisual symbol (such as visibility) are merged together.
2150b57cec5SDimitry Andric   //
2160b57cec5SDimitry Andric   // Every time you read a new symbol from an input, you are supposed
2170b57cec5SDimitry Andric   // to call resolve() with the new symbol. That function replaces
2180b57cec5SDimitry Andric   // "this" object as a result of name resolution if the new symbol is
2190b57cec5SDimitry Andric   // more appropriate to be included in the output.
2200b57cec5SDimitry Andric   //
2210b57cec5SDimitry Andric   // For example, if "this" is an undefined symbol and a new symbol is
2220b57cec5SDimitry Andric   // a defined symbol, "this" is replaced with the new symbol.
2230b57cec5SDimitry Andric   void mergeProperties(const Symbol &other);
2240b57cec5SDimitry Andric   void resolve(const Symbol &other);
2250b57cec5SDimitry Andric 
2264824e7fdSDimitry Andric   // If this is a lazy symbol, extract an input file and add the symbol
2270b57cec5SDimitry Andric   // in the file to the symbol table. Calling this function on
2280b57cec5SDimitry Andric   // non-lazy object causes a runtime error.
2294824e7fdSDimitry Andric   void extract() const;
2300b57cec5SDimitry Andric 
2310b57cec5SDimitry Andric   static bool isExportDynamic(Kind k, uint8_t visibility) {
2320b57cec5SDimitry Andric     if (k == SharedKind)
2330b57cec5SDimitry Andric       return visibility == llvm::ELF::STV_DEFAULT;
2340b57cec5SDimitry Andric     return config->shared || config->exportDynamic;
2350b57cec5SDimitry Andric   }
2360b57cec5SDimitry Andric 
237fe6060f1SDimitry Andric private:
2380b57cec5SDimitry Andric   void resolveUndefined(const Undefined &other);
2390b57cec5SDimitry Andric   void resolveCommon(const CommonSymbol &other);
2400b57cec5SDimitry Andric   void resolveDefined(const Defined &other);
2410b57cec5SDimitry Andric   template <class LazyT> void resolveLazy(const LazyT &other);
2420b57cec5SDimitry Andric   void resolveShared(const SharedSymbol &other);
2430b57cec5SDimitry Andric 
2440b57cec5SDimitry Andric   int compare(const Symbol *other) const;
2450b57cec5SDimitry Andric 
2460b57cec5SDimitry Andric   inline size_t getSymbolSize() const;
2470b57cec5SDimitry Andric 
2480b57cec5SDimitry Andric protected:
249*04eeddc0SDimitry Andric   Symbol(Kind k, InputFile *file, StringRef name, uint8_t binding,
2500b57cec5SDimitry Andric          uint8_t stOther, uint8_t type)
251*04eeddc0SDimitry Andric       : file(file), nameData(name.data()), nameSize(name.size()),
252*04eeddc0SDimitry Andric         binding(binding), type(type), stOther(stOther), symbolKind(k),
253*04eeddc0SDimitry Andric         visibility(stOther & 3),
2540b57cec5SDimitry Andric         isUsedInRegularObj(!file || file->kind() == InputFile::ObjKind),
25585868e8aSDimitry Andric         exportDynamic(isExportDynamic(k, visibility)), inDynamicList(false),
256*04eeddc0SDimitry Andric         canInline(false), referenced(false), traced(false),
257*04eeddc0SDimitry Andric         hasVersionSuffix(false), isInIplt(false), gotInIgot(false),
258*04eeddc0SDimitry Andric         isPreemptible(false), used(!config->gcSections), folded(false),
259*04eeddc0SDimitry Andric         needsTocRestore(false), scriptDefined(false), needsCopy(false),
260*04eeddc0SDimitry Andric         needsGot(false), needsPlt(false), needsTlsDesc(false),
2610eae32dcSDimitry Andric         needsTlsGd(false), needsTlsGdToIe(false), needsTlsLd(false),
2620eae32dcSDimitry Andric         needsGotDtprel(false), needsTlsIe(false), hasDirectReloc(false) {}
2630b57cec5SDimitry Andric 
2640b57cec5SDimitry Andric public:
2650b57cec5SDimitry Andric   // True if this symbol is in the Iplt sub-section of the Plt and the Igot
2660b57cec5SDimitry Andric   // sub-section of the .got.plt or .got.
267480093f4SDimitry Andric   uint8_t isInIplt : 1;
2680b57cec5SDimitry Andric 
2690b57cec5SDimitry Andric   // True if this symbol needs a GOT entry and its GOT entry is actually in
2700b57cec5SDimitry Andric   // Igot. This will be true only for certain non-preemptible ifuncs.
271480093f4SDimitry Andric   uint8_t gotInIgot : 1;
2720b57cec5SDimitry Andric 
2730b57cec5SDimitry Andric   // True if this symbol is preemptible at load time.
274480093f4SDimitry Andric   uint8_t isPreemptible : 1;
2750b57cec5SDimitry Andric 
2760b57cec5SDimitry Andric   // True if an undefined or shared symbol is used from a live section.
2775ffd83dbSDimitry Andric   //
2785ffd83dbSDimitry Andric   // NOTE: In Writer.cpp the field is used to mark local defined symbols
2795ffd83dbSDimitry Andric   // which are referenced by relocations when -r or --emit-relocs is given.
280480093f4SDimitry Andric   uint8_t used : 1;
2810b57cec5SDimitry Andric 
2820eae32dcSDimitry Andric   // True if defined relative to a section discarded by ICF.
2830eae32dcSDimitry Andric   uint8_t folded : 1;
2840eae32dcSDimitry Andric 
2850b57cec5SDimitry Andric   // True if a call to this symbol needs to be followed by a restore of the
2860b57cec5SDimitry Andric   // PPC64 toc pointer.
287480093f4SDimitry Andric   uint8_t needsTocRestore : 1;
2880b57cec5SDimitry Andric 
2890b57cec5SDimitry Andric   // True if this symbol is defined by a linker script.
290480093f4SDimitry Andric   uint8_t scriptDefined : 1;
2910b57cec5SDimitry Andric 
2920eae32dcSDimitry Andric   // True if this symbol needs a canonical PLT entry, or (during
2930eae32dcSDimitry Andric   // postScanRelocations) a copy relocation.
2940eae32dcSDimitry Andric   uint8_t needsCopy : 1;
2950eae32dcSDimitry Andric 
2960eae32dcSDimitry Andric   // Temporary flags used to communicate which symbol entries need PLT and GOT
2970eae32dcSDimitry Andric   // entries during postScanRelocations();
2980eae32dcSDimitry Andric   uint8_t needsGot : 1;
2990eae32dcSDimitry Andric   uint8_t needsPlt : 1;
3000eae32dcSDimitry Andric   uint8_t needsTlsDesc : 1;
3010eae32dcSDimitry Andric   uint8_t needsTlsGd : 1;
3020eae32dcSDimitry Andric   uint8_t needsTlsGdToIe : 1;
3030eae32dcSDimitry Andric   uint8_t needsTlsLd : 1;
3040eae32dcSDimitry Andric   uint8_t needsGotDtprel : 1;
3050eae32dcSDimitry Andric   uint8_t needsTlsIe : 1;
3060eae32dcSDimitry Andric   uint8_t hasDirectReloc : 1;
3070eae32dcSDimitry Andric 
308*04eeddc0SDimitry Andric   bool needsDynReloc() const {
309*04eeddc0SDimitry Andric     return needsCopy || needsGot || needsPlt || needsTlsDesc || needsTlsGd ||
310*04eeddc0SDimitry Andric            needsTlsGdToIe || needsTlsLd || needsGotDtprel || needsTlsIe;
311*04eeddc0SDimitry Andric   }
312*04eeddc0SDimitry Andric   void allocateAux() {
313*04eeddc0SDimitry Andric     assert(auxIdx == uint32_t(-1));
314*04eeddc0SDimitry Andric     auxIdx = symAux.size();
315*04eeddc0SDimitry Andric     symAux.emplace_back();
316*04eeddc0SDimitry Andric   }
317*04eeddc0SDimitry Andric 
3180b57cec5SDimitry Andric   // The partition whose dynamic symbol table contains this symbol's definition.
3190b57cec5SDimitry Andric   uint8_t partition = 1;
3200b57cec5SDimitry Andric 
3210b57cec5SDimitry Andric   bool isSection() const { return type == llvm::ELF::STT_SECTION; }
3220b57cec5SDimitry Andric   bool isTls() const { return type == llvm::ELF::STT_TLS; }
3230b57cec5SDimitry Andric   bool isFunc() const { return type == llvm::ELF::STT_FUNC; }
3240b57cec5SDimitry Andric   bool isGnuIFunc() const { return type == llvm::ELF::STT_GNU_IFUNC; }
3250b57cec5SDimitry Andric   bool isObject() const { return type == llvm::ELF::STT_OBJECT; }
3260b57cec5SDimitry Andric   bool isFile() const { return type == llvm::ELF::STT_FILE; }
3270b57cec5SDimitry Andric };
3280b57cec5SDimitry Andric 
3290b57cec5SDimitry Andric // Represents a symbol that is defined in the current output file.
3300b57cec5SDimitry Andric class Defined : public Symbol {
3310b57cec5SDimitry Andric public:
332*04eeddc0SDimitry Andric   Defined(InputFile *file, StringRef name, uint8_t binding, uint8_t stOther,
3330b57cec5SDimitry Andric           uint8_t type, uint64_t value, uint64_t size, SectionBase *section)
3340b57cec5SDimitry Andric       : Symbol(DefinedKind, file, name, binding, stOther, type), value(value),
3350b57cec5SDimitry Andric         size(size), section(section) {}
3360b57cec5SDimitry Andric 
3370b57cec5SDimitry Andric   static bool classof(const Symbol *s) { return s->isDefined(); }
3380b57cec5SDimitry Andric 
3390b57cec5SDimitry Andric   uint64_t value;
3400b57cec5SDimitry Andric   uint64_t size;
3410b57cec5SDimitry Andric   SectionBase *section;
3420b57cec5SDimitry Andric };
3430b57cec5SDimitry Andric 
3440b57cec5SDimitry Andric // Represents a common symbol.
3450b57cec5SDimitry Andric //
3460b57cec5SDimitry Andric // On Unix, it is traditionally allowed to write variable definitions
3470b57cec5SDimitry Andric // without initialization expressions (such as "int foo;") to header
3480b57cec5SDimitry Andric // files. Such definition is called "tentative definition".
3490b57cec5SDimitry Andric //
3500b57cec5SDimitry Andric // Using tentative definition is usually considered a bad practice
3510b57cec5SDimitry Andric // because you should write only declarations (such as "extern int
3520b57cec5SDimitry Andric // foo;") to header files. Nevertheless, the linker and the compiler
3530b57cec5SDimitry Andric // have to do something to support bad code by allowing duplicate
3540b57cec5SDimitry Andric // definitions for this particular case.
3550b57cec5SDimitry Andric //
3560b57cec5SDimitry Andric // Common symbols represent variable definitions without initializations.
357480093f4SDimitry Andric // The compiler creates common symbols when it sees variable definitions
3580b57cec5SDimitry Andric // without initialization (you can suppress this behavior and let the
3590b57cec5SDimitry Andric // compiler create a regular defined symbol by -fno-common).
3600b57cec5SDimitry Andric //
3610b57cec5SDimitry Andric // The linker allows common symbols to be replaced by regular defined
3620b57cec5SDimitry Andric // symbols. If there are remaining common symbols after name resolution is
3630b57cec5SDimitry Andric // complete, they are converted to regular defined symbols in a .bss
3640b57cec5SDimitry Andric // section. (Therefore, the later passes don't see any CommonSymbols.)
3650b57cec5SDimitry Andric class CommonSymbol : public Symbol {
3660b57cec5SDimitry Andric public:
367*04eeddc0SDimitry Andric   CommonSymbol(InputFile *file, StringRef name, uint8_t binding,
3680b57cec5SDimitry Andric                uint8_t stOther, uint8_t type, uint64_t alignment, uint64_t size)
3690b57cec5SDimitry Andric       : Symbol(CommonKind, file, name, binding, stOther, type),
3700b57cec5SDimitry Andric         alignment(alignment), size(size) {}
3710b57cec5SDimitry Andric 
3720b57cec5SDimitry Andric   static bool classof(const Symbol *s) { return s->isCommon(); }
3730b57cec5SDimitry Andric 
3740b57cec5SDimitry Andric   uint32_t alignment;
3750b57cec5SDimitry Andric   uint64_t size;
3760b57cec5SDimitry Andric };
3770b57cec5SDimitry Andric 
3780b57cec5SDimitry Andric class Undefined : public Symbol {
3790b57cec5SDimitry Andric public:
380*04eeddc0SDimitry Andric   Undefined(InputFile *file, StringRef name, uint8_t binding, uint8_t stOther,
3810b57cec5SDimitry Andric             uint8_t type, uint32_t discardedSecIdx = 0)
3820b57cec5SDimitry Andric       : Symbol(UndefinedKind, file, name, binding, stOther, type),
3830b57cec5SDimitry Andric         discardedSecIdx(discardedSecIdx) {}
3840b57cec5SDimitry Andric 
3850b57cec5SDimitry Andric   static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; }
3860b57cec5SDimitry Andric 
3870b57cec5SDimitry Andric   // The section index if in a discarded section, 0 otherwise.
3880b57cec5SDimitry Andric   uint32_t discardedSecIdx;
3890b57cec5SDimitry Andric };
3900b57cec5SDimitry Andric 
3910b57cec5SDimitry Andric class SharedSymbol : public Symbol {
3920b57cec5SDimitry Andric public:
3930b57cec5SDimitry Andric   static bool classof(const Symbol *s) { return s->kind() == SharedKind; }
3940b57cec5SDimitry Andric 
3950b57cec5SDimitry Andric   SharedSymbol(InputFile &file, StringRef name, uint8_t binding,
3960b57cec5SDimitry Andric                uint8_t stOther, uint8_t type, uint64_t value, uint64_t size,
3970eae32dcSDimitry Andric                uint32_t alignment, uint16_t verdefIndex)
3980b57cec5SDimitry Andric       : Symbol(SharedKind, &file, name, binding, stOther, type), value(value),
3990b57cec5SDimitry Andric         size(size), alignment(alignment) {
4000b57cec5SDimitry Andric     this->verdefIndex = verdefIndex;
4010b57cec5SDimitry Andric     // GNU ifunc is a mechanism to allow user-supplied functions to
4020b57cec5SDimitry Andric     // resolve PLT slot values at load-time. This is contrary to the
4030b57cec5SDimitry Andric     // regular symbol resolution scheme in which symbols are resolved just
4040b57cec5SDimitry Andric     // by name. Using this hook, you can program how symbols are solved
4050b57cec5SDimitry Andric     // for you program. For example, you can make "memcpy" to be resolved
4060b57cec5SDimitry Andric     // to a SSE-enabled version of memcpy only when a machine running the
4070b57cec5SDimitry Andric     // program supports the SSE instruction set.
4080b57cec5SDimitry Andric     //
4090b57cec5SDimitry Andric     // Naturally, such symbols should always be called through their PLT
4100b57cec5SDimitry Andric     // slots. What GNU ifunc symbols point to are resolver functions, and
4110b57cec5SDimitry Andric     // calling them directly doesn't make sense (unless you are writing a
4120b57cec5SDimitry Andric     // loader).
4130b57cec5SDimitry Andric     //
4140b57cec5SDimitry Andric     // For DSO symbols, we always call them through PLT slots anyway.
4150b57cec5SDimitry Andric     // So there's no difference between GNU ifunc and regular function
4160b57cec5SDimitry Andric     // symbols if they are in DSOs. So we can handle GNU_IFUNC as FUNC.
4170b57cec5SDimitry Andric     if (this->type == llvm::ELF::STT_GNU_IFUNC)
4180b57cec5SDimitry Andric       this->type = llvm::ELF::STT_FUNC;
4190b57cec5SDimitry Andric   }
4200b57cec5SDimitry Andric 
4210b57cec5SDimitry Andric   SharedFile &getFile() const { return *cast<SharedFile>(file); }
4220b57cec5SDimitry Andric 
4230b57cec5SDimitry Andric   uint64_t value; // st_value
4240b57cec5SDimitry Andric   uint64_t size;  // st_size
4250b57cec5SDimitry Andric   uint32_t alignment;
4260b57cec5SDimitry Andric };
4270b57cec5SDimitry Andric 
// LazyArchive and LazyObject represent a symbol that is not yet in the link,
// but we know where to find it if needed. If the resolver finds both Undefined
// and Lazy for the same name, it will ask the Lazy to load a file.
//
// A special complication is the handling of weak undefined symbols. They should
// not load a file, but we have to remember we have seen both the weak undefined
// and the lazy. We represent that with a lazy symbol with a weak binding. This
// means that code looking for undefined symbols normally also has to take lazy
// symbols into consideration.
4370b57cec5SDimitry Andric 
4380b57cec5SDimitry Andric // This class represents a symbol defined in an archive file. It is
4390b57cec5SDimitry Andric // created from an archive file header, and it knows how to load an
4400b57cec5SDimitry Andric // object file from an archive to replace itself with a defined
4410b57cec5SDimitry Andric // symbol.
4420b57cec5SDimitry Andric class LazyArchive : public Symbol {
4430b57cec5SDimitry Andric public:
4440b57cec5SDimitry Andric   LazyArchive(InputFile &file, const llvm::object::Archive::Symbol s)
4450b57cec5SDimitry Andric       : Symbol(LazyArchiveKind, &file, s.getName(), llvm::ELF::STB_GLOBAL,
4460b57cec5SDimitry Andric                llvm::ELF::STV_DEFAULT, llvm::ELF::STT_NOTYPE),
4470b57cec5SDimitry Andric         sym(s) {}
4480b57cec5SDimitry Andric 
4490b57cec5SDimitry Andric   static bool classof(const Symbol *s) { return s->kind() == LazyArchiveKind; }
4500b57cec5SDimitry Andric 
4510b57cec5SDimitry Andric   MemoryBufferRef getMemberBuffer();
4520b57cec5SDimitry Andric 
4530b57cec5SDimitry Andric   const llvm::object::Archive::Symbol sym;
4540b57cec5SDimitry Andric };
4550b57cec5SDimitry Andric 
4560b57cec5SDimitry Andric // LazyObject symbols represents symbols in object files between
4570b57cec5SDimitry Andric // --start-lib and --end-lib options.
4580b57cec5SDimitry Andric class LazyObject : public Symbol {
4590b57cec5SDimitry Andric public:
4600b57cec5SDimitry Andric   LazyObject(InputFile &file, StringRef name)
4610b57cec5SDimitry Andric       : Symbol(LazyObjectKind, &file, name, llvm::ELF::STB_GLOBAL,
4620eae32dcSDimitry Andric                llvm::ELF::STV_DEFAULT, llvm::ELF::STT_NOTYPE) {
4630eae32dcSDimitry Andric     isUsedInRegularObj = false;
4640eae32dcSDimitry Andric   }
4650b57cec5SDimitry Andric 
4660b57cec5SDimitry Andric   static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; }
4670b57cec5SDimitry Andric };
4680b57cec5SDimitry Andric 
4690b57cec5SDimitry Andric // Some linker-generated symbols need to be created as
4700b57cec5SDimitry Andric // Defined symbols.
4710b57cec5SDimitry Andric struct ElfSym {
4720b57cec5SDimitry Andric   // __bss_start
4730b57cec5SDimitry Andric   static Defined *bss;
4740b57cec5SDimitry Andric 
4750b57cec5SDimitry Andric   // etext and _etext
4760b57cec5SDimitry Andric   static Defined *etext1;
4770b57cec5SDimitry Andric   static Defined *etext2;
4780b57cec5SDimitry Andric 
4790b57cec5SDimitry Andric   // edata and _edata
4800b57cec5SDimitry Andric   static Defined *edata1;
4810b57cec5SDimitry Andric   static Defined *edata2;
4820b57cec5SDimitry Andric 
4830b57cec5SDimitry Andric   // end and _end
4840b57cec5SDimitry Andric   static Defined *end1;
4850b57cec5SDimitry Andric   static Defined *end2;
4860b57cec5SDimitry Andric 
4870b57cec5SDimitry Andric   // The _GLOBAL_OFFSET_TABLE_ symbol is defined by target convention to
4880b57cec5SDimitry Andric   // be at some offset from the base of the .got section, usually 0 or
4890b57cec5SDimitry Andric   // the end of the .got.
4900b57cec5SDimitry Andric   static Defined *globalOffsetTable;
4910b57cec5SDimitry Andric 
4920b57cec5SDimitry Andric   // _gp, _gp_disp and __gnu_local_gp symbols. Only for MIPS.
4930b57cec5SDimitry Andric   static Defined *mipsGp;
4940b57cec5SDimitry Andric   static Defined *mipsGpDisp;
4950b57cec5SDimitry Andric   static Defined *mipsLocalGp;
4960b57cec5SDimitry Andric 
4970b57cec5SDimitry Andric   // __rel{,a}_iplt_{start,end} symbols.
4980b57cec5SDimitry Andric   static Defined *relaIpltStart;
4990b57cec5SDimitry Andric   static Defined *relaIpltEnd;
5000b57cec5SDimitry Andric 
5010b57cec5SDimitry Andric   // __global_pointer$ for RISC-V.
5020b57cec5SDimitry Andric   static Defined *riscvGlobalPointer;
5030b57cec5SDimitry Andric 
5040b57cec5SDimitry Andric   // _TLS_MODULE_BASE_ on targets that support TLSDESC.
5050b57cec5SDimitry Andric   static Defined *tlsModuleBase;
5060b57cec5SDimitry Andric };
5070b57cec5SDimitry Andric 
5080b57cec5SDimitry Andric // A buffer class that is large enough to hold any Symbol-derived
5090b57cec5SDimitry Andric // object. We allocate memory using this class and instantiate a symbol
5100b57cec5SDimitry Andric // using the placement new.
5110b57cec5SDimitry Andric union SymbolUnion {
5120b57cec5SDimitry Andric   alignas(Defined) char a[sizeof(Defined)];
5130b57cec5SDimitry Andric   alignas(CommonSymbol) char b[sizeof(CommonSymbol)];
5140b57cec5SDimitry Andric   alignas(Undefined) char c[sizeof(Undefined)];
5150b57cec5SDimitry Andric   alignas(SharedSymbol) char d[sizeof(SharedSymbol)];
5160b57cec5SDimitry Andric   alignas(LazyArchive) char e[sizeof(LazyArchive)];
5170b57cec5SDimitry Andric   alignas(LazyObject) char f[sizeof(LazyObject)];
5180b57cec5SDimitry Andric };
5190b57cec5SDimitry Andric 
5200b57cec5SDimitry Andric // It is important to keep the size of SymbolUnion small for performance and
521*04eeddc0SDimitry Andric // memory usage reasons. 72 bytes is a soft limit based on the size of Defined
5220b57cec5SDimitry Andric // on a 64-bit system.
523*04eeddc0SDimitry Andric static_assert(sizeof(SymbolUnion) <= 72, "SymbolUnion too large");
5240b57cec5SDimitry Andric 
5250b57cec5SDimitry Andric template <typename T> struct AssertSymbol {
5260b57cec5SDimitry Andric   static_assert(std::is_trivially_destructible<T>(),
5270b57cec5SDimitry Andric                 "Symbol types must be trivially destructible");
5280b57cec5SDimitry Andric   static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small");
5290b57cec5SDimitry Andric   static_assert(alignof(T) <= alignof(SymbolUnion),
5300b57cec5SDimitry Andric                 "SymbolUnion not aligned enough");
5310b57cec5SDimitry Andric };
5320b57cec5SDimitry Andric 
5330b57cec5SDimitry Andric static inline void assertSymbols() {
5340b57cec5SDimitry Andric   AssertSymbol<Defined>();
5350b57cec5SDimitry Andric   AssertSymbol<CommonSymbol>();
5360b57cec5SDimitry Andric   AssertSymbol<Undefined>();
5370b57cec5SDimitry Andric   AssertSymbol<SharedSymbol>();
5380b57cec5SDimitry Andric   AssertSymbol<LazyArchive>();
5390b57cec5SDimitry Andric   AssertSymbol<LazyObject>();
5400b57cec5SDimitry Andric }
5410b57cec5SDimitry Andric 
5420b57cec5SDimitry Andric void printTraceSymbol(const Symbol *sym);
5430b57cec5SDimitry Andric 
5440b57cec5SDimitry Andric size_t Symbol::getSymbolSize() const {
5450b57cec5SDimitry Andric   switch (kind()) {
5460b57cec5SDimitry Andric   case CommonKind:
5470b57cec5SDimitry Andric     return sizeof(CommonSymbol);
5480b57cec5SDimitry Andric   case DefinedKind:
5490b57cec5SDimitry Andric     return sizeof(Defined);
5500b57cec5SDimitry Andric   case LazyArchiveKind:
5510b57cec5SDimitry Andric     return sizeof(LazyArchive);
5520b57cec5SDimitry Andric   case LazyObjectKind:
5530b57cec5SDimitry Andric     return sizeof(LazyObject);
5540b57cec5SDimitry Andric   case SharedKind:
5550b57cec5SDimitry Andric     return sizeof(SharedSymbol);
5560b57cec5SDimitry Andric   case UndefinedKind:
5570b57cec5SDimitry Andric     return sizeof(Undefined);
5580b57cec5SDimitry Andric   case PlaceholderKind:
5590b57cec5SDimitry Andric     return sizeof(Symbol);
5600b57cec5SDimitry Andric   }
5610b57cec5SDimitry Andric   llvm_unreachable("unknown symbol kind");
5620b57cec5SDimitry Andric }
5630b57cec5SDimitry Andric 
5640b57cec5SDimitry Andric // replace() replaces "this" object with a given symbol by memcpy'ing
5650b57cec5SDimitry Andric // it over to "this". This function is called as a result of name
5660b57cec5SDimitry Andric // resolution, e.g. to replace an undefind symbol with a defined symbol.
56785868e8aSDimitry Andric void Symbol::replace(const Symbol &newSym) {
5680b57cec5SDimitry Andric   using llvm::ELF::STT_TLS;
5690b57cec5SDimitry Andric 
5705ffd83dbSDimitry Andric   // st_value of STT_TLS represents the assigned offset, not the actual address
5715ffd83dbSDimitry Andric   // which is used by STT_FUNC and STT_OBJECT. STT_TLS symbols can only be
5725ffd83dbSDimitry Andric   // referenced by special TLS relocations. It is usually an error if a STT_TLS
5735ffd83dbSDimitry Andric   // symbol is replaced by a non-STT_TLS symbol, vice versa. There are two
5745ffd83dbSDimitry Andric   // exceptions: (a) a STT_NOTYPE lazy/undefined symbol can be replaced by a
5755ffd83dbSDimitry Andric   // STT_TLS symbol, (b) a STT_TLS undefined symbol can be replaced by a
5765ffd83dbSDimitry Andric   // STT_NOTYPE lazy symbol.
5775ffd83dbSDimitry Andric   if (symbolKind != PlaceholderKind && !newSym.isLazy() &&
5785ffd83dbSDimitry Andric       (type == STT_TLS) != (newSym.type == STT_TLS) &&
5795ffd83dbSDimitry Andric       type != llvm::ELF::STT_NOTYPE)
5800b57cec5SDimitry Andric     error("TLS attribute mismatch: " + toString(*this) + "\n>>> defined in " +
58185868e8aSDimitry Andric           toString(newSym.file) + "\n>>> defined in " + toString(file));
5820b57cec5SDimitry Andric 
5830b57cec5SDimitry Andric   Symbol old = *this;
58485868e8aSDimitry Andric   memcpy(this, &newSym, newSym.getSymbolSize());
5850b57cec5SDimitry Andric 
58685868e8aSDimitry Andric   // old may be a placeholder. The referenced fields must be initialized in
58785868e8aSDimitry Andric   // SymbolTable::insert.
5880b57cec5SDimitry Andric   versionId = old.versionId;
5890b57cec5SDimitry Andric   visibility = old.visibility;
5900b57cec5SDimitry Andric   isUsedInRegularObj = old.isUsedInRegularObj;
5910b57cec5SDimitry Andric   exportDynamic = old.exportDynamic;
59285868e8aSDimitry Andric   inDynamicList = old.inDynamicList;
5930b57cec5SDimitry Andric   canInline = old.canInline;
59485868e8aSDimitry Andric   referenced = old.referenced;
5950b57cec5SDimitry Andric   traced = old.traced;
596*04eeddc0SDimitry Andric   hasVersionSuffix = old.hasVersionSuffix;
5970b57cec5SDimitry Andric   isPreemptible = old.isPreemptible;
5980b57cec5SDimitry Andric   scriptDefined = old.scriptDefined;
5990b57cec5SDimitry Andric   partition = old.partition;
6000b57cec5SDimitry Andric 
6010b57cec5SDimitry Andric   // Print out a log message if --trace-symbol was specified.
6020b57cec5SDimitry Andric   // This is for debugging.
6030b57cec5SDimitry Andric   if (traced)
6040b57cec5SDimitry Andric     printTraceSymbol(this);
6050b57cec5SDimitry Andric }
6060b57cec5SDimitry Andric 
6070eae32dcSDimitry Andric template <typename... T> Defined *makeDefined(T &&...args) {
6080eae32dcSDimitry Andric   return new (reinterpret_cast<Defined *>(
6090eae32dcSDimitry Andric       getSpecificAllocSingleton<SymbolUnion>().Allocate()))
6100eae32dcSDimitry Andric       Defined(std::forward<T>(args)...);
6110eae32dcSDimitry Andric }
6120eae32dcSDimitry Andric 
6130b57cec5SDimitry Andric void maybeWarnUnorderableSymbol(const Symbol *sym);
614480093f4SDimitry Andric bool computeIsPreemptible(const Symbol &sym);
6155ffd83dbSDimitry Andric void reportBackrefs();
6165ffd83dbSDimitry Andric 
// Maps a symbol to the pair of input files recorded for a backward reference
// to it. Used by --warn-backrefs.
619e8d8bef9SDimitry Andric extern llvm::DenseMap<const Symbol *,
620e8d8bef9SDimitry Andric                       std::pair<const InputFile *, const InputFile *>>
621e8d8bef9SDimitry Andric     backwardReferences;
622480093f4SDimitry Andric 
// A list of (reference, extractedFile, sym) tuples. Used by --why-extract=.
624349cc55cSDimitry Andric extern SmallVector<std::tuple<std::string, const InputFile *, const Symbol &>,
625349cc55cSDimitry Andric                    0>
626349cc55cSDimitry Andric     whyExtract;
627349cc55cSDimitry Andric 
6280b57cec5SDimitry Andric } // namespace elf
6290b57cec5SDimitry Andric } // namespace lld
6300b57cec5SDimitry Andric 
6310b57cec5SDimitry Andric #endif
632