xref: /freebsd/contrib/llvm-project/lld/ELF/Symbols.h (revision 81ad626541db97eb356e2c1d4a20eb2a26a766ab)
10b57cec5SDimitry Andric //===- Symbols.h ------------------------------------------------*- C++ -*-===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file defines various types of Symbols.
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #ifndef LLD_ELF_SYMBOLS_H
140b57cec5SDimitry Andric #define LLD_ELF_SYMBOLS_H
150b57cec5SDimitry Andric 
16*81ad6265SDimitry Andric #include "Config.h"
170b57cec5SDimitry Andric #include "lld/Common/LLVM.h"
180eae32dcSDimitry Andric #include "lld/Common/Memory.h"
195ffd83dbSDimitry Andric #include "llvm/ADT/DenseMap.h"
200b57cec5SDimitry Andric #include "llvm/Object/ELF.h"
21349cc55cSDimitry Andric #include <tuple>
220b57cec5SDimitry Andric 
230b57cec5SDimitry Andric namespace lld {
24*81ad6265SDimitry Andric namespace elf {
25*81ad6265SDimitry Andric class Symbol;
26*81ad6265SDimitry Andric }
275ffd83dbSDimitry Andric // Returns a string representation for a symbol for diagnostics.
2885868e8aSDimitry Andric std::string toString(const elf::Symbol &);
2985868e8aSDimitry Andric 
300b57cec5SDimitry Andric namespace elf {
310b57cec5SDimitry Andric class CommonSymbol;
320b57cec5SDimitry Andric class Defined;
33*81ad6265SDimitry Andric class OutputSection;
34*81ad6265SDimitry Andric class SectionBase;
35*81ad6265SDimitry Andric class InputSectionBase;
360b57cec5SDimitry Andric class SharedSymbol;
370b57cec5SDimitry Andric class Symbol;
380b57cec5SDimitry Andric class Undefined;
39*81ad6265SDimitry Andric class LazyObject;
40*81ad6265SDimitry Andric class InputFile;
410b57cec5SDimitry Andric 
// GOT/PLT/TLS index properties of a symbol. They live in this auxiliary
// struct, separately from the symbol itself, to decrease sizeof(SymbolUnion)
// in the majority of cases. Every index starts out as uint32_t(-1).
struct SymbolAux {
  uint32_t gotIdx = uint32_t(-1);
  uint32_t pltIdx = uint32_t(-1);
  uint32_t tlsDescIdx = uint32_t(-1);
  uint32_t tlsGdIdx = uint32_t(-1);
};
500b57cec5SDimitry Andric 
5104eeddc0SDimitry Andric extern SmallVector<SymbolAux, 0> symAux;
5204eeddc0SDimitry Andric 
530b57cec5SDimitry Andric // The base class for real symbol classes.
540b57cec5SDimitry Andric class Symbol {
550b57cec5SDimitry Andric public:
560b57cec5SDimitry Andric   enum Kind {
570b57cec5SDimitry Andric     PlaceholderKind,
580b57cec5SDimitry Andric     DefinedKind,
590b57cec5SDimitry Andric     CommonKind,
600b57cec5SDimitry Andric     SharedKind,
610b57cec5SDimitry Andric     UndefinedKind,
620b57cec5SDimitry Andric     LazyObjectKind,
630b57cec5SDimitry Andric   };
640b57cec5SDimitry Andric 
650b57cec5SDimitry Andric   Kind kind() const { return static_cast<Kind>(symbolKind); }
660b57cec5SDimitry Andric 
670b57cec5SDimitry Andric   // The file from which this symbol was created.
680b57cec5SDimitry Andric   InputFile *file;
690b57cec5SDimitry Andric 
700b57cec5SDimitry Andric protected:
710b57cec5SDimitry Andric   const char *nameData;
7204eeddc0SDimitry Andric   // 32-bit size saves space.
7304eeddc0SDimitry Andric   uint32_t nameSize;
740b57cec5SDimitry Andric 
750b57cec5SDimitry Andric public:
76*81ad6265SDimitry Andric   // The next three fields have the same meaning as the ELF symbol attributes.
77*81ad6265SDimitry Andric   // type and binding are placed in this order to optimize generating st_info,
78*81ad6265SDimitry Andric   // which is defined as (binding << 4) + (type & 0xf), on a little-endian
79*81ad6265SDimitry Andric   // system.
80*81ad6265SDimitry Andric   uint8_t type : 4; // symbol type
810b57cec5SDimitry Andric 
820b57cec5SDimitry Andric   // Symbol binding. This is not overwritten by replace() to track
830b57cec5SDimitry Andric   // changes during resolution. In particular:
840b57cec5SDimitry Andric   //  - An undefined weak is still weak when it resolves to a shared library.
854824e7fdSDimitry Andric   //  - An undefined weak will not extract archive members, but we have to
860b57cec5SDimitry Andric   //    remember it is weak.
87*81ad6265SDimitry Andric   uint8_t binding : 4;
880b57cec5SDimitry Andric 
890b57cec5SDimitry Andric   uint8_t stOther; // st_other field value
900b57cec5SDimitry Andric 
910b57cec5SDimitry Andric   uint8_t symbolKind;
920b57cec5SDimitry Andric 
93*81ad6265SDimitry Andric   // The partition whose dynamic symbol table contains this symbol's definition.
94*81ad6265SDimitry Andric   uint8_t partition = 1;
95*81ad6265SDimitry Andric 
960b57cec5SDimitry Andric   // Symbol visibility. This is the computed minimum visibility of all
970b57cec5SDimitry Andric   // observed non-DSO symbols.
98480093f4SDimitry Andric   uint8_t visibility : 2;
990b57cec5SDimitry Andric 
100*81ad6265SDimitry Andric   // True if this symbol is preemptible at load time.
101*81ad6265SDimitry Andric   uint8_t isPreemptible : 1;
102*81ad6265SDimitry Andric 
1030b57cec5SDimitry Andric   // True if the symbol was used for linking and thus need to be added to the
1040b57cec5SDimitry Andric   // output file's symbol table. This is true for all symbols except for
1050b57cec5SDimitry Andric   // unreferenced DSO symbols, lazy (archive) symbols, and bitcode symbols that
1060b57cec5SDimitry Andric   // are unreferenced except by other bitcode objects.
107480093f4SDimitry Andric   uint8_t isUsedInRegularObj : 1;
1080b57cec5SDimitry Andric 
109*81ad6265SDimitry Andric   // True if an undefined or shared symbol is used from a live section.
110*81ad6265SDimitry Andric   //
111*81ad6265SDimitry Andric   // NOTE: In Writer.cpp the field is used to mark local defined symbols
112*81ad6265SDimitry Andric   // which are referenced by relocations when -r or --emit-relocs is given.
113*81ad6265SDimitry Andric   uint8_t used : 1;
114*81ad6265SDimitry Andric 
11585868e8aSDimitry Andric   // Used by a Defined symbol with protected or default visibility, to record
11685868e8aSDimitry Andric   // whether it is required to be exported into .dynsym. This is set when any of
11785868e8aSDimitry Andric   // the following conditions hold:
11885868e8aSDimitry Andric   //
119*81ad6265SDimitry Andric   // - If there is an interposable symbol from a DSO. Note: We also do this for
120*81ad6265SDimitry Andric   //   STV_PROTECTED symbols which can't be interposed (to match BFD behavior).
12185868e8aSDimitry Andric   // - If -shared or --export-dynamic is specified, any symbol in an object
12285868e8aSDimitry Andric   //   file/bitcode sets this property, unless suppressed by LTO
12385868e8aSDimitry Andric   //   canBeOmittedFromSymbolTable().
124480093f4SDimitry Andric   uint8_t exportDynamic : 1;
12585868e8aSDimitry Andric 
12685868e8aSDimitry Andric   // True if the symbol is in the --dynamic-list file. A Defined symbol with
12785868e8aSDimitry Andric   // protected or default visibility with this property is required to be
12885868e8aSDimitry Andric   // exported into .dynsym.
129480093f4SDimitry Andric   uint8_t inDynamicList : 1;
1300b57cec5SDimitry Andric 
131e8d8bef9SDimitry Andric   // Used to track if there has been at least one undefined reference to the
132e8d8bef9SDimitry Andric   // symbol. For Undefined and SharedSymbol, the binding may change to STB_WEAK
133e8d8bef9SDimitry Andric   // if the first undefined reference from a non-shared object is weak.
134480093f4SDimitry Andric   uint8_t referenced : 1;
1350b57cec5SDimitry Andric 
136*81ad6265SDimitry Andric   // Used to track if this symbol will be referenced after wrapping is performed
137*81ad6265SDimitry Andric   // (i.e. this will be true for foo if __real_foo is referenced, and will be
138*81ad6265SDimitry Andric   // true for __wrap_foo if foo is referenced).
139*81ad6265SDimitry Andric   uint8_t referencedAfterWrap : 1;
140*81ad6265SDimitry Andric 
1410b57cec5SDimitry Andric   // True if this symbol is specified by --trace-symbol option.
142480093f4SDimitry Andric   uint8_t traced : 1;
1430b57cec5SDimitry Andric 
14404eeddc0SDimitry Andric   // True if the name contains '@'.
14504eeddc0SDimitry Andric   uint8_t hasVersionSuffix : 1;
14604eeddc0SDimitry Andric 
147*81ad6265SDimitry Andric   inline void replace(const Symbol &other);
1480b57cec5SDimitry Andric 
1490b57cec5SDimitry Andric   bool includeInDynsym() const;
1500b57cec5SDimitry Andric   uint8_t computeBinding() const;
151*81ad6265SDimitry Andric   bool isGlobal() const { return binding == llvm::ELF::STB_GLOBAL; }
1520b57cec5SDimitry Andric   bool isWeak() const { return binding == llvm::ELF::STB_WEAK; }
1530b57cec5SDimitry Andric 
1540b57cec5SDimitry Andric   bool isUndefined() const { return symbolKind == UndefinedKind; }
1550b57cec5SDimitry Andric   bool isCommon() const { return symbolKind == CommonKind; }
1560b57cec5SDimitry Andric   bool isDefined() const { return symbolKind == DefinedKind; }
1570b57cec5SDimitry Andric   bool isShared() const { return symbolKind == SharedKind; }
1580b57cec5SDimitry Andric   bool isPlaceholder() const { return symbolKind == PlaceholderKind; }
1590b57cec5SDimitry Andric 
1600b57cec5SDimitry Andric   bool isLocal() const { return binding == llvm::ELF::STB_LOCAL; }
1610b57cec5SDimitry Andric 
162*81ad6265SDimitry Andric   bool isLazy() const { return symbolKind == LazyObjectKind; }
1630b57cec5SDimitry Andric 
1640b57cec5SDimitry Andric   // True if this is an undefined weak symbol. This only works once
1650b57cec5SDimitry Andric   // all input files have been added.
166349cc55cSDimitry Andric   bool isUndefWeak() const { return isWeak() && isUndefined(); }
1670b57cec5SDimitry Andric 
16804eeddc0SDimitry Andric   StringRef getName() const { return {nameData, nameSize}; }
1690b57cec5SDimitry Andric 
1700b57cec5SDimitry Andric   void setName(StringRef s) {
1710b57cec5SDimitry Andric     nameData = s.data();
1720b57cec5SDimitry Andric     nameSize = s.size();
1730b57cec5SDimitry Andric   }
1740b57cec5SDimitry Andric 
1750b57cec5SDimitry Andric   void parseSymbolVersion();
1760b57cec5SDimitry Andric 
177e8d8bef9SDimitry Andric   // Get the NUL-terminated version suffix ("", "@...", or "@@...").
178e8d8bef9SDimitry Andric   //
179e8d8bef9SDimitry Andric   // For @@, the name has been truncated by insert(). For @, the name has been
180e8d8bef9SDimitry Andric   // truncated by Symbol::parseSymbolVersion().
18104eeddc0SDimitry Andric   const char *getVersionSuffix() const { return nameData + nameSize; }
18204eeddc0SDimitry Andric 
18304eeddc0SDimitry Andric   uint32_t getGotIdx() const {
18404eeddc0SDimitry Andric     return auxIdx == uint32_t(-1) ? uint32_t(-1) : symAux[auxIdx].gotIdx;
18504eeddc0SDimitry Andric   }
18604eeddc0SDimitry Andric   uint32_t getPltIdx() const {
18704eeddc0SDimitry Andric     return auxIdx == uint32_t(-1) ? uint32_t(-1) : symAux[auxIdx].pltIdx;
18804eeddc0SDimitry Andric   }
18904eeddc0SDimitry Andric   uint32_t getTlsDescIdx() const {
19004eeddc0SDimitry Andric     return auxIdx == uint32_t(-1) ? uint32_t(-1) : symAux[auxIdx].tlsDescIdx;
19104eeddc0SDimitry Andric   }
19204eeddc0SDimitry Andric   uint32_t getTlsGdIdx() const {
19304eeddc0SDimitry Andric     return auxIdx == uint32_t(-1) ? uint32_t(-1) : symAux[auxIdx].tlsGdIdx;
194e8d8bef9SDimitry Andric   }
195e8d8bef9SDimitry Andric 
19604eeddc0SDimitry Andric   bool isInGot() const { return getGotIdx() != uint32_t(-1); }
19704eeddc0SDimitry Andric   bool isInPlt() const { return getPltIdx() != uint32_t(-1); }
1980b57cec5SDimitry Andric 
1990b57cec5SDimitry Andric   uint64_t getVA(int64_t addend = 0) const;
2000b57cec5SDimitry Andric 
2010b57cec5SDimitry Andric   uint64_t getGotOffset() const;
2020b57cec5SDimitry Andric   uint64_t getGotVA() const;
2030b57cec5SDimitry Andric   uint64_t getGotPltOffset() const;
2040b57cec5SDimitry Andric   uint64_t getGotPltVA() const;
2050b57cec5SDimitry Andric   uint64_t getPltVA() const;
2060b57cec5SDimitry Andric   uint64_t getSize() const;
2070b57cec5SDimitry Andric   OutputSection *getOutputSection() const;
2080b57cec5SDimitry Andric 
2090b57cec5SDimitry Andric   // The following two functions are used for symbol resolution.
2100b57cec5SDimitry Andric   //
2110b57cec5SDimitry Andric   // You are expected to call mergeProperties for all symbols in input
2120b57cec5SDimitry Andric   // files so that attributes that are attached to names rather than
2130b57cec5SDimitry Andric   // indivisual symbol (such as visibility) are merged together.
2140b57cec5SDimitry Andric   //
2150b57cec5SDimitry Andric   // Every time you read a new symbol from an input, you are supposed
2160b57cec5SDimitry Andric   // to call resolve() with the new symbol. That function replaces
2170b57cec5SDimitry Andric   // "this" object as a result of name resolution if the new symbol is
2180b57cec5SDimitry Andric   // more appropriate to be included in the output.
2190b57cec5SDimitry Andric   //
2200b57cec5SDimitry Andric   // For example, if "this" is an undefined symbol and a new symbol is
2210b57cec5SDimitry Andric   // a defined symbol, "this" is replaced with the new symbol.
2220b57cec5SDimitry Andric   void mergeProperties(const Symbol &other);
2230b57cec5SDimitry Andric   void resolve(const Symbol &other);
2240b57cec5SDimitry Andric 
2254824e7fdSDimitry Andric   // If this is a lazy symbol, extract an input file and add the symbol
2260b57cec5SDimitry Andric   // in the file to the symbol table. Calling this function on
2270b57cec5SDimitry Andric   // non-lazy object causes a runtime error.
2284824e7fdSDimitry Andric   void extract() const;
2290b57cec5SDimitry Andric 
230*81ad6265SDimitry Andric   void checkDuplicate(const Defined &other) const;
2310b57cec5SDimitry Andric 
232fe6060f1SDimitry Andric private:
2330b57cec5SDimitry Andric   void resolveUndefined(const Undefined &other);
2340b57cec5SDimitry Andric   void resolveCommon(const CommonSymbol &other);
2350b57cec5SDimitry Andric   void resolveDefined(const Defined &other);
236*81ad6265SDimitry Andric   void resolveLazy(const LazyObject &other);
2370b57cec5SDimitry Andric   void resolveShared(const SharedSymbol &other);
2380b57cec5SDimitry Andric 
239*81ad6265SDimitry Andric   bool shouldReplace(const Defined &other) const;
2400b57cec5SDimitry Andric 
2410b57cec5SDimitry Andric   inline size_t getSymbolSize() const;
2420b57cec5SDimitry Andric 
2430b57cec5SDimitry Andric protected:
24404eeddc0SDimitry Andric   Symbol(Kind k, InputFile *file, StringRef name, uint8_t binding,
2450b57cec5SDimitry Andric          uint8_t stOther, uint8_t type)
246*81ad6265SDimitry Andric       : file(file), nameData(name.data()), nameSize(name.size()), type(type),
247*81ad6265SDimitry Andric         binding(binding), stOther(stOther), symbolKind(k),
248*81ad6265SDimitry Andric         visibility(stOther & 3), isPreemptible(false),
249*81ad6265SDimitry Andric         isUsedInRegularObj(false), used(false), exportDynamic(false),
250*81ad6265SDimitry Andric         inDynamicList(false), referenced(false), referencedAfterWrap(false),
251*81ad6265SDimitry Andric         traced(false), hasVersionSuffix(false), isInIplt(false),
252*81ad6265SDimitry Andric         gotInIgot(false), folded(false), needsTocRestore(false),
253*81ad6265SDimitry Andric         scriptDefined(false), needsCopy(false), needsGot(false),
254*81ad6265SDimitry Andric         needsPlt(false), needsTlsDesc(false), needsTlsGd(false),
255*81ad6265SDimitry Andric         needsTlsGdToIe(false), needsGotDtprel(false), needsTlsIe(false),
256*81ad6265SDimitry Andric         hasDirectReloc(false) {}
2570b57cec5SDimitry Andric 
2580b57cec5SDimitry Andric public:
2590b57cec5SDimitry Andric   // True if this symbol is in the Iplt sub-section of the Plt and the Igot
2600b57cec5SDimitry Andric   // sub-section of the .got.plt or .got.
261480093f4SDimitry Andric   uint8_t isInIplt : 1;
2620b57cec5SDimitry Andric 
2630b57cec5SDimitry Andric   // True if this symbol needs a GOT entry and its GOT entry is actually in
2640b57cec5SDimitry Andric   // Igot. This will be true only for certain non-preemptible ifuncs.
265480093f4SDimitry Andric   uint8_t gotInIgot : 1;
2660b57cec5SDimitry Andric 
2670eae32dcSDimitry Andric   // True if defined relative to a section discarded by ICF.
2680eae32dcSDimitry Andric   uint8_t folded : 1;
2690eae32dcSDimitry Andric 
2700b57cec5SDimitry Andric   // True if a call to this symbol needs to be followed by a restore of the
2710b57cec5SDimitry Andric   // PPC64 toc pointer.
272480093f4SDimitry Andric   uint8_t needsTocRestore : 1;
2730b57cec5SDimitry Andric 
274*81ad6265SDimitry Andric   // True if this symbol is defined by a symbol assignment or wrapped by --wrap.
275*81ad6265SDimitry Andric   //
276*81ad6265SDimitry Andric   // LTO shouldn't inline the symbol because it doesn't know the final content
277*81ad6265SDimitry Andric   // of the symbol.
278480093f4SDimitry Andric   uint8_t scriptDefined : 1;
2790b57cec5SDimitry Andric 
2800eae32dcSDimitry Andric   // True if this symbol needs a canonical PLT entry, or (during
2810eae32dcSDimitry Andric   // postScanRelocations) a copy relocation.
2820eae32dcSDimitry Andric   uint8_t needsCopy : 1;
2830eae32dcSDimitry Andric 
2840eae32dcSDimitry Andric   // Temporary flags used to communicate which symbol entries need PLT and GOT
2850eae32dcSDimitry Andric   // entries during postScanRelocations();
2860eae32dcSDimitry Andric   uint8_t needsGot : 1;
2870eae32dcSDimitry Andric   uint8_t needsPlt : 1;
2880eae32dcSDimitry Andric   uint8_t needsTlsDesc : 1;
2890eae32dcSDimitry Andric   uint8_t needsTlsGd : 1;
2900eae32dcSDimitry Andric   uint8_t needsTlsGdToIe : 1;
2910eae32dcSDimitry Andric   uint8_t needsGotDtprel : 1;
2920eae32dcSDimitry Andric   uint8_t needsTlsIe : 1;
2930eae32dcSDimitry Andric   uint8_t hasDirectReloc : 1;
2940eae32dcSDimitry Andric 
295*81ad6265SDimitry Andric   // A symAux index used to access GOT/PLT entry indexes. This is allocated in
296*81ad6265SDimitry Andric   // postScanRelocations().
297*81ad6265SDimitry Andric   uint32_t auxIdx = -1;
298*81ad6265SDimitry Andric   uint32_t dynsymIndex = 0;
299*81ad6265SDimitry Andric 
300*81ad6265SDimitry Andric   // This field is a index to the symbol's version definition.
301*81ad6265SDimitry Andric   uint16_t verdefIndex = -1;
302*81ad6265SDimitry Andric 
303*81ad6265SDimitry Andric   // Version definition index.
304*81ad6265SDimitry Andric   uint16_t versionId;
305*81ad6265SDimitry Andric 
30604eeddc0SDimitry Andric   bool needsDynReloc() const {
30704eeddc0SDimitry Andric     return needsCopy || needsGot || needsPlt || needsTlsDesc || needsTlsGd ||
308*81ad6265SDimitry Andric            needsTlsGdToIe || needsGotDtprel || needsTlsIe;
30904eeddc0SDimitry Andric   }
31004eeddc0SDimitry Andric   void allocateAux() {
31104eeddc0SDimitry Andric     assert(auxIdx == uint32_t(-1));
31204eeddc0SDimitry Andric     auxIdx = symAux.size();
31304eeddc0SDimitry Andric     symAux.emplace_back();
31404eeddc0SDimitry Andric   }
31504eeddc0SDimitry Andric 
3160b57cec5SDimitry Andric   bool isSection() const { return type == llvm::ELF::STT_SECTION; }
3170b57cec5SDimitry Andric   bool isTls() const { return type == llvm::ELF::STT_TLS; }
3180b57cec5SDimitry Andric   bool isFunc() const { return type == llvm::ELF::STT_FUNC; }
3190b57cec5SDimitry Andric   bool isGnuIFunc() const { return type == llvm::ELF::STT_GNU_IFUNC; }
3200b57cec5SDimitry Andric   bool isObject() const { return type == llvm::ELF::STT_OBJECT; }
3210b57cec5SDimitry Andric   bool isFile() const { return type == llvm::ELF::STT_FILE; }
3220b57cec5SDimitry Andric };
3230b57cec5SDimitry Andric 
3240b57cec5SDimitry Andric // Represents a symbol that is defined in the current output file.
3250b57cec5SDimitry Andric class Defined : public Symbol {
3260b57cec5SDimitry Andric public:
32704eeddc0SDimitry Andric   Defined(InputFile *file, StringRef name, uint8_t binding, uint8_t stOther,
3280b57cec5SDimitry Andric           uint8_t type, uint64_t value, uint64_t size, SectionBase *section)
3290b57cec5SDimitry Andric       : Symbol(DefinedKind, file, name, binding, stOther, type), value(value),
330*81ad6265SDimitry Andric         size(size), section(section) {
331*81ad6265SDimitry Andric     exportDynamic = config->exportDynamic;
332*81ad6265SDimitry Andric   }
3330b57cec5SDimitry Andric 
3340b57cec5SDimitry Andric   static bool classof(const Symbol *s) { return s->isDefined(); }
3350b57cec5SDimitry Andric 
3360b57cec5SDimitry Andric   uint64_t value;
3370b57cec5SDimitry Andric   uint64_t size;
3380b57cec5SDimitry Andric   SectionBase *section;
3390b57cec5SDimitry Andric };
3400b57cec5SDimitry Andric 
3410b57cec5SDimitry Andric // Represents a common symbol.
3420b57cec5SDimitry Andric //
3430b57cec5SDimitry Andric // On Unix, it is traditionally allowed to write variable definitions
3440b57cec5SDimitry Andric // without initialization expressions (such as "int foo;") to header
3450b57cec5SDimitry Andric // files. Such definition is called "tentative definition".
3460b57cec5SDimitry Andric //
3470b57cec5SDimitry Andric // Using tentative definition is usually considered a bad practice
3480b57cec5SDimitry Andric // because you should write only declarations (such as "extern int
3490b57cec5SDimitry Andric // foo;") to header files. Nevertheless, the linker and the compiler
3500b57cec5SDimitry Andric // have to do something to support bad code by allowing duplicate
3510b57cec5SDimitry Andric // definitions for this particular case.
3520b57cec5SDimitry Andric //
3530b57cec5SDimitry Andric // Common symbols represent variable definitions without initializations.
354480093f4SDimitry Andric // The compiler creates common symbols when it sees variable definitions
3550b57cec5SDimitry Andric // without initialization (you can suppress this behavior and let the
3560b57cec5SDimitry Andric // compiler create a regular defined symbol by -fno-common).
3570b57cec5SDimitry Andric //
3580b57cec5SDimitry Andric // The linker allows common symbols to be replaced by regular defined
3590b57cec5SDimitry Andric // symbols. If there are remaining common symbols after name resolution is
3600b57cec5SDimitry Andric // complete, they are converted to regular defined symbols in a .bss
3610b57cec5SDimitry Andric // section. (Therefore, the later passes don't see any CommonSymbols.)
3620b57cec5SDimitry Andric class CommonSymbol : public Symbol {
3630b57cec5SDimitry Andric public:
36404eeddc0SDimitry Andric   CommonSymbol(InputFile *file, StringRef name, uint8_t binding,
3650b57cec5SDimitry Andric                uint8_t stOther, uint8_t type, uint64_t alignment, uint64_t size)
3660b57cec5SDimitry Andric       : Symbol(CommonKind, file, name, binding, stOther, type),
367*81ad6265SDimitry Andric         alignment(alignment), size(size) {
368*81ad6265SDimitry Andric     exportDynamic = config->exportDynamic;
369*81ad6265SDimitry Andric   }
3700b57cec5SDimitry Andric 
3710b57cec5SDimitry Andric   static bool classof(const Symbol *s) { return s->isCommon(); }
3720b57cec5SDimitry Andric 
3730b57cec5SDimitry Andric   uint32_t alignment;
3740b57cec5SDimitry Andric   uint64_t size;
3750b57cec5SDimitry Andric };
3760b57cec5SDimitry Andric 
3770b57cec5SDimitry Andric class Undefined : public Symbol {
3780b57cec5SDimitry Andric public:
37904eeddc0SDimitry Andric   Undefined(InputFile *file, StringRef name, uint8_t binding, uint8_t stOther,
3800b57cec5SDimitry Andric             uint8_t type, uint32_t discardedSecIdx = 0)
3810b57cec5SDimitry Andric       : Symbol(UndefinedKind, file, name, binding, stOther, type),
3820b57cec5SDimitry Andric         discardedSecIdx(discardedSecIdx) {}
3830b57cec5SDimitry Andric 
3840b57cec5SDimitry Andric   static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; }
3850b57cec5SDimitry Andric 
3860b57cec5SDimitry Andric   // The section index if in a discarded section, 0 otherwise.
3870b57cec5SDimitry Andric   uint32_t discardedSecIdx;
388*81ad6265SDimitry Andric   bool nonPrevailing = false;
3890b57cec5SDimitry Andric };
3900b57cec5SDimitry Andric 
3910b57cec5SDimitry Andric class SharedSymbol : public Symbol {
3920b57cec5SDimitry Andric public:
3930b57cec5SDimitry Andric   static bool classof(const Symbol *s) { return s->kind() == SharedKind; }
3940b57cec5SDimitry Andric 
3950b57cec5SDimitry Andric   SharedSymbol(InputFile &file, StringRef name, uint8_t binding,
3960b57cec5SDimitry Andric                uint8_t stOther, uint8_t type, uint64_t value, uint64_t size,
397*81ad6265SDimitry Andric                uint32_t alignment)
3980b57cec5SDimitry Andric       : Symbol(SharedKind, &file, name, binding, stOther, type), value(value),
3990b57cec5SDimitry Andric         size(size), alignment(alignment) {
400*81ad6265SDimitry Andric     exportDynamic = true;
4010b57cec5SDimitry Andric     // GNU ifunc is a mechanism to allow user-supplied functions to
4020b57cec5SDimitry Andric     // resolve PLT slot values at load-time. This is contrary to the
4030b57cec5SDimitry Andric     // regular symbol resolution scheme in which symbols are resolved just
4040b57cec5SDimitry Andric     // by name. Using this hook, you can program how symbols are solved
4050b57cec5SDimitry Andric     // for you program. For example, you can make "memcpy" to be resolved
4060b57cec5SDimitry Andric     // to a SSE-enabled version of memcpy only when a machine running the
4070b57cec5SDimitry Andric     // program supports the SSE instruction set.
4080b57cec5SDimitry Andric     //
4090b57cec5SDimitry Andric     // Naturally, such symbols should always be called through their PLT
4100b57cec5SDimitry Andric     // slots. What GNU ifunc symbols point to are resolver functions, and
4110b57cec5SDimitry Andric     // calling them directly doesn't make sense (unless you are writing a
4120b57cec5SDimitry Andric     // loader).
4130b57cec5SDimitry Andric     //
4140b57cec5SDimitry Andric     // For DSO symbols, we always call them through PLT slots anyway.
4150b57cec5SDimitry Andric     // So there's no difference between GNU ifunc and regular function
4160b57cec5SDimitry Andric     // symbols if they are in DSOs. So we can handle GNU_IFUNC as FUNC.
4170b57cec5SDimitry Andric     if (this->type == llvm::ELF::STT_GNU_IFUNC)
4180b57cec5SDimitry Andric       this->type = llvm::ELF::STT_FUNC;
4190b57cec5SDimitry Andric   }
4200b57cec5SDimitry Andric 
4210b57cec5SDimitry Andric   uint64_t value; // st_value
4220b57cec5SDimitry Andric   uint64_t size;  // st_size
4230b57cec5SDimitry Andric   uint32_t alignment;
4240b57cec5SDimitry Andric };
4250b57cec5SDimitry Andric 
426*81ad6265SDimitry Andric // LazyObject symbols represent symbols in object files between --start-lib and
427*81ad6265SDimitry Andric // --end-lib options. LLD also handles traditional archives as if all the files
428*81ad6265SDimitry Andric // in the archive are surrounded by --start-lib and --end-lib.
4290b57cec5SDimitry Andric //
4300b57cec5SDimitry Andric // A special complication is the handling of weak undefined symbols. They should
4310b57cec5SDimitry Andric // not load a file, but we have to remember we have seen both the weak undefined
4320b57cec5SDimitry Andric // and the lazy. We represent that with a lazy symbol with a weak binding. This
4330b57cec5SDimitry Andric // means that code looking for undefined symbols normally also has to take lazy
4340b57cec5SDimitry Andric // symbols into consideration.
4350b57cec5SDimitry Andric class LazyObject : public Symbol {
4360b57cec5SDimitry Andric public:
437*81ad6265SDimitry Andric   LazyObject(InputFile &file)
438*81ad6265SDimitry Andric       : Symbol(LazyObjectKind, &file, {}, llvm::ELF::STB_GLOBAL,
439*81ad6265SDimitry Andric                llvm::ELF::STV_DEFAULT, llvm::ELF::STT_NOTYPE) {}
4400b57cec5SDimitry Andric 
4410b57cec5SDimitry Andric   static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; }
4420b57cec5SDimitry Andric };
4430b57cec5SDimitry Andric 
4440b57cec5SDimitry Andric // Some linker-generated symbols need to be created as
4450b57cec5SDimitry Andric // Defined symbols.
4460b57cec5SDimitry Andric struct ElfSym {
4470b57cec5SDimitry Andric   // __bss_start
4480b57cec5SDimitry Andric   static Defined *bss;
4490b57cec5SDimitry Andric 
4500b57cec5SDimitry Andric   // etext and _etext
4510b57cec5SDimitry Andric   static Defined *etext1;
4520b57cec5SDimitry Andric   static Defined *etext2;
4530b57cec5SDimitry Andric 
4540b57cec5SDimitry Andric   // edata and _edata
4550b57cec5SDimitry Andric   static Defined *edata1;
4560b57cec5SDimitry Andric   static Defined *edata2;
4570b57cec5SDimitry Andric 
4580b57cec5SDimitry Andric   // end and _end
4590b57cec5SDimitry Andric   static Defined *end1;
4600b57cec5SDimitry Andric   static Defined *end2;
4610b57cec5SDimitry Andric 
4620b57cec5SDimitry Andric   // The _GLOBAL_OFFSET_TABLE_ symbol is defined by target convention to
4630b57cec5SDimitry Andric   // be at some offset from the base of the .got section, usually 0 or
4640b57cec5SDimitry Andric   // the end of the .got.
4650b57cec5SDimitry Andric   static Defined *globalOffsetTable;
4660b57cec5SDimitry Andric 
4670b57cec5SDimitry Andric   // _gp, _gp_disp and __gnu_local_gp symbols. Only for MIPS.
4680b57cec5SDimitry Andric   static Defined *mipsGp;
4690b57cec5SDimitry Andric   static Defined *mipsGpDisp;
4700b57cec5SDimitry Andric   static Defined *mipsLocalGp;
4710b57cec5SDimitry Andric 
4720b57cec5SDimitry Andric   // __rel{,a}_iplt_{start,end} symbols.
4730b57cec5SDimitry Andric   static Defined *relaIpltStart;
4740b57cec5SDimitry Andric   static Defined *relaIpltEnd;
4750b57cec5SDimitry Andric 
4760b57cec5SDimitry Andric   // __global_pointer$ for RISC-V.
4770b57cec5SDimitry Andric   static Defined *riscvGlobalPointer;
4780b57cec5SDimitry Andric 
4790b57cec5SDimitry Andric   // _TLS_MODULE_BASE_ on targets that support TLSDESC.
4800b57cec5SDimitry Andric   static Defined *tlsModuleBase;
4810b57cec5SDimitry Andric };
4820b57cec5SDimitry Andric 
4830b57cec5SDimitry Andric // A buffer class that is large enough to hold any Symbol-derived
4840b57cec5SDimitry Andric // object. We allocate memory using this class and instantiate a symbol
4850b57cec5SDimitry Andric // using the placement new.
486*81ad6265SDimitry Andric 
487*81ad6265SDimitry Andric // It is important to keep the size of SymbolUnion small for performance and
488*81ad6265SDimitry Andric // memory usage reasons. 64 bytes is a soft limit based on the size of Defined
489*81ad6265SDimitry Andric // on a 64-bit system. This is enforced by a static_assert in Symbols.cpp.
4900b57cec5SDimitry Andric union SymbolUnion {
4910b57cec5SDimitry Andric   alignas(Defined) char a[sizeof(Defined)];
4920b57cec5SDimitry Andric   alignas(CommonSymbol) char b[sizeof(CommonSymbol)];
4930b57cec5SDimitry Andric   alignas(Undefined) char c[sizeof(Undefined)];
4940b57cec5SDimitry Andric   alignas(SharedSymbol) char d[sizeof(SharedSymbol)];
495*81ad6265SDimitry Andric   alignas(LazyObject) char e[sizeof(LazyObject)];
4960b57cec5SDimitry Andric };
4970b57cec5SDimitry Andric 
498*81ad6265SDimitry Andric void printTraceSymbol(const Symbol &sym, StringRef name);
4990b57cec5SDimitry Andric 
5000b57cec5SDimitry Andric size_t Symbol::getSymbolSize() const {
5010b57cec5SDimitry Andric   switch (kind()) {
5020b57cec5SDimitry Andric   case CommonKind:
5030b57cec5SDimitry Andric     return sizeof(CommonSymbol);
5040b57cec5SDimitry Andric   case DefinedKind:
5050b57cec5SDimitry Andric     return sizeof(Defined);
5060b57cec5SDimitry Andric   case LazyObjectKind:
5070b57cec5SDimitry Andric     return sizeof(LazyObject);
5080b57cec5SDimitry Andric   case SharedKind:
5090b57cec5SDimitry Andric     return sizeof(SharedSymbol);
5100b57cec5SDimitry Andric   case UndefinedKind:
5110b57cec5SDimitry Andric     return sizeof(Undefined);
5120b57cec5SDimitry Andric   case PlaceholderKind:
5130b57cec5SDimitry Andric     return sizeof(Symbol);
5140b57cec5SDimitry Andric   }
5150b57cec5SDimitry Andric   llvm_unreachable("unknown symbol kind");
5160b57cec5SDimitry Andric }
5170b57cec5SDimitry Andric 
5180b57cec5SDimitry Andric // replace() replaces "this" object with a given symbol by memcpy'ing
5190b57cec5SDimitry Andric // it over to "this". This function is called as a result of name
5200b57cec5SDimitry Andric // resolution, e.g. to replace an undefind symbol with a defined symbol.
521*81ad6265SDimitry Andric void Symbol::replace(const Symbol &other) {
5220b57cec5SDimitry Andric   Symbol old = *this;
523*81ad6265SDimitry Andric   memcpy(this, &other, other.getSymbolSize());
5240b57cec5SDimitry Andric 
52585868e8aSDimitry Andric   // old may be a placeholder. The referenced fields must be initialized in
52685868e8aSDimitry Andric   // SymbolTable::insert.
527*81ad6265SDimitry Andric   nameData = old.nameData;
528*81ad6265SDimitry Andric   nameSize = old.nameSize;
529*81ad6265SDimitry Andric   partition = old.partition;
5300b57cec5SDimitry Andric   visibility = old.visibility;
531*81ad6265SDimitry Andric   isPreemptible = old.isPreemptible;
5320b57cec5SDimitry Andric   isUsedInRegularObj = old.isUsedInRegularObj;
5330b57cec5SDimitry Andric   exportDynamic = old.exportDynamic;
53485868e8aSDimitry Andric   inDynamicList = old.inDynamicList;
53585868e8aSDimitry Andric   referenced = old.referenced;
5360b57cec5SDimitry Andric   traced = old.traced;
53704eeddc0SDimitry Andric   hasVersionSuffix = old.hasVersionSuffix;
5380b57cec5SDimitry Andric   scriptDefined = old.scriptDefined;
539*81ad6265SDimitry Andric   versionId = old.versionId;
5400b57cec5SDimitry Andric 
5410b57cec5SDimitry Andric   // Print out a log message if --trace-symbol was specified.
5420b57cec5SDimitry Andric   // This is for debugging.
5430b57cec5SDimitry Andric   if (traced)
544*81ad6265SDimitry Andric     printTraceSymbol(*this, getName());
5450b57cec5SDimitry Andric }
5460b57cec5SDimitry Andric 
5470eae32dcSDimitry Andric template <typename... T> Defined *makeDefined(T &&...args) {
5480eae32dcSDimitry Andric   return new (reinterpret_cast<Defined *>(
5490eae32dcSDimitry Andric       getSpecificAllocSingleton<SymbolUnion>().Allocate()))
5500eae32dcSDimitry Andric       Defined(std::forward<T>(args)...);
5510eae32dcSDimitry Andric }
5520eae32dcSDimitry Andric 
553*81ad6265SDimitry Andric void reportDuplicate(const Symbol &sym, const InputFile *newFile,
554*81ad6265SDimitry Andric                      InputSectionBase *errSec, uint64_t errOffset);
5550b57cec5SDimitry Andric void maybeWarnUnorderableSymbol(const Symbol *sym);
556480093f4SDimitry Andric bool computeIsPreemptible(const Symbol &sym);
557349cc55cSDimitry Andric 
5580b57cec5SDimitry Andric } // namespace elf
5590b57cec5SDimitry Andric } // namespace lld
5600b57cec5SDimitry Andric 
5610b57cec5SDimitry Andric #endif
562