xref: /freebsd/contrib/llvm-project/lld/ELF/Symbols.h (revision 4824e7fd18a1223177218d4aec1b3c6c5c4a444e)
10b57cec5SDimitry Andric //===- Symbols.h ------------------------------------------------*- C++ -*-===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file defines various types of Symbols.
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #ifndef LLD_ELF_SYMBOLS_H
140b57cec5SDimitry Andric #define LLD_ELF_SYMBOLS_H
150b57cec5SDimitry Andric 
160b57cec5SDimitry Andric #include "InputFiles.h"
170b57cec5SDimitry Andric #include "InputSection.h"
180b57cec5SDimitry Andric #include "lld/Common/LLVM.h"
190b57cec5SDimitry Andric #include "lld/Common/Strings.h"
205ffd83dbSDimitry Andric #include "llvm/ADT/DenseMap.h"
210b57cec5SDimitry Andric #include "llvm/Object/Archive.h"
220b57cec5SDimitry Andric #include "llvm/Object/ELF.h"
23349cc55cSDimitry Andric #include <tuple>
240b57cec5SDimitry Andric 
250b57cec5SDimitry Andric namespace lld {
265ffd83dbSDimitry Andric // Returns a string representation for a symbol for diagnostics.
2785868e8aSDimitry Andric std::string toString(const elf::Symbol &);
2885868e8aSDimitry Andric 
2985868e8aSDimitry Andric // There are two different ways to convert an Archive::Symbol to a string:
3085868e8aSDimitry Andric // One for Microsoft name mangling and one for Itanium name mangling.
3185868e8aSDimitry Andric // Call the functions toCOFFString and toELFString, not just toString.
3285868e8aSDimitry Andric std::string toELFString(const llvm::object::Archive::Symbol &);
3385868e8aSDimitry Andric 
340b57cec5SDimitry Andric namespace elf {
350b57cec5SDimitry Andric class CommonSymbol;
360b57cec5SDimitry Andric class Defined;
370b57cec5SDimitry Andric class InputFile;
380b57cec5SDimitry Andric class LazyArchive;
390b57cec5SDimitry Andric class LazyObject;
400b57cec5SDimitry Andric class SharedSymbol;
410b57cec5SDimitry Andric class Symbol;
420b57cec5SDimitry Andric class Undefined;
430b57cec5SDimitry Andric 
440b57cec5SDimitry Andric // This is a StringRef-like container that doesn't run strlen().
450b57cec5SDimitry Andric //
460b57cec5SDimitry Andric // ELF string tables contain a lot of null-terminated strings. Most of them
470b57cec5SDimitry Andric // are not necessary for the linker because they are names of local symbols,
480b57cec5SDimitry Andric // and the linker doesn't use local symbol names for name resolution. So, we
490b57cec5SDimitry Andric // use this class to represents strings read from string tables.
500b57cec5SDimitry Andric struct StringRefZ {
510b57cec5SDimitry Andric   StringRefZ(const char *s) : data(s), size(-1) {}
520b57cec5SDimitry Andric   StringRefZ(StringRef s) : data(s.data()), size(s.size()) {}
530b57cec5SDimitry Andric 
540b57cec5SDimitry Andric   const char *data;
550b57cec5SDimitry Andric   const uint32_t size;
560b57cec5SDimitry Andric };
570b57cec5SDimitry Andric 
580b57cec5SDimitry Andric // The base class for real symbol classes.
590b57cec5SDimitry Andric class Symbol {
600b57cec5SDimitry Andric public:
610b57cec5SDimitry Andric   enum Kind {
620b57cec5SDimitry Andric     PlaceholderKind,
630b57cec5SDimitry Andric     DefinedKind,
640b57cec5SDimitry Andric     CommonKind,
650b57cec5SDimitry Andric     SharedKind,
660b57cec5SDimitry Andric     UndefinedKind,
670b57cec5SDimitry Andric     LazyArchiveKind,
680b57cec5SDimitry Andric     LazyObjectKind,
690b57cec5SDimitry Andric   };
700b57cec5SDimitry Andric 
710b57cec5SDimitry Andric   Kind kind() const { return static_cast<Kind>(symbolKind); }
720b57cec5SDimitry Andric 
730b57cec5SDimitry Andric   // The file from which this symbol was created.
740b57cec5SDimitry Andric   InputFile *file;
750b57cec5SDimitry Andric 
760b57cec5SDimitry Andric protected:
770b57cec5SDimitry Andric   const char *nameData;
780b57cec5SDimitry Andric   mutable uint32_t nameSize;
790b57cec5SDimitry Andric 
800b57cec5SDimitry Andric public:
810b57cec5SDimitry Andric   uint32_t dynsymIndex = 0;
820b57cec5SDimitry Andric   uint32_t gotIndex = -1;
830b57cec5SDimitry Andric   uint32_t pltIndex = -1;
840b57cec5SDimitry Andric 
850b57cec5SDimitry Andric   uint32_t globalDynIndex = -1;
860b57cec5SDimitry Andric 
870b57cec5SDimitry Andric   // This field is a index to the symbol's version definition.
880b57cec5SDimitry Andric   uint32_t verdefIndex = -1;
890b57cec5SDimitry Andric 
900b57cec5SDimitry Andric   // Version definition index.
910b57cec5SDimitry Andric   uint16_t versionId;
920b57cec5SDimitry Andric 
930b57cec5SDimitry Andric   // Symbol binding. This is not overwritten by replace() to track
940b57cec5SDimitry Andric   // changes during resolution. In particular:
950b57cec5SDimitry Andric   //  - An undefined weak is still weak when it resolves to a shared library.
96*4824e7fdSDimitry Andric   //  - An undefined weak will not extract archive members, but we have to
970b57cec5SDimitry Andric   //    remember it is weak.
980b57cec5SDimitry Andric   uint8_t binding;
990b57cec5SDimitry Andric 
1000b57cec5SDimitry Andric   // The following fields have the same meaning as the ELF symbol attributes.
1010b57cec5SDimitry Andric   uint8_t type;    // symbol type
1020b57cec5SDimitry Andric   uint8_t stOther; // st_other field value
1030b57cec5SDimitry Andric 
1040b57cec5SDimitry Andric   uint8_t symbolKind;
1050b57cec5SDimitry Andric 
1060b57cec5SDimitry Andric   // Symbol visibility. This is the computed minimum visibility of all
1070b57cec5SDimitry Andric   // observed non-DSO symbols.
108480093f4SDimitry Andric   uint8_t visibility : 2;
1090b57cec5SDimitry Andric 
1100b57cec5SDimitry Andric   // True if the symbol was used for linking and thus need to be added to the
1110b57cec5SDimitry Andric   // output file's symbol table. This is true for all symbols except for
1120b57cec5SDimitry Andric   // unreferenced DSO symbols, lazy (archive) symbols, and bitcode symbols that
1130b57cec5SDimitry Andric   // are unreferenced except by other bitcode objects.
114480093f4SDimitry Andric   uint8_t isUsedInRegularObj : 1;
1150b57cec5SDimitry Andric 
11685868e8aSDimitry Andric   // Used by a Defined symbol with protected or default visibility, to record
11785868e8aSDimitry Andric   // whether it is required to be exported into .dynsym. This is set when any of
11885868e8aSDimitry Andric   // the following conditions hold:
11985868e8aSDimitry Andric   //
12085868e8aSDimitry Andric   // - If there is an interposable symbol from a DSO.
12185868e8aSDimitry Andric   // - If -shared or --export-dynamic is specified, any symbol in an object
12285868e8aSDimitry Andric   //   file/bitcode sets this property, unless suppressed by LTO
12385868e8aSDimitry Andric   //   canBeOmittedFromSymbolTable().
124480093f4SDimitry Andric   uint8_t exportDynamic : 1;
12585868e8aSDimitry Andric 
12685868e8aSDimitry Andric   // True if the symbol is in the --dynamic-list file. A Defined symbol with
12785868e8aSDimitry Andric   // protected or default visibility with this property is required to be
12885868e8aSDimitry Andric   // exported into .dynsym.
129480093f4SDimitry Andric   uint8_t inDynamicList : 1;
1300b57cec5SDimitry Andric 
1310b57cec5SDimitry Andric   // False if LTO shouldn't inline whatever this symbol points to. If a symbol
1320b57cec5SDimitry Andric   // is overwritten after LTO, LTO shouldn't inline the symbol because it
1330b57cec5SDimitry Andric   // doesn't know the final contents of the symbol.
134480093f4SDimitry Andric   uint8_t canInline : 1;
13585868e8aSDimitry Andric 
136e8d8bef9SDimitry Andric   // Used to track if there has been at least one undefined reference to the
137e8d8bef9SDimitry Andric   // symbol. For Undefined and SharedSymbol, the binding may change to STB_WEAK
138e8d8bef9SDimitry Andric   // if the first undefined reference from a non-shared object is weak.
139e8d8bef9SDimitry Andric   //
140e8d8bef9SDimitry Andric   // This is also used to retain __wrap_foo when foo is referenced.
141480093f4SDimitry Andric   uint8_t referenced : 1;
1420b57cec5SDimitry Andric 
1430b57cec5SDimitry Andric   // True if this symbol is specified by --trace-symbol option.
144480093f4SDimitry Andric   uint8_t traced : 1;
1450b57cec5SDimitry Andric 
14685868e8aSDimitry Andric   inline void replace(const Symbol &newSym);
1470b57cec5SDimitry Andric 
1480b57cec5SDimitry Andric   bool includeInDynsym() const;
1490b57cec5SDimitry Andric   uint8_t computeBinding() const;
1500b57cec5SDimitry Andric   bool isWeak() const { return binding == llvm::ELF::STB_WEAK; }
1510b57cec5SDimitry Andric 
1520b57cec5SDimitry Andric   bool isUndefined() const { return symbolKind == UndefinedKind; }
1530b57cec5SDimitry Andric   bool isCommon() const { return symbolKind == CommonKind; }
1540b57cec5SDimitry Andric   bool isDefined() const { return symbolKind == DefinedKind; }
1550b57cec5SDimitry Andric   bool isShared() const { return symbolKind == SharedKind; }
1560b57cec5SDimitry Andric   bool isPlaceholder() const { return symbolKind == PlaceholderKind; }
1570b57cec5SDimitry Andric 
1580b57cec5SDimitry Andric   bool isLocal() const { return binding == llvm::ELF::STB_LOCAL; }
1590b57cec5SDimitry Andric 
1600b57cec5SDimitry Andric   bool isLazy() const {
1610b57cec5SDimitry Andric     return symbolKind == LazyArchiveKind || symbolKind == LazyObjectKind;
1620b57cec5SDimitry Andric   }
1630b57cec5SDimitry Andric 
1640b57cec5SDimitry Andric   // True if this is an undefined weak symbol. This only works once
1650b57cec5SDimitry Andric   // all input files have been added.
166349cc55cSDimitry Andric   bool isUndefWeak() const { return isWeak() && isUndefined(); }
1670b57cec5SDimitry Andric 
1680b57cec5SDimitry Andric   StringRef getName() const {
1690b57cec5SDimitry Andric     if (nameSize == (uint32_t)-1)
1700b57cec5SDimitry Andric       nameSize = strlen(nameData);
1710b57cec5SDimitry Andric     return {nameData, nameSize};
1720b57cec5SDimitry Andric   }
1730b57cec5SDimitry Andric 
1740b57cec5SDimitry Andric   void setName(StringRef s) {
1750b57cec5SDimitry Andric     nameData = s.data();
1760b57cec5SDimitry Andric     nameSize = s.size();
1770b57cec5SDimitry Andric   }
1780b57cec5SDimitry Andric 
1790b57cec5SDimitry Andric   void parseSymbolVersion();
1800b57cec5SDimitry Andric 
181e8d8bef9SDimitry Andric   // Get the NUL-terminated version suffix ("", "@...", or "@@...").
182e8d8bef9SDimitry Andric   //
183e8d8bef9SDimitry Andric   // For @@, the name has been truncated by insert(). For @, the name has been
184e8d8bef9SDimitry Andric   // truncated by Symbol::parseSymbolVersion().
185e8d8bef9SDimitry Andric   const char *getVersionSuffix() const {
186e8d8bef9SDimitry Andric     (void)getName();
187e8d8bef9SDimitry Andric     return nameData + nameSize;
188e8d8bef9SDimitry Andric   }
189e8d8bef9SDimitry Andric 
1900b57cec5SDimitry Andric   bool isInGot() const { return gotIndex != -1U; }
1910b57cec5SDimitry Andric   bool isInPlt() const { return pltIndex != -1U; }
1920b57cec5SDimitry Andric 
1930b57cec5SDimitry Andric   uint64_t getVA(int64_t addend = 0) const;
1940b57cec5SDimitry Andric 
1950b57cec5SDimitry Andric   uint64_t getGotOffset() const;
1960b57cec5SDimitry Andric   uint64_t getGotVA() const;
1970b57cec5SDimitry Andric   uint64_t getGotPltOffset() const;
1980b57cec5SDimitry Andric   uint64_t getGotPltVA() const;
1990b57cec5SDimitry Andric   uint64_t getPltVA() const;
2000b57cec5SDimitry Andric   uint64_t getSize() const;
2010b57cec5SDimitry Andric   OutputSection *getOutputSection() const;
2020b57cec5SDimitry Andric 
2030b57cec5SDimitry Andric   // The following two functions are used for symbol resolution.
2040b57cec5SDimitry Andric   //
2050b57cec5SDimitry Andric   // You are expected to call mergeProperties for all symbols in input
2060b57cec5SDimitry Andric   // files so that attributes that are attached to names rather than
2070b57cec5SDimitry Andric   // indivisual symbol (such as visibility) are merged together.
2080b57cec5SDimitry Andric   //
2090b57cec5SDimitry Andric   // Every time you read a new symbol from an input, you are supposed
2100b57cec5SDimitry Andric   // to call resolve() with the new symbol. That function replaces
2110b57cec5SDimitry Andric   // "this" object as a result of name resolution if the new symbol is
2120b57cec5SDimitry Andric   // more appropriate to be included in the output.
2130b57cec5SDimitry Andric   //
2140b57cec5SDimitry Andric   // For example, if "this" is an undefined symbol and a new symbol is
2150b57cec5SDimitry Andric   // a defined symbol, "this" is replaced with the new symbol.
2160b57cec5SDimitry Andric   void mergeProperties(const Symbol &other);
2170b57cec5SDimitry Andric   void resolve(const Symbol &other);
2180b57cec5SDimitry Andric 
219*4824e7fdSDimitry Andric   // If this is a lazy symbol, extract an input file and add the symbol
2200b57cec5SDimitry Andric   // in the file to the symbol table. Calling this function on
2210b57cec5SDimitry Andric   // non-lazy object causes a runtime error.
222*4824e7fdSDimitry Andric   void extract() const;
2230b57cec5SDimitry Andric 
2240b57cec5SDimitry Andric   static bool isExportDynamic(Kind k, uint8_t visibility) {
2250b57cec5SDimitry Andric     if (k == SharedKind)
2260b57cec5SDimitry Andric       return visibility == llvm::ELF::STV_DEFAULT;
2270b57cec5SDimitry Andric     return config->shared || config->exportDynamic;
2280b57cec5SDimitry Andric   }
2290b57cec5SDimitry Andric 
230fe6060f1SDimitry Andric private:
2310b57cec5SDimitry Andric   void resolveUndefined(const Undefined &other);
2320b57cec5SDimitry Andric   void resolveCommon(const CommonSymbol &other);
2330b57cec5SDimitry Andric   void resolveDefined(const Defined &other);
2340b57cec5SDimitry Andric   template <class LazyT> void resolveLazy(const LazyT &other);
2350b57cec5SDimitry Andric   void resolveShared(const SharedSymbol &other);
2360b57cec5SDimitry Andric 
2370b57cec5SDimitry Andric   int compare(const Symbol *other) const;
2380b57cec5SDimitry Andric 
2390b57cec5SDimitry Andric   inline size_t getSymbolSize() const;
2400b57cec5SDimitry Andric 
2410b57cec5SDimitry Andric protected:
2420b57cec5SDimitry Andric   Symbol(Kind k, InputFile *file, StringRefZ name, uint8_t binding,
2430b57cec5SDimitry Andric          uint8_t stOther, uint8_t type)
2440b57cec5SDimitry Andric       : file(file), nameData(name.data), nameSize(name.size), binding(binding),
2450b57cec5SDimitry Andric         type(type), stOther(stOther), symbolKind(k), visibility(stOther & 3),
2460b57cec5SDimitry Andric         isUsedInRegularObj(!file || file->kind() == InputFile::ObjKind),
24785868e8aSDimitry Andric         exportDynamic(isExportDynamic(k, visibility)), inDynamicList(false),
24885868e8aSDimitry Andric         canInline(false), referenced(false), traced(false), needsPltAddr(false),
24985868e8aSDimitry Andric         isInIplt(false), gotInIgot(false), isPreemptible(false),
25085868e8aSDimitry Andric         used(!config->gcSections), needsTocRestore(false),
2510b57cec5SDimitry Andric         scriptDefined(false) {}
2520b57cec5SDimitry Andric 
2530b57cec5SDimitry Andric public:
2540b57cec5SDimitry Andric   // True the symbol should point to its PLT entry.
2550b57cec5SDimitry Andric   // For SharedSymbol only.
256480093f4SDimitry Andric   uint8_t needsPltAddr : 1;
2570b57cec5SDimitry Andric 
2580b57cec5SDimitry Andric   // True if this symbol is in the Iplt sub-section of the Plt and the Igot
2590b57cec5SDimitry Andric   // sub-section of the .got.plt or .got.
260480093f4SDimitry Andric   uint8_t isInIplt : 1;
2610b57cec5SDimitry Andric 
2620b57cec5SDimitry Andric   // True if this symbol needs a GOT entry and its GOT entry is actually in
2630b57cec5SDimitry Andric   // Igot. This will be true only for certain non-preemptible ifuncs.
264480093f4SDimitry Andric   uint8_t gotInIgot : 1;
2650b57cec5SDimitry Andric 
2660b57cec5SDimitry Andric   // True if this symbol is preemptible at load time.
267480093f4SDimitry Andric   uint8_t isPreemptible : 1;
2680b57cec5SDimitry Andric 
2690b57cec5SDimitry Andric   // True if an undefined or shared symbol is used from a live section.
2705ffd83dbSDimitry Andric   //
2715ffd83dbSDimitry Andric   // NOTE: In Writer.cpp the field is used to mark local defined symbols
2725ffd83dbSDimitry Andric   // which are referenced by relocations when -r or --emit-relocs is given.
273480093f4SDimitry Andric   uint8_t used : 1;
2740b57cec5SDimitry Andric 
2750b57cec5SDimitry Andric   // True if a call to this symbol needs to be followed by a restore of the
2760b57cec5SDimitry Andric   // PPC64 toc pointer.
277480093f4SDimitry Andric   uint8_t needsTocRestore : 1;
2780b57cec5SDimitry Andric 
2790b57cec5SDimitry Andric   // True if this symbol is defined by a linker script.
280480093f4SDimitry Andric   uint8_t scriptDefined : 1;
2810b57cec5SDimitry Andric 
2820b57cec5SDimitry Andric   // The partition whose dynamic symbol table contains this symbol's definition.
2830b57cec5SDimitry Andric   uint8_t partition = 1;
2840b57cec5SDimitry Andric 
2850b57cec5SDimitry Andric   bool isSection() const { return type == llvm::ELF::STT_SECTION; }
2860b57cec5SDimitry Andric   bool isTls() const { return type == llvm::ELF::STT_TLS; }
2870b57cec5SDimitry Andric   bool isFunc() const { return type == llvm::ELF::STT_FUNC; }
2880b57cec5SDimitry Andric   bool isGnuIFunc() const { return type == llvm::ELF::STT_GNU_IFUNC; }
2890b57cec5SDimitry Andric   bool isObject() const { return type == llvm::ELF::STT_OBJECT; }
2900b57cec5SDimitry Andric   bool isFile() const { return type == llvm::ELF::STT_FILE; }
2910b57cec5SDimitry Andric };
2920b57cec5SDimitry Andric 
2930b57cec5SDimitry Andric // Represents a symbol that is defined in the current output file.
2940b57cec5SDimitry Andric class Defined : public Symbol {
2950b57cec5SDimitry Andric public:
2960b57cec5SDimitry Andric   Defined(InputFile *file, StringRefZ name, uint8_t binding, uint8_t stOther,
2970b57cec5SDimitry Andric           uint8_t type, uint64_t value, uint64_t size, SectionBase *section)
2980b57cec5SDimitry Andric       : Symbol(DefinedKind, file, name, binding, stOther, type), value(value),
2990b57cec5SDimitry Andric         size(size), section(section) {}
3000b57cec5SDimitry Andric 
3010b57cec5SDimitry Andric   static bool classof(const Symbol *s) { return s->isDefined(); }
3020b57cec5SDimitry Andric 
3030b57cec5SDimitry Andric   uint64_t value;
3040b57cec5SDimitry Andric   uint64_t size;
3050b57cec5SDimitry Andric   SectionBase *section;
3060b57cec5SDimitry Andric };
3070b57cec5SDimitry Andric 
3080b57cec5SDimitry Andric // Represents a common symbol.
3090b57cec5SDimitry Andric //
3100b57cec5SDimitry Andric // On Unix, it is traditionally allowed to write variable definitions
3110b57cec5SDimitry Andric // without initialization expressions (such as "int foo;") to header
3120b57cec5SDimitry Andric // files. Such definition is called "tentative definition".
3130b57cec5SDimitry Andric //
3140b57cec5SDimitry Andric // Using tentative definition is usually considered a bad practice
3150b57cec5SDimitry Andric // because you should write only declarations (such as "extern int
3160b57cec5SDimitry Andric // foo;") to header files. Nevertheless, the linker and the compiler
3170b57cec5SDimitry Andric // have to do something to support bad code by allowing duplicate
3180b57cec5SDimitry Andric // definitions for this particular case.
3190b57cec5SDimitry Andric //
3200b57cec5SDimitry Andric // Common symbols represent variable definitions without initializations.
321480093f4SDimitry Andric // The compiler creates common symbols when it sees variable definitions
3220b57cec5SDimitry Andric // without initialization (you can suppress this behavior and let the
3230b57cec5SDimitry Andric // compiler create a regular defined symbol by -fno-common).
3240b57cec5SDimitry Andric //
3250b57cec5SDimitry Andric // The linker allows common symbols to be replaced by regular defined
3260b57cec5SDimitry Andric // symbols. If there are remaining common symbols after name resolution is
3270b57cec5SDimitry Andric // complete, they are converted to regular defined symbols in a .bss
3280b57cec5SDimitry Andric // section. (Therefore, the later passes don't see any CommonSymbols.)
3290b57cec5SDimitry Andric class CommonSymbol : public Symbol {
3300b57cec5SDimitry Andric public:
3310b57cec5SDimitry Andric   CommonSymbol(InputFile *file, StringRefZ name, uint8_t binding,
3320b57cec5SDimitry Andric                uint8_t stOther, uint8_t type, uint64_t alignment, uint64_t size)
3330b57cec5SDimitry Andric       : Symbol(CommonKind, file, name, binding, stOther, type),
3340b57cec5SDimitry Andric         alignment(alignment), size(size) {}
3350b57cec5SDimitry Andric 
3360b57cec5SDimitry Andric   static bool classof(const Symbol *s) { return s->isCommon(); }
3370b57cec5SDimitry Andric 
3380b57cec5SDimitry Andric   uint32_t alignment;
3390b57cec5SDimitry Andric   uint64_t size;
3400b57cec5SDimitry Andric };
3410b57cec5SDimitry Andric 
3420b57cec5SDimitry Andric class Undefined : public Symbol {
3430b57cec5SDimitry Andric public:
3440b57cec5SDimitry Andric   Undefined(InputFile *file, StringRefZ name, uint8_t binding, uint8_t stOther,
3450b57cec5SDimitry Andric             uint8_t type, uint32_t discardedSecIdx = 0)
3460b57cec5SDimitry Andric       : Symbol(UndefinedKind, file, name, binding, stOther, type),
3470b57cec5SDimitry Andric         discardedSecIdx(discardedSecIdx) {}
3480b57cec5SDimitry Andric 
3490b57cec5SDimitry Andric   static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; }
3500b57cec5SDimitry Andric 
3510b57cec5SDimitry Andric   // The section index if in a discarded section, 0 otherwise.
3520b57cec5SDimitry Andric   uint32_t discardedSecIdx;
3530b57cec5SDimitry Andric };
3540b57cec5SDimitry Andric 
3550b57cec5SDimitry Andric class SharedSymbol : public Symbol {
3560b57cec5SDimitry Andric public:
3570b57cec5SDimitry Andric   static bool classof(const Symbol *s) { return s->kind() == SharedKind; }
3580b57cec5SDimitry Andric 
3590b57cec5SDimitry Andric   SharedSymbol(InputFile &file, StringRef name, uint8_t binding,
3600b57cec5SDimitry Andric                uint8_t stOther, uint8_t type, uint64_t value, uint64_t size,
3610b57cec5SDimitry Andric                uint32_t alignment, uint32_t verdefIndex)
3620b57cec5SDimitry Andric       : Symbol(SharedKind, &file, name, binding, stOther, type), value(value),
3630b57cec5SDimitry Andric         size(size), alignment(alignment) {
3640b57cec5SDimitry Andric     this->verdefIndex = verdefIndex;
3650b57cec5SDimitry Andric     // GNU ifunc is a mechanism to allow user-supplied functions to
3660b57cec5SDimitry Andric     // resolve PLT slot values at load-time. This is contrary to the
3670b57cec5SDimitry Andric     // regular symbol resolution scheme in which symbols are resolved just
3680b57cec5SDimitry Andric     // by name. Using this hook, you can program how symbols are solved
3690b57cec5SDimitry Andric     // for you program. For example, you can make "memcpy" to be resolved
3700b57cec5SDimitry Andric     // to a SSE-enabled version of memcpy only when a machine running the
3710b57cec5SDimitry Andric     // program supports the SSE instruction set.
3720b57cec5SDimitry Andric     //
3730b57cec5SDimitry Andric     // Naturally, such symbols should always be called through their PLT
3740b57cec5SDimitry Andric     // slots. What GNU ifunc symbols point to are resolver functions, and
3750b57cec5SDimitry Andric     // calling them directly doesn't make sense (unless you are writing a
3760b57cec5SDimitry Andric     // loader).
3770b57cec5SDimitry Andric     //
3780b57cec5SDimitry Andric     // For DSO symbols, we always call them through PLT slots anyway.
3790b57cec5SDimitry Andric     // So there's no difference between GNU ifunc and regular function
3800b57cec5SDimitry Andric     // symbols if they are in DSOs. So we can handle GNU_IFUNC as FUNC.
3810b57cec5SDimitry Andric     if (this->type == llvm::ELF::STT_GNU_IFUNC)
3820b57cec5SDimitry Andric       this->type = llvm::ELF::STT_FUNC;
3830b57cec5SDimitry Andric   }
3840b57cec5SDimitry Andric 
3850b57cec5SDimitry Andric   SharedFile &getFile() const { return *cast<SharedFile>(file); }
3860b57cec5SDimitry Andric 
3870b57cec5SDimitry Andric   uint64_t value; // st_value
3880b57cec5SDimitry Andric   uint64_t size;  // st_size
3890b57cec5SDimitry Andric   uint32_t alignment;
3900b57cec5SDimitry Andric };
3910b57cec5SDimitry Andric 
// LazyArchive and LazyObject represent symbols that are not yet in the link,
// but that we know where to find if needed. If the resolver finds both
// Undefined and Lazy for the same name, it will ask the Lazy to load a file.
//
// A special complication is the handling of weak undefined symbols. They should
// not load a file, but we have to remember we have seen both the weak undefined
// and the lazy. We represent that with a lazy symbol with a weak binding. This
// means that code looking for undefined symbols normally also has to take lazy
// symbols into consideration.

4010b57cec5SDimitry Andric 
4020b57cec5SDimitry Andric // This class represents a symbol defined in an archive file. It is
4030b57cec5SDimitry Andric // created from an archive file header, and it knows how to load an
4040b57cec5SDimitry Andric // object file from an archive to replace itself with a defined
4050b57cec5SDimitry Andric // symbol.
4060b57cec5SDimitry Andric class LazyArchive : public Symbol {
4070b57cec5SDimitry Andric public:
4080b57cec5SDimitry Andric   LazyArchive(InputFile &file, const llvm::object::Archive::Symbol s)
4090b57cec5SDimitry Andric       : Symbol(LazyArchiveKind, &file, s.getName(), llvm::ELF::STB_GLOBAL,
4100b57cec5SDimitry Andric                llvm::ELF::STV_DEFAULT, llvm::ELF::STT_NOTYPE),
4110b57cec5SDimitry Andric         sym(s) {}
4120b57cec5SDimitry Andric 
4130b57cec5SDimitry Andric   static bool classof(const Symbol *s) { return s->kind() == LazyArchiveKind; }
4140b57cec5SDimitry Andric 
4150b57cec5SDimitry Andric   MemoryBufferRef getMemberBuffer();
4160b57cec5SDimitry Andric 
4170b57cec5SDimitry Andric   const llvm::object::Archive::Symbol sym;
4180b57cec5SDimitry Andric };
4190b57cec5SDimitry Andric 
4200b57cec5SDimitry Andric // LazyObject symbols represents symbols in object files between
4210b57cec5SDimitry Andric // --start-lib and --end-lib options.
4220b57cec5SDimitry Andric class LazyObject : public Symbol {
4230b57cec5SDimitry Andric public:
4240b57cec5SDimitry Andric   LazyObject(InputFile &file, StringRef name)
4250b57cec5SDimitry Andric       : Symbol(LazyObjectKind, &file, name, llvm::ELF::STB_GLOBAL,
4260b57cec5SDimitry Andric                llvm::ELF::STV_DEFAULT, llvm::ELF::STT_NOTYPE) {}
4270b57cec5SDimitry Andric 
4280b57cec5SDimitry Andric   static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; }
4290b57cec5SDimitry Andric };
4300b57cec5SDimitry Andric 
4310b57cec5SDimitry Andric // Some linker-generated symbols need to be created as
4320b57cec5SDimitry Andric // Defined symbols.
4330b57cec5SDimitry Andric struct ElfSym {
4340b57cec5SDimitry Andric   // __bss_start
4350b57cec5SDimitry Andric   static Defined *bss;
4360b57cec5SDimitry Andric 
4370b57cec5SDimitry Andric   // etext and _etext
4380b57cec5SDimitry Andric   static Defined *etext1;
4390b57cec5SDimitry Andric   static Defined *etext2;
4400b57cec5SDimitry Andric 
4410b57cec5SDimitry Andric   // edata and _edata
4420b57cec5SDimitry Andric   static Defined *edata1;
4430b57cec5SDimitry Andric   static Defined *edata2;
4440b57cec5SDimitry Andric 
4450b57cec5SDimitry Andric   // end and _end
4460b57cec5SDimitry Andric   static Defined *end1;
4470b57cec5SDimitry Andric   static Defined *end2;
4480b57cec5SDimitry Andric 
4490b57cec5SDimitry Andric   // The _GLOBAL_OFFSET_TABLE_ symbol is defined by target convention to
4500b57cec5SDimitry Andric   // be at some offset from the base of the .got section, usually 0 or
4510b57cec5SDimitry Andric   // the end of the .got.
4520b57cec5SDimitry Andric   static Defined *globalOffsetTable;
4530b57cec5SDimitry Andric 
4540b57cec5SDimitry Andric   // _gp, _gp_disp and __gnu_local_gp symbols. Only for MIPS.
4550b57cec5SDimitry Andric   static Defined *mipsGp;
4560b57cec5SDimitry Andric   static Defined *mipsGpDisp;
4570b57cec5SDimitry Andric   static Defined *mipsLocalGp;
4580b57cec5SDimitry Andric 
4590b57cec5SDimitry Andric   // __rel{,a}_iplt_{start,end} symbols.
4600b57cec5SDimitry Andric   static Defined *relaIpltStart;
4610b57cec5SDimitry Andric   static Defined *relaIpltEnd;
4620b57cec5SDimitry Andric 
4630b57cec5SDimitry Andric   // __global_pointer$ for RISC-V.
4640b57cec5SDimitry Andric   static Defined *riscvGlobalPointer;
4650b57cec5SDimitry Andric 
4660b57cec5SDimitry Andric   // _TLS_MODULE_BASE_ on targets that support TLSDESC.
4670b57cec5SDimitry Andric   static Defined *tlsModuleBase;
4680b57cec5SDimitry Andric };
4690b57cec5SDimitry Andric 
4700b57cec5SDimitry Andric // A buffer class that is large enough to hold any Symbol-derived
4710b57cec5SDimitry Andric // object. We allocate memory using this class and instantiate a symbol
4720b57cec5SDimitry Andric // using the placement new.
4730b57cec5SDimitry Andric union SymbolUnion {
4740b57cec5SDimitry Andric   alignas(Defined) char a[sizeof(Defined)];
4750b57cec5SDimitry Andric   alignas(CommonSymbol) char b[sizeof(CommonSymbol)];
4760b57cec5SDimitry Andric   alignas(Undefined) char c[sizeof(Undefined)];
4770b57cec5SDimitry Andric   alignas(SharedSymbol) char d[sizeof(SharedSymbol)];
4780b57cec5SDimitry Andric   alignas(LazyArchive) char e[sizeof(LazyArchive)];
4790b57cec5SDimitry Andric   alignas(LazyObject) char f[sizeof(LazyObject)];
4800b57cec5SDimitry Andric };
4810b57cec5SDimitry Andric 
4820b57cec5SDimitry Andric // It is important to keep the size of SymbolUnion small for performance and
4830b57cec5SDimitry Andric // memory usage reasons. 80 bytes is a soft limit based on the size of Defined
4840b57cec5SDimitry Andric // on a 64-bit system.
4850b57cec5SDimitry Andric static_assert(sizeof(SymbolUnion) <= 80, "SymbolUnion too large");
4860b57cec5SDimitry Andric 
4870b57cec5SDimitry Andric template <typename T> struct AssertSymbol {
4880b57cec5SDimitry Andric   static_assert(std::is_trivially_destructible<T>(),
4890b57cec5SDimitry Andric                 "Symbol types must be trivially destructible");
4900b57cec5SDimitry Andric   static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small");
4910b57cec5SDimitry Andric   static_assert(alignof(T) <= alignof(SymbolUnion),
4920b57cec5SDimitry Andric                 "SymbolUnion not aligned enough");
4930b57cec5SDimitry Andric };
4940b57cec5SDimitry Andric 
4950b57cec5SDimitry Andric static inline void assertSymbols() {
4960b57cec5SDimitry Andric   AssertSymbol<Defined>();
4970b57cec5SDimitry Andric   AssertSymbol<CommonSymbol>();
4980b57cec5SDimitry Andric   AssertSymbol<Undefined>();
4990b57cec5SDimitry Andric   AssertSymbol<SharedSymbol>();
5000b57cec5SDimitry Andric   AssertSymbol<LazyArchive>();
5010b57cec5SDimitry Andric   AssertSymbol<LazyObject>();
5020b57cec5SDimitry Andric }
5030b57cec5SDimitry Andric 
5040b57cec5SDimitry Andric void printTraceSymbol(const Symbol *sym);
5050b57cec5SDimitry Andric 
5060b57cec5SDimitry Andric size_t Symbol::getSymbolSize() const {
5070b57cec5SDimitry Andric   switch (kind()) {
5080b57cec5SDimitry Andric   case CommonKind:
5090b57cec5SDimitry Andric     return sizeof(CommonSymbol);
5100b57cec5SDimitry Andric   case DefinedKind:
5110b57cec5SDimitry Andric     return sizeof(Defined);
5120b57cec5SDimitry Andric   case LazyArchiveKind:
5130b57cec5SDimitry Andric     return sizeof(LazyArchive);
5140b57cec5SDimitry Andric   case LazyObjectKind:
5150b57cec5SDimitry Andric     return sizeof(LazyObject);
5160b57cec5SDimitry Andric   case SharedKind:
5170b57cec5SDimitry Andric     return sizeof(SharedSymbol);
5180b57cec5SDimitry Andric   case UndefinedKind:
5190b57cec5SDimitry Andric     return sizeof(Undefined);
5200b57cec5SDimitry Andric   case PlaceholderKind:
5210b57cec5SDimitry Andric     return sizeof(Symbol);
5220b57cec5SDimitry Andric   }
5230b57cec5SDimitry Andric   llvm_unreachable("unknown symbol kind");
5240b57cec5SDimitry Andric }
5250b57cec5SDimitry Andric 
5260b57cec5SDimitry Andric // replace() replaces "this" object with a given symbol by memcpy'ing
5270b57cec5SDimitry Andric // it over to "this". This function is called as a result of name
5280b57cec5SDimitry Andric // resolution, e.g. to replace an undefind symbol with a defined symbol.
52985868e8aSDimitry Andric void Symbol::replace(const Symbol &newSym) {
5300b57cec5SDimitry Andric   using llvm::ELF::STT_TLS;
5310b57cec5SDimitry Andric 
5325ffd83dbSDimitry Andric   // st_value of STT_TLS represents the assigned offset, not the actual address
5335ffd83dbSDimitry Andric   // which is used by STT_FUNC and STT_OBJECT. STT_TLS symbols can only be
5345ffd83dbSDimitry Andric   // referenced by special TLS relocations. It is usually an error if a STT_TLS
5355ffd83dbSDimitry Andric   // symbol is replaced by a non-STT_TLS symbol, vice versa. There are two
5365ffd83dbSDimitry Andric   // exceptions: (a) a STT_NOTYPE lazy/undefined symbol can be replaced by a
5375ffd83dbSDimitry Andric   // STT_TLS symbol, (b) a STT_TLS undefined symbol can be replaced by a
5385ffd83dbSDimitry Andric   // STT_NOTYPE lazy symbol.
5395ffd83dbSDimitry Andric   if (symbolKind != PlaceholderKind && !newSym.isLazy() &&
5405ffd83dbSDimitry Andric       (type == STT_TLS) != (newSym.type == STT_TLS) &&
5415ffd83dbSDimitry Andric       type != llvm::ELF::STT_NOTYPE)
5420b57cec5SDimitry Andric     error("TLS attribute mismatch: " + toString(*this) + "\n>>> defined in " +
54385868e8aSDimitry Andric           toString(newSym.file) + "\n>>> defined in " + toString(file));
5440b57cec5SDimitry Andric 
5450b57cec5SDimitry Andric   Symbol old = *this;
54685868e8aSDimitry Andric   memcpy(this, &newSym, newSym.getSymbolSize());
5470b57cec5SDimitry Andric 
54885868e8aSDimitry Andric   // old may be a placeholder. The referenced fields must be initialized in
54985868e8aSDimitry Andric   // SymbolTable::insert.
5500b57cec5SDimitry Andric   versionId = old.versionId;
5510b57cec5SDimitry Andric   visibility = old.visibility;
5520b57cec5SDimitry Andric   isUsedInRegularObj = old.isUsedInRegularObj;
5530b57cec5SDimitry Andric   exportDynamic = old.exportDynamic;
55485868e8aSDimitry Andric   inDynamicList = old.inDynamicList;
5550b57cec5SDimitry Andric   canInline = old.canInline;
55685868e8aSDimitry Andric   referenced = old.referenced;
5570b57cec5SDimitry Andric   traced = old.traced;
5580b57cec5SDimitry Andric   isPreemptible = old.isPreemptible;
5590b57cec5SDimitry Andric   scriptDefined = old.scriptDefined;
5600b57cec5SDimitry Andric   partition = old.partition;
5610b57cec5SDimitry Andric 
5620b57cec5SDimitry Andric   // Symbol length is computed lazily. If we already know a symbol length,
5630b57cec5SDimitry Andric   // propagate it.
5640b57cec5SDimitry Andric   if (nameData == old.nameData && nameSize == 0 && old.nameSize != 0)
5650b57cec5SDimitry Andric     nameSize = old.nameSize;
5660b57cec5SDimitry Andric 
5670b57cec5SDimitry Andric   // Print out a log message if --trace-symbol was specified.
5680b57cec5SDimitry Andric   // This is for debugging.
5690b57cec5SDimitry Andric   if (traced)
5700b57cec5SDimitry Andric     printTraceSymbol(this);
5710b57cec5SDimitry Andric }
5720b57cec5SDimitry Andric 
5730b57cec5SDimitry Andric void maybeWarnUnorderableSymbol(const Symbol *sym);
574480093f4SDimitry Andric bool computeIsPreemptible(const Symbol &sym);
5755ffd83dbSDimitry Andric void reportBackrefs();
5765ffd83dbSDimitry Andric 
5775ffd83dbSDimitry Andric // A mapping from a symbol to an InputFile referencing it backward. Used by
5785ffd83dbSDimitry Andric // --warn-backrefs.
579e8d8bef9SDimitry Andric extern llvm::DenseMap<const Symbol *,
580e8d8bef9SDimitry Andric                       std::pair<const InputFile *, const InputFile *>>
581e8d8bef9SDimitry Andric     backwardReferences;
582480093f4SDimitry Andric 
583349cc55cSDimitry Andric // A tuple of (reference, extractedFile, sym). Used by --why-extract=.
584349cc55cSDimitry Andric extern SmallVector<std::tuple<std::string, const InputFile *, const Symbol &>,
585349cc55cSDimitry Andric                    0>
586349cc55cSDimitry Andric     whyExtract;
587349cc55cSDimitry Andric 
5880b57cec5SDimitry Andric } // namespace elf
5890b57cec5SDimitry Andric } // namespace lld
5900b57cec5SDimitry Andric 
5910b57cec5SDimitry Andric #endif
592